Annotation of embedaddon/bird2/proto/bgp/bgp.c, revision 1.1.1.1
1.1 misho 1: /*
2: * BIRD -- The Border Gateway Protocol
3: *
4: * (c) 2000 Martin Mares <mj@ucw.cz>
5: * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
6: * (c) 2008--2016 CZ.NIC z.s.p.o.
7: *
8: * Can be freely distributed and used under the terms of the GNU GPL.
9: */
10:
11: /**
12: * DOC: Border Gateway Protocol
13: *
14: * The BGP protocol is implemented in three parts: |bgp.c| which takes care of
15: * the connection and most of the interface with BIRD core, |packets.c| handling
16: * both incoming and outgoing BGP packets and |attrs.c| containing functions for
17: * manipulation with BGP attribute lists.
18: *
19: * As opposed to the other existing routing daemons, BIRD has a sophisticated
20: * core architecture which is able to keep all the information needed by BGP in
21: * the primary routing table, therefore no complex data structures like a
22: * central BGP table are needed. This increases memory footprint of a BGP router
23: * with many connections, but not too much and, which is more important, it
24: * makes BGP much easier to implement.
25: *
26: * Each instance of BGP (corresponding to a single BGP peer) is described by a
27: * &bgp_proto structure to which are attached individual connections represented
28: * by &bgp_connection (usually, there exists only one connection, but during BGP
29: * session setup, there can be more of them). The connections are handled
30: * according to the BGP state machine defined in the RFC with all the timers and
31: * all the parameters configurable.
32: *
33: * In incoming direction, we listen on the connection's socket and each time we
34: * receive some input, we pass it to bgp_rx(). It decodes packet headers and the
35: * markers and passes complete packets to bgp_rx_packet() which distributes the
36: * packet according to its type.
37: *
38: * In outgoing direction, we gather all the routing updates and sort them to
39: * buckets (&bgp_bucket) according to their attributes (we keep a hash table for
40: * fast comparison of &rta's and a &fib which helps us to find if we already
41: * have another route for the same destination queued for sending, so that we
42: * can replace it with the new one immediately instead of sending both
43: * updates). There also exists a special bucket holding all the route
44: * withdrawals which cannot be queued anywhere else as they don't have any
45: * attributes. If we have any packet to send (due to either new routes or the
46: * connection tracking code wanting to send an Open, Keepalive or Notification
47: * message), we call bgp_schedule_packet() which sets the corresponding bit in a
48: * @packets_to_send bit field in &bgp_conn and as soon as the transmit socket
49: * buffer becomes empty, we call bgp_fire_tx(). It inspects state of all the
50: * packet type bits and calls the corresponding bgp_create_xx() functions,
51: * eventually rescheduling the same packet type if we have more data of the same
52: * type to send.
53: *
54: * The processing of attributes consists of two functions: bgp_decode_attrs()
55: * for checking of the attribute blocks and translating them to the language of
56: * BIRD's extended attributes and bgp_encode_attrs() which does the
57: * converse. Both functions are built around a @bgp_attr_table array describing
58: * all important characteristics of all known attributes. Unknown transitive
59: * attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
60: *
61: * BGP protocol implements graceful restart in both restarting (local restart)
62: * and receiving (neighbor restart) roles. The first is handled mostly by the
63: * graceful restart code in the nest, BGP protocol just handles capabilities,
64: * sets @gr_wait and locks graceful restart until end-of-RIB mark is received.
65: * The second is implemented by internal restart of the BGP state to %BS_IDLE
66: * and protocol state to %PS_START, but keeping the protocol up from the core
67: * point of view and therefore maintaining received routes. Routing table
68: * refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing
69: * stale routes after reestablishment of BGP session during graceful restart.
70: *
71: * Supported standards:
72: * RFC 4271 - Border Gateway Protocol 4 (BGP)
73: * RFC 1997 - BGP Communities Attribute
74: * RFC 2385 - Protection of BGP Sessions via TCP MD5 Signature
75: * RFC 2545 - Use of BGP Multiprotocol Extensions for IPv6
76: * RFC 2918 - Route Refresh Capability
77: * RFC 3107 - Carrying Label Information in BGP
78: * RFC 4360 - BGP Extended Communities Attribute
79: * RFC 4364 - BGP/MPLS IPv4 Virtual Private Networks
80: * RFC 4456 - BGP Route Reflection
81: * RFC 4486 - Subcodes for BGP Cease Notification Message
82: * RFC 4659 - BGP/MPLS IPv6 Virtual Private Networks
83: * RFC 4724 - Graceful Restart Mechanism for BGP
84: * RFC 4760 - Multiprotocol extensions for BGP
85: * RFC 4798 - Connecting IPv6 Islands over IPv4 MPLS
86: * RFC 5065 - AS confederations for BGP
87: * RFC 5082 - Generalized TTL Security Mechanism
88: * RFC 5492 - Capabilities Advertisement with BGP
89: * RFC 5549 - Advertising IPv4 NLRI with an IPv6 Next Hop
90: * RFC 5575 - Dissemination of Flow Specification Rules
91: * RFC 5668 - 4-Octet AS Specific BGP Extended Community
92: * RFC 6286 - AS-Wide Unique BGP Identifier
93: * RFC 6608 - Subcodes for BGP Finite State Machine Error
94: * RFC 6793 - BGP Support for 4-Octet AS Numbers
95: * RFC 7311 - Accumulated IGP Metric Attribute for BGP
96: * RFC 7313 - Enhanced Route Refresh Capability for BGP
97: * RFC 7606 - Revised Error Handling for BGP UPDATE Messages
98: * RFC 7911 - Advertisement of Multiple Paths in BGP
99: * RFC 7947 - Internet Exchange BGP Route Server
100: * RFC 8092 - BGP Large Communities Attribute
101: * RFC 8203 - BGP Administrative Shutdown Communication
102: * RFC 8212 - Default EBGP Route Propagation Behavior without Policies
103: * draft-ietf-idr-bgp-extended-messages-27
104: * draft-ietf-idr-ext-opt-param-07
105: * draft-uttaro-idr-bgp-persistence-04
106: */
107:
108: #undef LOCAL_DEBUG
109:
110: #include <stdlib.h>
111:
112: #include "nest/bird.h"
113: #include "nest/iface.h"
114: #include "nest/protocol.h"
115: #include "nest/route.h"
116: #include "nest/cli.h"
117: #include "nest/locks.h"
118: #include "conf/conf.h"
119: #include "filter/filter.h"
120: #include "lib/socket.h"
121: #include "lib/resource.h"
122: #include "lib/string.h"
123:
124: #include "bgp.h"
125:
126:
127: struct linpool *bgp_linpool; /* Global temporary pool */
128: struct linpool *bgp_linpool2; /* Global temporary pool for bgp_rt_notify() */
129: static list bgp_sockets; /* Global list of listening sockets */
130:
131:
132: static void bgp_connect(struct bgp_proto *p);
133: static void bgp_active(struct bgp_proto *p);
134: static void bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn);
135: static void bgp_setup_sk(struct bgp_conn *conn, sock *s);
136: static void bgp_send_open(struct bgp_conn *conn);
137: static void bgp_update_bfd(struct bgp_proto *p, int use_bfd);
138:
139: static int bgp_incoming_connection(sock *sk, uint dummy UNUSED);
140: static void bgp_listen_sock_err(sock *sk UNUSED, int err);
141:
142: /**
143: * bgp_open - open a BGP instance
144: * @p: BGP instance
145: *
146: * This function allocates and configures shared BGP resources, mainly listening
147: * sockets. Should be called as the last step during initialization (when lock
148: * is acquired and neighbor is ready). When error, caller should change state to
149: * PS_DOWN and return immediately.
150: */
151: static int
152: bgp_open(struct bgp_proto *p)
153: {
154: struct bgp_socket *bs = NULL;
155: struct iface *ifa = p->cf->strict_bind ? p->cf->iface : NULL;
156: ip_addr addr = p->cf->strict_bind ? p->cf->local_ip :
157: (p->ipv4 ? IPA_NONE4 : IPA_NONE6);
158: uint port = p->cf->local_port;
159:
160: /* FIXME: Add some global init? */
161: if (!bgp_linpool)
162: init_list(&bgp_sockets);
163:
164: /* We assume that cf->iface is defined iff cf->local_ip is link-local */
165:
166: WALK_LIST(bs, bgp_sockets)
167: if (ipa_equal(bs->sk->saddr, addr) && (bs->sk->sport == port) &&
168: (bs->sk->iface == ifa) && (bs->sk->vrf == p->p.vrf))
169: {
170: bs->uc++;
171: p->sock = bs;
172: return 0;
173: }
174:
175: sock *sk = sk_new(proto_pool);
176: sk->type = SK_TCP_PASSIVE;
177: sk->ttl = 255;
178: sk->saddr = addr;
179: sk->sport = port;
180: sk->iface = ifa;
181: sk->vrf = p->p.vrf;
182: sk->flags = 0;
183: sk->tos = IP_PREC_INTERNET_CONTROL;
184: sk->rbsize = BGP_RX_BUFFER_SIZE;
185: sk->tbsize = BGP_TX_BUFFER_SIZE;
186: sk->rx_hook = bgp_incoming_connection;
187: sk->err_hook = bgp_listen_sock_err;
188:
189: if (sk_open(sk) < 0)
190: goto err;
191:
192: bs = mb_allocz(proto_pool, sizeof(struct bgp_socket));
193: bs->sk = sk;
194: bs->uc = 1;
195: p->sock = bs;
196: sk->data = bs;
197:
198: add_tail(&bgp_sockets, &bs->n);
199:
200: if (!bgp_linpool)
201: {
202: bgp_linpool = lp_new_default(proto_pool);
203: bgp_linpool2 = lp_new_default(proto_pool);
204: }
205:
206: return 0;
207:
208: err:
209: sk_log_error(sk, p->p.name);
210: log(L_ERR "%s: Cannot open listening socket", p->p.name);
211: rfree(sk);
212: return -1;
213: }
214:
215: /**
216: * bgp_close - close a BGP instance
217: * @p: BGP instance
218: *
219: * This function frees and deconfigures shared BGP resources.
220: */
221: static void
222: bgp_close(struct bgp_proto *p)
223: {
224: struct bgp_socket *bs = p->sock;
225:
226: ASSERT(bs && bs->uc);
227:
228: if (--bs->uc)
229: return;
230:
231: rfree(bs->sk);
232: rem_node(&bs->n);
233: mb_free(bs);
234:
235: if (!EMPTY_LIST(bgp_sockets))
236: return;
237:
238: rfree(bgp_linpool);
239: bgp_linpool = NULL;
240:
241: rfree(bgp_linpool2);
242: bgp_linpool2 = NULL;
243: }
244:
245: static inline int
246: bgp_setup_auth(struct bgp_proto *p, int enable)
247: {
248: if (p->cf->password)
249: {
250: int rv = sk_set_md5_auth(p->sock->sk,
251: p->cf->local_ip, p->cf->remote_ip, p->cf->iface,
252: enable ? p->cf->password : NULL, p->cf->setkey);
253:
254: if (rv < 0)
255: sk_log_error(p->sock->sk, p->p.name);
256:
257: return rv;
258: }
259: else
260: return 0;
261: }
262:
263: static inline struct bgp_channel *
264: bgp_find_channel(struct bgp_proto *p, u32 afi)
265: {
266: struct bgp_channel *c;
267: WALK_LIST(c, p->p.channels)
268: if (c->afi == afi)
269: return c;
270:
271: return NULL;
272: }
273:
274: static void
275: bgp_startup(struct bgp_proto *p)
276: {
277: BGP_TRACE(D_EVENTS, "Started");
278: p->start_state = BSS_CONNECT;
279:
280: if (!p->passive)
281: bgp_active(p);
282:
283: if (p->postponed_sk)
284: {
285: /* Apply postponed incoming connection */
286: bgp_setup_conn(p, &p->incoming_conn);
287: bgp_setup_sk(&p->incoming_conn, p->postponed_sk);
288: bgp_send_open(&p->incoming_conn);
289: p->postponed_sk = NULL;
290: }
291: }
292:
293: static void
294: bgp_startup_timeout(timer *t)
295: {
296: bgp_startup(t->data);
297: }
298:
299:
300: static void
301: bgp_initiate(struct bgp_proto *p)
302: {
303: int err_val;
304:
305: if (bgp_open(p) < 0)
306: { err_val = BEM_NO_SOCKET; goto err1; }
307:
308: if (bgp_setup_auth(p, 1) < 0)
309: { err_val = BEM_INVALID_MD5; goto err2; }
310:
311: if (p->cf->bfd)
312: bgp_update_bfd(p, p->cf->bfd);
313:
314: if (p->startup_delay)
315: {
316: p->start_state = BSS_DELAY;
317: BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay);
318: bgp_start_timer(p->startup_timer, p->startup_delay);
319: }
320: else
321: bgp_startup(p);
322:
323: return;
324:
325: err2:
326: bgp_close(p);
327: err1:
328: p->p.disabled = 1;
329: bgp_store_error(p, NULL, BE_MISC, err_val);
330: proto_notify_state(&p->p, PS_DOWN);
331:
332: return;
333: }
334:
335: /**
336: * bgp_start_timer - start a BGP timer
337: * @t: timer
338: * @value: time (in seconds) to fire (0 to disable the timer)
339: *
340: * This functions calls tm_start() on @t with time @value and the amount of
341: * randomization suggested by the BGP standard. Please use it for all BGP
342: * timers.
343: */
344: void
345: bgp_start_timer(timer *t, uint value)
346: {
347: if (value)
348: {
349: /* The randomization procedure is specified in RFC 4271 section 10 */
350: btime time = value S;
351: btime randomize = random() % ((time / 4) + 1);
352: tm_start(t, time - randomize);
353: }
354: else
355: tm_stop(t);
356: }
357:
358: /**
359: * bgp_close_conn - close a BGP connection
360: * @conn: connection to close
361: *
362: * This function takes a connection described by the &bgp_conn structure, closes
363: * its socket and frees all resources associated with it.
364: */
365: void
366: bgp_close_conn(struct bgp_conn *conn)
367: {
368: // struct bgp_proto *p = conn->bgp;
369:
370: DBG("BGP: Closing connection\n");
371: conn->packets_to_send = 0;
372: conn->channels_to_send = 0;
373: rfree(conn->connect_timer);
374: conn->connect_timer = NULL;
375: rfree(conn->keepalive_timer);
376: conn->keepalive_timer = NULL;
377: rfree(conn->hold_timer);
378: conn->hold_timer = NULL;
379: rfree(conn->tx_ev);
380: conn->tx_ev = NULL;
381: rfree(conn->sk);
382: conn->sk = NULL;
383:
384: mb_free(conn->local_caps);
385: conn->local_caps = NULL;
386: mb_free(conn->remote_caps);
387: conn->remote_caps = NULL;
388: }
389:
390:
391: /**
392: * bgp_update_startup_delay - update a startup delay
393: * @p: BGP instance
394: *
395: * This function updates a startup delay that is used to postpone next BGP
396: * connect. It also handles disable_after_error and might stop BGP instance
397: * when error happened and disable_after_error is on.
398: *
399: * It should be called when BGP protocol error happened.
400: */
401: void
402: bgp_update_startup_delay(struct bgp_proto *p)
403: {
404: const struct bgp_config *cf = p->cf;
405:
406: DBG("BGP: Updating startup delay\n");
407:
408: if (p->last_proto_error && ((current_time() - p->last_proto_error) >= cf->error_amnesia_time S))
409: p->startup_delay = 0;
410:
411: p->last_proto_error = current_time();
412:
413: if (cf->disable_after_error)
414: {
415: p->startup_delay = 0;
416: p->p.disabled = 1;
417: return;
418: }
419:
420: if (!p->startup_delay)
421: p->startup_delay = cf->error_delay_time_min;
422: else
423: p->startup_delay = MIN(2 * p->startup_delay, cf->error_delay_time_max);
424: }
425:
426: static void
427: bgp_graceful_close_conn(struct bgp_conn *conn, int subcode, byte *data, uint len)
428: {
429: switch (conn->state)
430: {
431: case BS_IDLE:
432: case BS_CLOSE:
433: return;
434:
435: case BS_CONNECT:
436: case BS_ACTIVE:
437: bgp_conn_enter_idle_state(conn);
438: return;
439:
440: case BS_OPENSENT:
441: case BS_OPENCONFIRM:
442: case BS_ESTABLISHED:
443: if (subcode < 0)
444: {
445: bgp_conn_enter_close_state(conn);
446: bgp_schedule_packet(conn, NULL, PKT_SCHEDULE_CLOSE);
447: }
448: else
449: bgp_error(conn, 6, subcode, data, len);
450: return;
451:
452: default:
453: bug("bgp_graceful_close_conn: Unknown state %d", conn->state);
454: }
455: }
456:
457: static void
458: bgp_down(struct bgp_proto *p)
459: {
460: if (p->start_state > BSS_PREPARE)
461: {
462: bgp_setup_auth(p, 0);
463: bgp_close(p);
464: }
465:
466: BGP_TRACE(D_EVENTS, "Down");
467: proto_notify_state(&p->p, PS_DOWN);
468: }
469:
470: static void
471: bgp_decision(void *vp)
472: {
473: struct bgp_proto *p = vp;
474:
475: DBG("BGP: Decision start\n");
476: if ((p->p.proto_state == PS_START) &&
477: (p->outgoing_conn.state == BS_IDLE) &&
478: (p->incoming_conn.state != BS_OPENCONFIRM) &&
479: !p->passive)
480: bgp_active(p);
481:
482: if ((p->p.proto_state == PS_STOP) &&
483: (p->outgoing_conn.state == BS_IDLE) &&
484: (p->incoming_conn.state == BS_IDLE))
485: bgp_down(p);
486: }
487:
488: static struct bgp_proto *
489: bgp_spawn(struct bgp_proto *pp, ip_addr remote_ip)
490: {
491: struct symbol *sym;
492: char fmt[SYM_MAX_LEN];
493:
494: bsprintf(fmt, "%s%%0%dd", pp->cf->dynamic_name, pp->cf->dynamic_name_digits);
495:
496: /* This is hack, we would like to share config, but we need to copy it now */
497: new_config = config;
498: cfg_mem = config->mem;
499: conf_this_scope = config->root_scope;
500: sym = cf_default_name(fmt, &(pp->dynamic_name_counter));
501: proto_clone_config(sym, pp->p.cf);
502: new_config = NULL;
503: cfg_mem = NULL;
504:
505: /* Just pass remote_ip to bgp_init() */
506: ((struct bgp_config *) sym->proto)->remote_ip = remote_ip;
507:
508: return (void *) proto_spawn(sym->proto, 0);
509: }
510:
511: void
512: bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len)
513: {
514: proto_notify_state(&p->p, PS_STOP);
515: bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len);
516: bgp_graceful_close_conn(&p->incoming_conn, subcode, data, len);
517: ev_schedule(p->event);
518: }
519:
520: static inline void
521: bgp_conn_set_state(struct bgp_conn *conn, uint new_state)
522: {
523: if (conn->bgp->p.mrtdump & MD_STATES)
524: bgp_dump_state_change(conn, conn->state, new_state);
525:
526: conn->state = new_state;
527: }
528:
529: void
530: bgp_conn_enter_openconfirm_state(struct bgp_conn *conn)
531: {
532: /* Really, most of the work is done in bgp_rx_open(). */
533: bgp_conn_set_state(conn, BS_OPENCONFIRM);
534: }
535:
536: static const struct bgp_af_caps dummy_af_caps = { };
537: static const struct bgp_af_caps basic_af_caps = { .ready = 1 };
538:
539: void
540: bgp_conn_enter_established_state(struct bgp_conn *conn)
541: {
542: struct bgp_proto *p = conn->bgp;
543: struct bgp_caps *local = conn->local_caps;
544: struct bgp_caps *peer = conn->remote_caps;
545: struct bgp_channel *c;
546:
547: BGP_TRACE(D_EVENTS, "BGP session established");
548:
549: /* For multi-hop BGP sessions */
550: if (ipa_zero(p->local_ip))
551: p->local_ip = conn->sk->saddr;
552:
553: /* For promiscuous sessions */
554: if (!p->remote_as)
555: p->remote_as = conn->received_as;
556:
557: /* In case of LLv6 is not valid during BGP start */
558: if (ipa_zero(p->link_addr) && p->neigh && p->neigh->iface && p->neigh->iface->llv6)
559: p->link_addr = p->neigh->iface->llv6->ip;
560:
561: conn->sk->fast_rx = 0;
562:
563: p->conn = conn;
564: p->last_error_class = 0;
565: p->last_error_code = 0;
566:
567: p->as4_session = conn->as4_session;
568:
569: p->route_refresh = peer->route_refresh;
570: p->enhanced_refresh = local->enhanced_refresh && peer->enhanced_refresh;
571:
572: /* Whether we may handle possible GR/LLGR of peer (it has some AF GR-able) */
573: p->gr_ready = p->llgr_ready = 0; /* Updated later */
574:
575: /* Whether peer is ready to handle our GR recovery */
576: int peer_gr_ready = peer->gr_aware && !(peer->gr_flags & BGP_GRF_RESTART);
577:
578: if (p->gr_active_num)
579: tm_stop(p->gr_timer);
580:
581: /* Number of active channels */
582: int num = 0;
583:
584: /* Summary state of ADD_PATH RX for active channels */
585: uint summary_add_path_rx = 0;
586:
587: WALK_LIST(c, p->p.channels)
588: {
589: const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi);
590: const struct bgp_af_caps *rem = bgp_find_af_caps(peer, c->afi);
591:
592: /* Use default if capabilities were not announced */
593: if (!local->length && (c->afi == BGP_AF_IPV4))
594: loc = &basic_af_caps;
595:
596: if (!peer->length && (c->afi == BGP_AF_IPV4))
597: rem = &basic_af_caps;
598:
599: /* Ignore AFIs that were not announced in multiprotocol capability */
600: if (!loc || !loc->ready)
601: loc = &dummy_af_caps;
602:
603: if (!rem || !rem->ready)
604: rem = &dummy_af_caps;
605:
606: int active = loc->ready && rem->ready;
607: c->c.disabled = !active;
608: c->c.reloadable = p->route_refresh || c->cf->import_table;
609:
610: c->index = active ? num++ : 0;
611:
612: c->feed_state = BFS_NONE;
613: c->load_state = BFS_NONE;
614:
615: /* Channels where peer may do GR */
616: uint gr_ready = active && local->gr_aware && rem->gr_able;
617: uint llgr_ready = active && local->llgr_aware && rem->llgr_able;
618:
619: c->gr_ready = gr_ready || llgr_ready;
620: p->gr_ready = p->gr_ready || c->gr_ready;
621: p->llgr_ready = p->llgr_ready || llgr_ready;
622:
623: /* Remember last LLGR stale time */
624: c->stale_time = local->llgr_aware ? rem->llgr_time : 0;
625:
626: /* Channels not able to recover gracefully */
627: if (p->p.gr_recovery && (!active || !peer_gr_ready))
628: channel_graceful_restart_unlock(&c->c);
629:
630: /* Channels waiting for local convergence */
631: if (p->p.gr_recovery && loc->gr_able && peer_gr_ready)
632: c->c.gr_wait = 1;
633:
634: /* Channels where regular graceful restart failed */
635: if ((c->gr_active == BGP_GRS_ACTIVE) &&
636: !(active && rem->gr_able && (rem->gr_af_flags & BGP_GRF_FORWARDING)))
637: bgp_graceful_restart_done(c);
638:
639: /* Channels where regular long-lived restart failed */
640: if ((c->gr_active == BGP_GRS_LLGR) &&
641: !(active && rem->llgr_able && (rem->gr_af_flags & BGP_LLGRF_FORWARDING)))
642: bgp_graceful_restart_done(c);
643:
644: /* GR capability implies that neighbor will send End-of-RIB */
645: if (peer->gr_aware)
646: c->load_state = BFS_LOADING;
647:
648: c->ext_next_hop = c->cf->ext_next_hop && (bgp_channel_is_ipv6(c) || rem->ext_next_hop);
649: c->add_path_rx = (loc->add_path & BGP_ADD_PATH_RX) && (rem->add_path & BGP_ADD_PATH_TX);
650: c->add_path_tx = (loc->add_path & BGP_ADD_PATH_TX) && (rem->add_path & BGP_ADD_PATH_RX);
651:
652: if (active)
653: summary_add_path_rx |= !c->add_path_rx ? 1 : 2;
654:
655: /* Update RA mode */
656: if (c->add_path_tx)
657: c->c.ra_mode = RA_ANY;
658: else if (c->cf->secondary)
659: c->c.ra_mode = RA_ACCEPTED;
660: else
661: c->c.ra_mode = RA_OPTIMAL;
662: }
663:
664: p->afi_map = mb_alloc(p->p.pool, num * sizeof(u32));
665: p->channel_map = mb_alloc(p->p.pool, num * sizeof(void *));
666: p->channel_count = num;
667: p->summary_add_path_rx = summary_add_path_rx;
668:
669: WALK_LIST(c, p->p.channels)
670: {
671: if (c->c.disabled)
672: continue;
673:
674: p->afi_map[c->index] = c->afi;
675: p->channel_map[c->index] = c;
676: }
677:
678: /* proto_notify_state() will likely call bgp_feed_begin(), setting c->feed_state */
679:
680: bgp_conn_set_state(conn, BS_ESTABLISHED);
681: proto_notify_state(&p->p, PS_UP);
682: }
683:
684: static void
685: bgp_conn_leave_established_state(struct bgp_proto *p)
686: {
687: BGP_TRACE(D_EVENTS, "BGP session closed");
688: p->conn = NULL;
689:
690: if (p->p.proto_state == PS_UP)
691: bgp_stop(p, 0, NULL, 0);
692: }
693:
694: void
695: bgp_conn_enter_close_state(struct bgp_conn *conn)
696: {
697: struct bgp_proto *p = conn->bgp;
698: int os = conn->state;
699:
700: bgp_conn_set_state(conn, BS_CLOSE);
701: tm_stop(conn->keepalive_timer);
702: conn->sk->rx_hook = NULL;
703:
704: /* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */
705: bgp_start_timer(conn->hold_timer, 10);
706:
707: if (os == BS_ESTABLISHED)
708: bgp_conn_leave_established_state(p);
709: }
710:
711: void
712: bgp_conn_enter_idle_state(struct bgp_conn *conn)
713: {
714: struct bgp_proto *p = conn->bgp;
715: int os = conn->state;
716:
717: bgp_close_conn(conn);
718: bgp_conn_set_state(conn, BS_IDLE);
719: ev_schedule(p->event);
720:
721: if (os == BS_ESTABLISHED)
722: bgp_conn_leave_established_state(p);
723: }
724:
725: /**
726: * bgp_handle_graceful_restart - handle detected BGP graceful restart
727: * @p: BGP instance
728: *
729: * This function is called when a BGP graceful restart of the neighbor is
730: * detected (when the TCP connection fails or when a new TCP connection
731: * appears). The function activates processing of the restart - starts routing
732: * table refresh cycle and activates BGP restart timer. The protocol state goes
733: * back to %PS_START, but changing BGP state back to %BS_IDLE is left for the
734: * caller.
735: */
736: void
737: bgp_handle_graceful_restart(struct bgp_proto *p)
738: {
739: ASSERT(p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready);
740:
741: BGP_TRACE(D_EVENTS, "Neighbor graceful restart detected%s",
742: p->gr_active_num ? " - already pending" : "");
743:
744: p->gr_active_num = 0;
745:
746: struct bgp_channel *c;
747: WALK_LIST(c, p->p.channels)
748: {
749: /* FIXME: perhaps check for channel state instead of disabled flag? */
750: if (c->c.disabled)
751: continue;
752:
753: if (c->gr_ready)
754: {
755: p->gr_active_num++;
756:
757: switch (c->gr_active)
758: {
759: case BGP_GRS_NONE:
760: c->gr_active = BGP_GRS_ACTIVE;
761: rt_refresh_begin(c->c.table, &c->c);
762: break;
763:
764: case BGP_GRS_ACTIVE:
765: rt_refresh_end(c->c.table, &c->c);
766: rt_refresh_begin(c->c.table, &c->c);
767: break;
768:
769: case BGP_GRS_LLGR:
770: rt_refresh_begin(c->c.table, &c->c);
771: rt_modify_stale(c->c.table, &c->c);
772: break;
773: }
774: }
775: else
776: {
777: /* Just flush the routes */
778: rt_refresh_begin(c->c.table, &c->c);
779: rt_refresh_end(c->c.table, &c->c);
780: }
781:
782: /* Reset bucket and prefix tables */
783: bgp_free_bucket_table(c);
784: bgp_free_prefix_table(c);
785: bgp_init_bucket_table(c);
786: bgp_init_prefix_table(c);
787: c->packets_to_send = 0;
788: }
789:
790: /* p->gr_ready -> at least one active channel is c->gr_ready */
791: ASSERT(p->gr_active_num > 0);
792:
793: proto_notify_state(&p->p, PS_START);
794: tm_start(p->gr_timer, p->conn->remote_caps->gr_time S);
795: }
796:
797: /**
798: * bgp_graceful_restart_done - finish active BGP graceful restart
799: * @c: BGP channel
800: *
801: * This function is called when the active BGP graceful restart of the neighbor
802: * should be finished for channel @c - either successfully (the neighbor sends
803: * all paths and reports end-of-RIB for given AFI/SAFI on the new session) or
804: * unsuccessfully (the neighbor does not support BGP graceful restart on the new
805: * session). The function ends the routing table refresh cycle.
806: */
807: void
808: bgp_graceful_restart_done(struct bgp_channel *c)
809: {
810: struct bgp_proto *p = (void *) c->c.proto;
811:
812: ASSERT(c->gr_active);
813: c->gr_active = 0;
814: p->gr_active_num--;
815:
816: if (!p->gr_active_num)
817: BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
818:
819: tm_stop(c->stale_timer);
820: rt_refresh_end(c->c.table, &c->c);
821: }
822:
823: /**
824: * bgp_graceful_restart_timeout - timeout of graceful restart 'restart timer'
825: * @t: timer
826: *
827: * This function is a timeout hook for @gr_timer, implementing BGP restart time
828: * limit for reestablisment of the BGP session after the graceful restart. When
829: * fired, we just proceed with the usual protocol restart.
830: */
831:
832: static void
833: bgp_graceful_restart_timeout(timer *t)
834: {
835: struct bgp_proto *p = t->data;
836:
837: BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
838:
839: if (p->llgr_ready)
840: {
841: struct bgp_channel *c;
842: WALK_LIST(c, p->p.channels)
843: {
844: /* Channel is not in GR and is already flushed */
845: if (!c->gr_active)
846: continue;
847:
848: /* Channel is already in LLGR from past restart */
849: if (c->gr_active == BGP_GRS_LLGR)
850: continue;
851:
852: /* Channel is in GR, but does not support LLGR -> stop GR */
853: if (!c->stale_time)
854: {
855: bgp_graceful_restart_done(c);
856: continue;
857: }
858:
859: /* Channel is in GR, and supports LLGR -> start LLGR */
860: c->gr_active = BGP_GRS_LLGR;
861: tm_start(c->stale_timer, c->stale_time S);
862: rt_modify_stale(c->c.table, &c->c);
863: }
864: }
865: else
866: bgp_stop(p, 0, NULL, 0);
867: }
868:
869: static void
870: bgp_long_lived_stale_timeout(timer *t)
871: {
872: struct bgp_channel *c = t->data;
873: struct bgp_proto *p = (void *) c->c.proto;
874:
875: BGP_TRACE(D_EVENTS, "Long-lived stale timeout");
876:
877: bgp_graceful_restart_done(c);
878: }
879:
880:
881: /**
882: * bgp_refresh_begin - start incoming enhanced route refresh sequence
883: * @c: BGP channel
884: *
885: * This function is called when an incoming enhanced route refresh sequence is
886: * started by the neighbor, demarcated by the BoRR packet. The function updates
887: * the load state and starts the routing table refresh cycle. Note that graceful
888: * restart also uses routing table refresh cycle, but RFC 7313 and load states
889: * ensure that these two sequences do not overlap.
890: */
891: void
892: bgp_refresh_begin(struct bgp_channel *c)
893: {
894: struct bgp_proto *p = (void *) c->c.proto;
895:
896: if (c->load_state == BFS_LOADING)
897: { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }
898:
899: c->load_state = BFS_REFRESHING;
900: rt_refresh_begin(c->c.table, &c->c);
901:
902: if (c->c.in_table)
903: rt_refresh_begin(c->c.in_table, &c->c);
904: }
905:
906: /**
907: * bgp_refresh_end - finish incoming enhanced route refresh sequence
908: * @c: BGP channel
909: *
910: * This function is called when an incoming enhanced route refresh sequence is
911: * finished by the neighbor, demarcated by the EoRR packet. The function updates
912: * the load state and ends the routing table refresh cycle. Routes not received
913: * during the sequence are removed by the nest.
914: */
915: void
916: bgp_refresh_end(struct bgp_channel *c)
917: {
918: struct bgp_proto *p = (void *) c->c.proto;
919:
920: if (c->load_state != BFS_REFRESHING)
921: { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }
922:
923: c->load_state = BFS_NONE;
924: rt_refresh_end(c->c.table, &c->c);
925:
926: if (c->c.in_table)
927: rt_prune_sync(c->c.in_table, 0);
928: }
929:
930:
931: static void
932: bgp_send_open(struct bgp_conn *conn)
933: {
934: DBG("BGP: Sending open\n");
935: conn->sk->rx_hook = bgp_rx;
936: conn->sk->tx_hook = bgp_tx;
937: tm_stop(conn->connect_timer);
938: bgp_prepare_capabilities(conn);
939: bgp_schedule_packet(conn, NULL, PKT_OPEN);
940: bgp_conn_set_state(conn, BS_OPENSENT);
941: bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
942: }
943:
944: static void
945: bgp_connected(sock *sk)
946: {
947: struct bgp_conn *conn = sk->data;
948: struct bgp_proto *p = conn->bgp;
949:
950: BGP_TRACE(D_EVENTS, "Connected");
951: bgp_send_open(conn);
952: }
953:
954: static void
955: bgp_connect_timeout(timer *t)
956: {
957: struct bgp_conn *conn = t->data;
958: struct bgp_proto *p = conn->bgp;
959:
960: DBG("BGP: connect_timeout\n");
961: if (p->p.proto_state == PS_START)
962: {
963: bgp_close_conn(conn);
964: bgp_connect(p);
965: }
966: else
967: bgp_conn_enter_idle_state(conn);
968: }
969:
970: static void
971: bgp_sock_err(sock *sk, int err)
972: {
973: struct bgp_conn *conn = sk->data;
974: struct bgp_proto *p = conn->bgp;
975:
976: /*
977: * This error hook may be called either asynchronously from main
978: * loop, or synchronously from sk_send(). But sk_send() is called
979: * only from bgp_tx() and bgp_kick_tx(), which are both called
980: * asynchronously from main loop. Moreover, they end if err hook is
981: * called. Therefore, we could suppose that it is always called
982: * asynchronously.
983: */
984:
985: bgp_store_error(p, conn, BE_SOCKET, err);
986:
987: if (err)
988: BGP_TRACE(D_EVENTS, "Connection lost (%M)", err);
989: else
990: BGP_TRACE(D_EVENTS, "Connection closed");
991:
992: if ((conn->state == BS_ESTABLISHED) && p->gr_ready)
993: bgp_handle_graceful_restart(p);
994:
995: bgp_conn_enter_idle_state(conn);
996: }
997:
998: static void
999: bgp_hold_timeout(timer *t)
1000: {
1001: struct bgp_conn *conn = t->data;
1002: struct bgp_proto *p = conn->bgp;
1003:
1004: DBG("BGP: Hold timeout\n");
1005:
1006: /* We are already closing the connection - just do hangup */
1007: if (conn->state == BS_CLOSE)
1008: {
1009: BGP_TRACE(D_EVENTS, "Connection stalled");
1010: bgp_conn_enter_idle_state(conn);
1011: return;
1012: }
1013:
1014: /* If there is something in input queue, we are probably congested
1015: and perhaps just not processed BGP packets in time. */
1016:
1017: if (sk_rx_ready(conn->sk) > 0)
1018: bgp_start_timer(conn->hold_timer, 10);
1019: else if ((conn->state == BS_ESTABLISHED) && p->llgr_ready)
1020: {
1021: BGP_TRACE(D_EVENTS, "Hold timer expired");
1022: bgp_handle_graceful_restart(p);
1023: bgp_conn_enter_idle_state(conn);
1024: }
1025: else
1026: bgp_error(conn, 4, 0, NULL, 0);
1027: }
1028:
1029: static void
1030: bgp_keepalive_timeout(timer *t)
1031: {
1032: struct bgp_conn *conn = t->data;
1033:
1034: DBG("BGP: Keepalive timer\n");
1035: bgp_schedule_packet(conn, NULL, PKT_KEEPALIVE);
1036:
1037: /* Kick TX a bit faster */
1038: if (ev_active(conn->tx_ev))
1039: ev_run(conn->tx_ev);
1040: }
1041:
1042: static void
1043: bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
1044: {
1045: conn->sk = NULL;
1046: conn->bgp = p;
1047:
1048: conn->packets_to_send = 0;
1049: conn->channels_to_send = 0;
1050: conn->last_channel = 0;
1051: conn->last_channel_count = 0;
1052:
1053: conn->connect_timer = tm_new_init(p->p.pool, bgp_connect_timeout, conn, 0, 0);
1054: conn->hold_timer = tm_new_init(p->p.pool, bgp_hold_timeout, conn, 0, 0);
1055: conn->keepalive_timer = tm_new_init(p->p.pool, bgp_keepalive_timeout, conn, 0, 0);
1056:
1057: conn->tx_ev = ev_new_init(p->p.pool, bgp_kick_tx, conn);
1058: }
1059:
1060: static void
1061: bgp_setup_sk(struct bgp_conn *conn, sock *s)
1062: {
1063: s->data = conn;
1064: s->err_hook = bgp_sock_err;
1065: s->fast_rx = 1;
1066: conn->sk = s;
1067: }
1068:
1069: static void
1070: bgp_active(struct bgp_proto *p)
1071: {
1072: int delay = MAX(1, p->cf->connect_delay_time);
1073: struct bgp_conn *conn = &p->outgoing_conn;
1074:
1075: BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
1076: bgp_setup_conn(p, conn);
1077: bgp_conn_set_state(conn, BS_ACTIVE);
1078: bgp_start_timer(conn->connect_timer, delay);
1079: }
1080:
1081: /**
1082: * bgp_connect - initiate an outgoing connection
1083: * @p: BGP instance
1084: *
1085: * The bgp_connect() function creates a new &bgp_conn and initiates
1086: * a TCP connection to the peer. The rest of connection setup is governed
1087: * by the BGP state machine as described in the standard.
1088: */
1089: static void
1090: bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing connection */
1091: {
1092: struct bgp_conn *conn = &p->outgoing_conn;
1093: int hops = p->cf->multihop ? : 1;
1094:
1095: DBG("BGP: Connecting\n");
1096: sock *s = sk_new(p->p.pool);
1097: s->type = SK_TCP_ACTIVE;
1098: s->saddr = p->local_ip;
1099: s->daddr = p->remote_ip;
1100: s->dport = p->cf->remote_port;
1101: s->iface = p->neigh ? p->neigh->iface : NULL;
1102: s->vrf = p->p.vrf;
1103: s->ttl = p->cf->ttl_security ? 255 : hops;
1104: s->rbsize = p->cf->enable_extended_messages ? BGP_RX_BUFFER_EXT_SIZE : BGP_RX_BUFFER_SIZE;
1105: s->tbsize = p->cf->enable_extended_messages ? BGP_TX_BUFFER_EXT_SIZE : BGP_TX_BUFFER_SIZE;
1106: s->tos = IP_PREC_INTERNET_CONTROL;
1107: s->password = p->cf->password;
1108: s->tx_hook = bgp_connected;
1109: BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J",
1110: s->daddr, ipa_is_link_local(s->daddr) ? p->cf->iface : NULL,
1111: s->saddr, ipa_is_link_local(s->saddr) ? s->iface : NULL);
1112: bgp_setup_conn(p, conn);
1113: bgp_setup_sk(conn, s);
1114: bgp_conn_set_state(conn, BS_CONNECT);
1115:
1116: if (sk_open(s) < 0)
1117: goto err;
1118:
1119: /* Set minimal receive TTL if needed */
1120: if (p->cf->ttl_security)
1121: if (sk_set_min_ttl(s, 256 - hops) < 0)
1122: goto err;
1123:
1124: DBG("BGP: Waiting for connect success\n");
1125: bgp_start_timer(conn->connect_timer, p->cf->connect_retry_time);
1126: return;
1127:
1128: err:
1129: sk_log_error(s, p->p.name);
1130: bgp_sock_err(s, 0);
1131: return;
1132: }
1133:
1134: static inline int bgp_is_dynamic(struct bgp_proto *p)
1135: { return ipa_zero(p->remote_ip); }
1136:
1137: /**
1138: * bgp_find_proto - find existing proto for incoming connection
1139: * @sk: TCP socket
1140: *
1141: */
1142: static struct bgp_proto *
1143: bgp_find_proto(sock *sk)
1144: {
1145: struct bgp_proto *best = NULL;
1146: struct bgp_proto *p;
1147:
1148: /* sk->iface is valid only if src or dst address is link-local */
1149: int link = ipa_is_link_local(sk->saddr) || ipa_is_link_local(sk->daddr);
1150:
1151: WALK_LIST(p, proto_list)
1152: if ((p->p.proto == &proto_bgp) &&
1153: (ipa_equal(p->remote_ip, sk->daddr) || bgp_is_dynamic(p)) &&
1154: (!p->cf->remote_range || ipa_in_netX(sk->daddr, p->cf->remote_range)) &&
1155: (p->p.vrf == sk->vrf) &&
1156: (p->cf->local_port == sk->sport) &&
1157: (!link || (p->cf->iface == sk->iface)) &&
1158: (ipa_zero(p->cf->local_ip) || ipa_equal(p->cf->local_ip, sk->saddr)))
1159: {
1160: best = p;
1161:
1162: if (!bgp_is_dynamic(p))
1163: break;
1164: }
1165:
1166: return best;
1167: }
1168:
1169: /**
1170: * bgp_incoming_connection - handle an incoming connection
1171: * @sk: TCP socket
1172: * @dummy: unused
1173: *
1174: * This function serves as a socket hook for accepting of new BGP
1175: * connections. It searches a BGP instance corresponding to the peer
1176: * which has connected and if such an instance exists, it creates a
1177: * &bgp_conn structure, attaches it to the instance and either sends
1178: * an Open message or (if there already is an active connection) it
1179: * closes the new connection by sending a Notification message.
1180: */
1181: static int
1182: bgp_incoming_connection(sock *sk, uint dummy UNUSED)
1183: {
1184: struct bgp_proto *p;
1185: int acc, hops;
1186:
1187: DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport);
1188: p = bgp_find_proto(sk);
1189: if (!p)
1190: {
1191: log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)",
1192: sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport);
1193: rfree(sk);
1194: return 0;
1195: }
1196:
1197: /*
1198: * BIRD should keep multiple incoming connections in OpenSent state (for
1199: * details RFC 4271 8.2.1 par 3), but it keeps just one. Duplicate incoming
1200: * connections are rejected istead. The exception is the case where an
1201: * incoming connection triggers a graceful restart.
1202: */
1203:
1204: acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) &&
1205: (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);
1206:
1207: if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
1208: {
1209: bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART);
1210: bgp_handle_graceful_restart(p);
1211: bgp_conn_enter_idle_state(p->conn);
1212: acc = 1;
1213:
1214: /* There might be separate incoming connection in OpenSent state */
1215: if (p->incoming_conn.state > BS_ACTIVE)
1216: bgp_close_conn(&p->incoming_conn);
1217: }
1218:
1219: BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s",
1220: sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL,
1221: sk->dport, acc ? "accepted" : "rejected");
1222:
1223: if (!acc)
1224: {
1225: rfree(sk);
1226: return 0;
1227: }
1228:
1229: hops = p->cf->multihop ? : 1;
1230:
1231: if (sk_set_ttl(sk, p->cf->ttl_security ? 255 : hops) < 0)
1232: goto err;
1233:
1234: if (p->cf->ttl_security)
1235: if (sk_set_min_ttl(sk, 256 - hops) < 0)
1236: goto err;
1237:
1238: if (p->cf->enable_extended_messages)
1239: {
1240: sk->rbsize = BGP_RX_BUFFER_EXT_SIZE;
1241: sk->tbsize = BGP_TX_BUFFER_EXT_SIZE;
1242: sk_reallocate(sk);
1243: }
1244:
1245: /* For dynamic BGP, spawn new instance and postpone the socket */
1246: if (bgp_is_dynamic(p))
1247: {
1248: p = bgp_spawn(p, sk->daddr);
1249: p->postponed_sk = sk;
1250: rmove(sk, p->p.pool);
1251: return 0;
1252: }
1253:
1254: rmove(sk, p->p.pool);
1255: bgp_setup_conn(p, &p->incoming_conn);
1256: bgp_setup_sk(&p->incoming_conn, sk);
1257: bgp_send_open(&p->incoming_conn);
1258: return 0;
1259:
1260: err:
1261: sk_log_error(sk, p->p.name);
1262: log(L_ERR "%s: Incoming connection aborted", p->p.name);
1263: rfree(sk);
1264: return 0;
1265: }
1266:
1267: static void
1268: bgp_listen_sock_err(sock *sk UNUSED, int err)
1269: {
1270: if (err == ECONNABORTED)
1271: log(L_WARN "BGP: Incoming connection aborted");
1272: else
1273: log(L_ERR "BGP: Error on listening socket: %M", err);
1274: }
1275:
1276: static void
1277: bgp_start_neighbor(struct bgp_proto *p)
1278: {
1279: /* Called only for single-hop BGP sessions */
1280:
1281: if (ipa_zero(p->local_ip))
1282: p->local_ip = p->neigh->ifa->ip;
1283:
1284: if (ipa_is_link_local(p->local_ip))
1285: p->link_addr = p->local_ip;
1286: else if (p->neigh->iface->llv6)
1287: p->link_addr = p->neigh->iface->llv6->ip;
1288:
1289: bgp_initiate(p);
1290: }
1291:
1292: static void
1293: bgp_neigh_notify(neighbor *n)
1294: {
1295: struct bgp_proto *p = (struct bgp_proto *) n->proto;
1296: int ps = p->p.proto_state;
1297:
1298: if (n != p->neigh)
1299: return;
1300:
1301: if ((ps == PS_DOWN) || (ps == PS_STOP))
1302: return;
1303:
1304: int prepare = (ps == PS_START) && (p->start_state == BSS_PREPARE);
1305:
1306: if (n->scope <= 0)
1307: {
1308: if (!prepare)
1309: {
1310: BGP_TRACE(D_EVENTS, "Neighbor lost");
1311: bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST);
1312: /* Perhaps also run bgp_update_startup_delay(p)? */
1313: bgp_stop(p, 0, NULL, 0);
1314: }
1315: }
1316: else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
1317: {
1318: if (!prepare)
1319: {
1320: BGP_TRACE(D_EVENTS, "Link down");
1321: bgp_store_error(p, NULL, BE_MISC, BEM_LINK_DOWN);
1322: if (ps == PS_UP)
1323: bgp_update_startup_delay(p);
1324: bgp_stop(p, 0, NULL, 0);
1325: }
1326: }
1327: else
1328: {
1329: if (prepare)
1330: {
1331: BGP_TRACE(D_EVENTS, "Neighbor ready");
1332: bgp_start_neighbor(p);
1333: }
1334: }
1335: }
1336:
1337: static void
1338: bgp_bfd_notify(struct bfd_request *req)
1339: {
1340: struct bgp_proto *p = req->data;
1341: int ps = p->p.proto_state;
1342:
1343: if (req->down && ((ps == PS_START) || (ps == PS_UP)))
1344: {
1345: BGP_TRACE(D_EVENTS, "BFD session down");
1346: bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN);
1347:
1348: if (p->cf->bfd == BGP_BFD_GRACEFUL)
1349: {
1350: /* Trigger graceful restart */
1351: if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
1352: bgp_handle_graceful_restart(p);
1353:
1354: if (p->incoming_conn.state > BS_IDLE)
1355: bgp_conn_enter_idle_state(&p->incoming_conn);
1356:
1357: if (p->outgoing_conn.state > BS_IDLE)
1358: bgp_conn_enter_idle_state(&p->outgoing_conn);
1359: }
1360: else
1361: {
1362: /* Trigger session down */
1363: if (ps == PS_UP)
1364: bgp_update_startup_delay(p);
1365: bgp_stop(p, 0, NULL, 0);
1366: }
1367: }
1368: }
1369:
1370: static void
1371: bgp_update_bfd(struct bgp_proto *p, int use_bfd)
1372: {
1373: if (use_bfd && !p->bfd_req && !bgp_is_dynamic(p))
1374: p->bfd_req = bfd_request_session(p->p.pool, p->remote_ip, p->local_ip,
1375: p->cf->multihop ? NULL : p->neigh->iface,
1376: p->p.vrf, bgp_bfd_notify, p);
1377:
1378: if (!use_bfd && p->bfd_req)
1379: {
1380: rfree(p->bfd_req);
1381: p->bfd_req = NULL;
1382: }
1383: }
1384:
1385: static void
1386: bgp_reload_routes(struct channel *C)
1387: {
1388: struct bgp_proto *p = (void *) C->proto;
1389: struct bgp_channel *c = (void *) C;
1390:
1391: ASSERT(p->conn && (p->route_refresh || c->c.in_table));
1392:
1393: if (c->c.in_table)
1394: channel_schedule_reload(C);
1395: else
1396: bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH);
1397: }
1398:
1399: static void
1400: bgp_feed_begin(struct channel *C, int initial)
1401: {
1402: struct bgp_proto *p = (void *) C->proto;
1403: struct bgp_channel *c = (void *) C;
1404:
1405: /* This should not happen */
1406: if (!p->conn)
1407: return;
1408:
1409: if (initial && p->cf->gr_mode)
1410: c->feed_state = BFS_LOADING;
1411:
1412: /* It is refeed and both sides support enhanced route refresh */
1413: if (!initial && p->enhanced_refresh)
1414: {
1415: /* BoRR must not be sent before End-of-RIB */
1416: if (c->feed_state == BFS_LOADING || c->feed_state == BFS_LOADED)
1417: return;
1418:
1419: c->feed_state = BFS_REFRESHING;
1420: bgp_schedule_packet(p->conn, c, PKT_BEGIN_REFRESH);
1421: }
1422: }
1423:
1424: static void
1425: bgp_feed_end(struct channel *C)
1426: {
1427: struct bgp_proto *p = (void *) C->proto;
1428: struct bgp_channel *c = (void *) C;
1429:
1430: /* This should not happen */
1431: if (!p->conn)
1432: return;
1433:
1434: /* Non-demarcated feed ended, nothing to do */
1435: if (c->feed_state == BFS_NONE)
1436: return;
1437:
1438: /* Schedule End-of-RIB packet */
1439: if (c->feed_state == BFS_LOADING)
1440: c->feed_state = BFS_LOADED;
1441:
1442: /* Schedule EoRR packet */
1443: if (c->feed_state == BFS_REFRESHING)
1444: c->feed_state = BFS_REFRESHED;
1445:
1446: /* Kick TX hook */
1447: bgp_schedule_packet(p->conn, c, PKT_UPDATE);
1448: }
1449:
1450:
1451: static void
1452: bgp_start_locked(struct object_lock *lock)
1453: {
1454: struct bgp_proto *p = lock->data;
1455: const struct bgp_config *cf = p->cf;
1456:
1457: if (p->p.proto_state != PS_START)
1458: {
1459: DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
1460: return;
1461: }
1462:
1463: DBG("BGP: Got lock\n");
1464:
1465: if (cf->multihop || bgp_is_dynamic(p))
1466: {
1467: /* Multi-hop sessions do not use neighbor entries */
1468: bgp_initiate(p);
1469: return;
1470: }
1471:
1472: neighbor *n = neigh_find(&p->p, p->remote_ip, cf->iface, NEF_STICKY);
1473: if (!n)
1474: {
1475: log(L_ERR "%s: Invalid remote address %I%J", p->p.name, p->remote_ip, cf->iface);
1476: /* As we do not start yet, we can just disable protocol */
1477: p->p.disabled = 1;
1478: bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
1479: proto_notify_state(&p->p, PS_DOWN);
1480: return;
1481: }
1482:
1483: p->neigh = n;
1484:
1485: if (n->scope <= 0)
1486: BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", p->remote_ip, cf->iface);
1487: else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
1488: BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name);
1489: else
1490: bgp_start_neighbor(p);
1491: }
1492:
1493: static int
1494: bgp_start(struct proto *P)
1495: {
1496: struct bgp_proto *p = (struct bgp_proto *) P;
1497: const struct bgp_config *cf = p->cf;
1498:
1499: p->local_ip = cf->local_ip;
1500: p->local_as = cf->local_as;
1501: p->remote_as = cf->remote_as;
1502: p->public_as = cf->local_as;
1503:
1504: /* For dynamic BGP childs, remote_ip is already set */
1505: if (ipa_nonzero(cf->remote_ip))
1506: p->remote_ip = cf->remote_ip;
1507:
1508: /* Confederation ID is used for truly external peers */
1509: if (p->cf->confederation && !p->is_interior)
1510: p->public_as = cf->confederation;
1511:
1512: p->passive = cf->passive || bgp_is_dynamic(p);
1513:
1514: p->start_state = BSS_PREPARE;
1515: p->outgoing_conn.state = BS_IDLE;
1516: p->incoming_conn.state = BS_IDLE;
1517: p->neigh = NULL;
1518: p->bfd_req = NULL;
1519: p->postponed_sk = NULL;
1520: p->gr_ready = 0;
1521: p->gr_active_num = 0;
1522:
1523: p->event = ev_new_init(p->p.pool, bgp_decision, p);
1524: p->startup_timer = tm_new_init(p->p.pool, bgp_startup_timeout, p, 0, 0);
1525: p->gr_timer = tm_new_init(p->p.pool, bgp_graceful_restart_timeout, p, 0, 0);
1526:
1527: p->local_id = proto_get_router_id(P->cf);
1528: if (p->rr_client)
1529: p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
1530:
1531: p->remote_id = 0;
1532: p->link_addr = IPA_NONE;
1533:
1534: /* Lock all channels when in GR recovery mode */
1535: if (p->p.gr_recovery && p->cf->gr_mode)
1536: {
1537: struct bgp_channel *c;
1538: WALK_LIST(c, p->p.channels)
1539: channel_graceful_restart_lock(&c->c);
1540: }
1541:
1542: /*
1543: * Before attempting to create the connection, we need to lock the port,
1544: * so that we are the only instance attempting to talk with that neighbor.
1545: */
1546: struct object_lock *lock;
1547: lock = p->lock = olock_new(P->pool);
1548: lock->addr = p->remote_ip;
1549: lock->port = p->cf->remote_port;
1550: lock->iface = p->cf->iface;
1551: lock->vrf = p->cf->iface ? NULL : p->p.vrf;
1552: lock->type = OBJLOCK_TCP;
1553: lock->hook = bgp_start_locked;
1554: lock->data = p;
1555:
1556: /* For dynamic BGP, we use inst 1 to avoid collisions with regular BGP */
1557: if (bgp_is_dynamic(p))
1558: {
1559: lock->addr = net_prefix(p->cf->remote_range);
1560: lock->inst = 1;
1561: }
1562:
1563: olock_acquire(lock);
1564:
1565: return PS_START;
1566: }
1567:
1568: extern int proto_restart;
1569:
1570: static int
1571: bgp_shutdown(struct proto *P)
1572: {
1573: struct bgp_proto *p = (struct bgp_proto *) P;
1574: int subcode = 0;
1575:
1576: char *message = NULL;
1577: byte *data = NULL;
1578: uint len = 0;
1579:
1580: BGP_TRACE(D_EVENTS, "Shutdown requested");
1581:
1582: switch (P->down_code)
1583: {
1584: case PDC_CF_REMOVE:
1585: case PDC_CF_DISABLE:
1586: subcode = 3; // Errcode 6, 3 - peer de-configured
1587: break;
1588:
1589: case PDC_CF_RESTART:
1590: subcode = 6; // Errcode 6, 6 - other configuration change
1591: break;
1592:
1593: case PDC_CMD_DISABLE:
1594: case PDC_CMD_SHUTDOWN:
1595: shutdown:
1596: subcode = 2; // Errcode 6, 2 - administrative shutdown
1597: message = P->message;
1598: break;
1599:
1600: case PDC_CMD_RESTART:
1601: subcode = 4; // Errcode 6, 4 - administrative reset
1602: message = P->message;
1603: break;
1604:
1605: case PDC_CMD_GR_DOWN:
1606: if ((p->cf->gr_mode != BGP_GR_ABLE) &&
1607: (p->cf->llgr_mode != BGP_LLGR_ABLE))
1608: goto shutdown;
1609:
1610: subcode = -1; // Do not send NOTIFICATION, just close the connection
1611: break;
1612:
1613: case PDC_RX_LIMIT_HIT:
1614: case PDC_IN_LIMIT_HIT:
1615: subcode = 1; // Errcode 6, 1 - max number of prefixes reached
1616: /* log message for compatibility */
1617: log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name);
1618: goto limit;
1619:
1620: case PDC_OUT_LIMIT_HIT:
1621: subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown
1622:
1623: limit:
1624: bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED);
1625: if (proto_restart)
1626: bgp_update_startup_delay(p);
1627: else
1628: p->startup_delay = 0;
1629: goto done;
1630: }
1631:
1632: bgp_store_error(p, NULL, BE_MAN_DOWN, 0);
1633: p->startup_delay = 0;
1634:
1635: /* RFC 8203 - shutdown communication */
1636: if (message)
1637: {
1638: uint msg_len = strlen(message);
1639: msg_len = MIN(msg_len, 255);
1640:
1641: /* Buffer will be freed automatically by protocol shutdown */
1642: data = mb_alloc(p->p.pool, msg_len + 1);
1643: len = msg_len + 1;
1644:
1645: data[0] = msg_len;
1646: memcpy(data+1, message, msg_len);
1647: }
1648:
1649: done:
1650: bgp_stop(p, subcode, data, len);
1651: return p->p.proto_state;
1652: }
1653:
1654: static struct proto *
1655: bgp_init(struct proto_config *CF)
1656: {
1657: struct proto *P = proto_new(CF);
1658: struct bgp_proto *p = (struct bgp_proto *) P;
1659: struct bgp_config *cf = (struct bgp_config *) CF;
1660:
1661: P->rt_notify = bgp_rt_notify;
1662: P->preexport = bgp_preexport;
1663: P->neigh_notify = bgp_neigh_notify;
1664: P->reload_routes = bgp_reload_routes;
1665: P->feed_begin = bgp_feed_begin;
1666: P->feed_end = bgp_feed_end;
1667: P->rte_better = bgp_rte_better;
1668: P->rte_mergable = bgp_rte_mergable;
1669: P->rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL;
1670: P->rte_modify = bgp_rte_modify_stale;
1671:
1672: p->cf = cf;
1673: p->is_internal = (cf->local_as == cf->remote_as);
1674: p->is_interior = p->is_internal || cf->confederation_member;
1675: p->rs_client = cf->rs_client;
1676: p->rr_client = cf->rr_client;
1677:
1678: p->ipv4 = ipa_nonzero(cf->remote_ip) ?
1679: ipa_is_ip4(cf->remote_ip) :
1680: (cf->remote_range && (cf->remote_range->type == NET_IP4));
1681:
1682: p->remote_ip = cf->remote_ip;
1683: p->remote_as = cf->remote_as;
1684:
1685: /* Hack: We use cf->remote_ip just to pass remote_ip from bgp_spawn() */
1686: if (cf->c.parent)
1687: cf->remote_ip = IPA_NONE;
1688:
1689: /* Add all channels */
1690: struct bgp_channel_config *cc;
1691: WALK_LIST(cc, CF->channels)
1692: proto_add_channel(P, &cc->c);
1693:
1694: return P;
1695: }
1696:
1697: static void
1698: bgp_channel_init(struct channel *C, struct channel_config *CF)
1699: {
1700: struct bgp_channel *c = (void *) C;
1701: struct bgp_channel_config *cf = (void *) CF;
1702:
1703: c->cf = cf;
1704: c->afi = cf->afi;
1705: c->desc = cf->desc;
1706:
1707: if (cf->igp_table_ip4)
1708: c->igp_table_ip4 = cf->igp_table_ip4->table;
1709:
1710: if (cf->igp_table_ip6)
1711: c->igp_table_ip6 = cf->igp_table_ip6->table;
1712: }
1713:
1714: static int
1715: bgp_channel_start(struct channel *C)
1716: {
1717: struct bgp_proto *p = (void *) C->proto;
1718: struct bgp_channel *c = (void *) C;
1719: ip_addr src = p->local_ip;
1720:
1721: if (c->igp_table_ip4)
1722: rt_lock_table(c->igp_table_ip4);
1723:
1724: if (c->igp_table_ip6)
1725: rt_lock_table(c->igp_table_ip6);
1726:
1727: c->pool = p->p.pool; // XXXX
1728: bgp_init_bucket_table(c);
1729: bgp_init_prefix_table(c);
1730:
1731: if (c->cf->import_table)
1732: channel_setup_in_table(C);
1733:
1734: if (c->cf->export_table)
1735: channel_setup_out_table(C);
1736:
1737: c->stale_timer = tm_new_init(c->pool, bgp_long_lived_stale_timeout, c, 0, 0);
1738:
1739: c->next_hop_addr = c->cf->next_hop_addr;
1740: c->link_addr = IPA_NONE;
1741: c->packets_to_send = 0;
1742:
1743: /* Try to use source address as next hop address */
1744: if (ipa_zero(c->next_hop_addr))
1745: {
1746: if (bgp_channel_is_ipv4(c) && (ipa_is_ip4(src) || c->ext_next_hop))
1747: c->next_hop_addr = src;
1748:
1749: if (bgp_channel_is_ipv6(c) && (ipa_is_ip6(src) || c->ext_next_hop))
1750: c->next_hop_addr = src;
1751: }
1752:
1753: /* Use preferred addresses associated with interface / source address */
1754: if (ipa_zero(c->next_hop_addr))
1755: {
1756: /* We know the iface for single-hop, we make lookup for multihop */
1757: struct neighbor *nbr = p->neigh ?: neigh_find(&p->p, src, NULL, 0);
1758: struct iface *iface = nbr ? nbr->iface : NULL;
1759:
1760: if (bgp_channel_is_ipv4(c) && iface && iface->addr4)
1761: c->next_hop_addr = iface->addr4->ip;
1762:
1763: if (bgp_channel_is_ipv6(c) && iface && iface->addr6)
1764: c->next_hop_addr = iface->addr6->ip;
1765: }
1766:
1767: /* Exit if no feasible next hop address is found */
1768: if (ipa_zero(c->next_hop_addr))
1769: {
1770: log(L_WARN "%s: Missing next hop address", p->p.name);
1771: return 0;
1772: }
1773:
1774: /* Set link-local address for IPv6 single-hop BGP */
1775: if (ipa_is_ip6(c->next_hop_addr) && p->neigh)
1776: {
1777: c->link_addr = p->link_addr;
1778:
1779: if (ipa_zero(c->link_addr))
1780: log(L_WARN "%s: Missing link-local address", p->p.name);
1781: }
1782:
1783: /* Link local address is already in c->link_addr */
1784: if (ipa_is_link_local(c->next_hop_addr))
1785: c->next_hop_addr = IPA_NONE;
1786:
1787: return 0; /* XXXX: Currently undefined */
1788: }
1789:
1790: static void
1791: bgp_channel_shutdown(struct channel *C)
1792: {
1793: struct bgp_channel *c = (void *) C;
1794:
1795: c->next_hop_addr = IPA_NONE;
1796: c->link_addr = IPA_NONE;
1797: c->packets_to_send = 0;
1798: }
1799:
1800: static void
1801: bgp_channel_cleanup(struct channel *C)
1802: {
1803: struct bgp_channel *c = (void *) C;
1804:
1805: if (c->igp_table_ip4)
1806: rt_unlock_table(c->igp_table_ip4);
1807:
1808: if (c->igp_table_ip6)
1809: rt_unlock_table(c->igp_table_ip6);
1810:
1811: c->index = 0;
1812:
1813: /* Cleanup rest of bgp_channel starting at pool field */
1814: memset(&(c->pool), 0, sizeof(struct bgp_channel) - OFFSETOF(struct bgp_channel, pool));
1815: }
1816:
1817: static inline struct bgp_channel_config *
1818: bgp_find_channel_config(struct bgp_config *cf, u32 afi)
1819: {
1820: struct bgp_channel_config *cc;
1821:
1822: WALK_LIST(cc, cf->c.channels)
1823: if (cc->afi == afi)
1824: return cc;
1825:
1826: return NULL;
1827: }
1828:
1829: struct rtable_config *
1830: bgp_default_igp_table(struct bgp_config *cf, struct bgp_channel_config *cc, u32 type)
1831: {
1832: struct bgp_channel_config *cc2;
1833: struct rtable_config *tab;
1834:
1835: /* First, try table connected by the channel */
1836: if (cc->c.table->addr_type == type)
1837: return cc->c.table;
1838:
1839: /* Find paired channel with the same SAFI but the other AFI */
1840: u32 afi2 = cc->afi ^ 0x30000;
1841: cc2 = bgp_find_channel_config(cf, afi2);
1842:
1843: /* Second, try IGP table configured in the paired channel */
1844: if (cc2 && (tab = (type == NET_IP4) ? cc2->igp_table_ip4 : cc2->igp_table_ip6))
1845: return tab;
1846:
1847: /* Third, try table connected by the paired channel */
1848: if (cc2 && (cc2->c.table->addr_type == type))
1849: return cc2->c.table;
1850:
1851: /* Last, try default table of given type */
1852: if (tab = cf->c.global->def_tables[type])
1853: return tab;
1854:
1855: cf_error("Undefined IGP table");
1856: }
1857:
1858:
1859: void
1860: bgp_postconfig(struct proto_config *CF)
1861: {
1862: struct bgp_config *cf = (void *) CF;
1863:
1864: /* Do not check templates at all */
1865: if (cf->c.class == SYM_TEMPLATE)
1866: return;
1867:
1868:
1869: /* Handle undefined remote_as, zero should mean unspecified external */
1870: if (!cf->remote_as && (cf->peer_type == BGP_PT_INTERNAL))
1871: cf->remote_as = cf->local_as;
1872:
1873: int internal = (cf->local_as == cf->remote_as);
1874: int interior = internal || cf->confederation_member;
1875:
1876: /* EBGP direct by default, IBGP multihop by default */
1877: if (cf->multihop < 0)
1878: cf->multihop = internal ? 64 : 0;
1879:
1880: /* LLGR mode default based on GR mode */
1881: if (cf->llgr_mode < 0)
1882: cf->llgr_mode = cf->gr_mode ? BGP_LLGR_AWARE : 0;
1883:
1884: /* Link check for single-hop BGP by default */
1885: if (cf->check_link < 0)
1886: cf->check_link = !cf->multihop;
1887:
1888:
1889: if (!cf->local_as)
1890: cf_error("Local AS number must be set");
1891:
1892: if (ipa_zero(cf->remote_ip) && !cf->remote_range)
1893: cf_error("Neighbor must be configured");
1894:
1895: if (ipa_zero(cf->local_ip) && cf->strict_bind)
1896: cf_error("Local address must be configured for strict bind");
1897:
1898: if (!cf->remote_as && !cf->peer_type)
1899: cf_error("Remote AS number (or peer type) must be set");
1900:
1901: if ((cf->peer_type == BGP_PT_INTERNAL) && !internal)
1902: cf_error("IBGP cannot have different ASNs");
1903:
1904: if ((cf->peer_type == BGP_PT_EXTERNAL) && internal)
1905: cf_error("EBGP cannot have the same ASNs");
1906:
1907: if (!cf->iface && (ipa_is_link_local(cf->local_ip) ||
1908: ipa_is_link_local(cf->remote_ip)))
1909: cf_error("Link-local addresses require defined interface");
1910:
1911: if (!(cf->capabilities && cf->enable_as4) && (cf->remote_as > 0xFFFF))
1912: cf_error("Neighbor AS number out of range (AS4 not available)");
1913:
1914: if (!internal && cf->rr_client)
1915: cf_error("Only internal neighbor can be RR client");
1916:
1917: if (internal && cf->rs_client)
1918: cf_error("Only external neighbor can be RS client");
1919:
1920: if (!cf->confederation && cf->confederation_member)
1921: cf_error("Confederation ID must be set for member sessions");
1922:
1923: if (cf->multihop && (ipa_is_link_local(cf->local_ip) ||
1924: ipa_is_link_local(cf->remote_ip)))
1925: cf_error("Multihop BGP cannot be used with link-local addresses");
1926:
1927: if (cf->multihop && cf->iface)
1928: cf_error("Multihop BGP cannot be bound to interface");
1929:
1930: if (cf->multihop && cf->check_link)
1931: cf_error("Multihop BGP cannot depend on link state");
1932:
1933: if (cf->multihop && cf->bfd && ipa_zero(cf->local_ip))
1934: cf_error("Multihop BGP with BFD requires specified local address");
1935:
1936: if (!cf->gr_mode && cf->llgr_mode)
1937: cf_error("Long-lived graceful restart requires basic graceful restart");
1938:
1939:
1940: struct bgp_channel_config *cc;
1941: WALK_LIST(cc, CF->channels)
1942: {
1943: /* Handle undefined import filter */
1944: if (cc->c.in_filter == FILTER_UNDEF)
1945: if (interior)
1946: cc->c.in_filter = FILTER_ACCEPT;
1947: else
1948: cf_error("EBGP requires explicit import policy");
1949:
1950: /* Handle undefined export filter */
1951: if (cc->c.out_filter == FILTER_UNDEF)
1952: if (interior)
1953: cc->c.out_filter = FILTER_REJECT;
1954: else
1955: cf_error("EBGP requires explicit export policy");
1956:
1957: /* Disable after error incompatible with restart limit action */
1958: if ((cc->c.in_limit.action == PLA_RESTART) && cf->disable_after_error)
1959: cc->c.in_limit.action = PLA_DISABLE;
1960:
1961: /* Different default based on rr_client, rs_client */
1962: if (cc->next_hop_keep == 0xff)
1963: cc->next_hop_keep = cf->rr_client ? NH_IBGP : (cf->rs_client ? NH_ALL : NH_NO);
1964:
1965: /* Different default based on rs_client */
1966: if (!cc->missing_lladdr)
1967: cc->missing_lladdr = cf->rs_client ? MLL_IGNORE : MLL_SELF;
1968:
1969: /* Different default for gw_mode */
1970: if (!cc->gw_mode)
1971: cc->gw_mode = cf->multihop ? GW_RECURSIVE : GW_DIRECT;
1972:
1973: /* Defaults based on proto config */
1974: if (cc->gr_able == 0xff)
1975: cc->gr_able = (cf->gr_mode == BGP_GR_ABLE);
1976:
1977: if (cc->llgr_able == 0xff)
1978: cc->llgr_able = (cf->llgr_mode == BGP_LLGR_ABLE);
1979:
1980: if (cc->llgr_time == ~0U)
1981: cc->llgr_time = cf->llgr_time;
1982:
1983: /* AIGP enabled by default on interior sessions */
1984: if (cc->aigp == 0xff)
1985: cc->aigp = interior;
1986:
1987: /* Default values of IGP tables */
1988: if ((cc->gw_mode == GW_RECURSIVE) && !cc->desc->no_igp)
1989: {
1990: if (!cc->igp_table_ip4 && (bgp_cc_is_ipv4(cc) || cc->ext_next_hop))
1991: cc->igp_table_ip4 = bgp_default_igp_table(cf, cc, NET_IP4);
1992:
1993: if (!cc->igp_table_ip6 && (bgp_cc_is_ipv6(cc) || cc->ext_next_hop))
1994: cc->igp_table_ip6 = bgp_default_igp_table(cf, cc, NET_IP6);
1995:
1996: if (cc->igp_table_ip4 && bgp_cc_is_ipv6(cc) && !cc->ext_next_hop)
1997: cf_error("Mismatched IGP table type");
1998:
1999: if (cc->igp_table_ip6 && bgp_cc_is_ipv4(cc) && !cc->ext_next_hop)
2000: cf_error("Mismatched IGP table type");
2001: }
2002:
2003: if (cf->multihop && (cc->gw_mode == GW_DIRECT))
2004: cf_error("Multihop BGP cannot use direct gateway mode");
2005:
2006: if ((cc->gw_mode == GW_RECURSIVE) && cc->c.table->sorted)
2007: cf_error("BGP in recursive mode prohibits sorted table");
2008:
2009: if (cf->deterministic_med && cc->c.table->sorted)
2010: cf_error("BGP with deterministic MED prohibits sorted table");
2011:
2012: if (cc->secondary && !cc->c.table->sorted)
2013: cf_error("BGP with secondary option requires sorted table");
2014: }
2015: }
2016:
2017: static int
2018: bgp_reconfigure(struct proto *P, struct proto_config *CF)
2019: {
2020: struct bgp_proto *p = (void *) P;
2021: const struct bgp_config *new = (void *) CF;
2022: const struct bgp_config *old = p->cf;
2023:
2024: if (proto_get_router_id(CF) != p->local_id)
2025: return 0;
2026:
2027: int same = !memcmp(((byte *) old) + sizeof(struct proto_config),
2028: ((byte *) new) + sizeof(struct proto_config),
2029: // password item is last and must be checked separately
2030: OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
2031: && !bstrcmp(old->password, new->password)
2032: && ((!old->remote_range && !new->remote_range)
2033: || (old->remote_range && new->remote_range && net_equal(old->remote_range, new->remote_range)))
2034: && !bstrcmp(old->dynamic_name, new->dynamic_name)
2035: && (old->dynamic_name_digits == new->dynamic_name_digits);
2036:
2037: /* FIXME: Move channel reconfiguration to generic protocol code ? */
2038: struct channel *C, *C2;
2039: struct bgp_channel_config *cc;
2040:
2041: WALK_LIST(C, p->p.channels)
2042: C->stale = 1;
2043:
2044: WALK_LIST(cc, new->c.channels)
2045: {
2046: C = (struct channel *) bgp_find_channel(p, cc->afi);
2047: same = proto_configure_channel(P, &C, &cc->c) && same;
2048:
2049: if (C)
2050: C->stale = 0;
2051: }
2052:
2053: WALK_LIST_DELSAFE(C, C2, p->p.channels)
2054: if (C->stale)
2055: same = proto_configure_channel(P, &C, NULL) && same;
2056:
2057:
2058: if (same && (p->start_state > BSS_PREPARE))
2059: bgp_update_bfd(p, new->bfd);
2060:
2061: /* We should update our copy of configuration ptr as old configuration will be freed */
2062: if (same)
2063: p->cf = new;
2064:
2065: /* Reset name counter */
2066: p->dynamic_name_counter = 0;
2067:
2068: return same;
2069: }
2070:
2071: #define IGP_TABLE(cf, sym) ((cf)->igp_table_##sym ? (cf)->igp_table_##sym ->table : NULL )
2072:
2073: static int
2074: bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *import_changed, int *export_changed)
2075: {
2076: struct bgp_proto *p = (void *) C->proto;
2077: struct bgp_channel *c = (void *) C;
2078: struct bgp_channel_config *new = (void *) CC;
2079: struct bgp_channel_config *old = c->cf;
2080:
2081: if ((new->secondary != old->secondary) ||
2082: (new->gr_able != old->gr_able) ||
2083: (new->llgr_able != old->llgr_able) ||
2084: (new->llgr_time != old->llgr_time) ||
2085: (new->ext_next_hop != old->ext_next_hop) ||
2086: (new->add_path != old->add_path) ||
2087: (new->import_table != old->import_table) ||
2088: (new->export_table != old->export_table) ||
2089: (IGP_TABLE(new, ip4) != IGP_TABLE(old, ip4)) ||
2090: (IGP_TABLE(new, ip6) != IGP_TABLE(old, ip6)))
2091: return 0;
2092:
2093: if (new->mandatory && !old->mandatory && (C->channel_state != CS_UP))
2094: return 0;
2095:
2096: if ((new->gw_mode != old->gw_mode) ||
2097: (new->aigp != old->aigp) ||
2098: (new->cost != old->cost))
2099: {
2100: /* import_changed itself does not force ROUTE_REFRESH when import_table is active */
2101: if (c->c.in_table && (c->c.channel_state == CS_UP))
2102: bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH);
2103:
2104: *import_changed = 1;
2105: }
2106:
2107: if (!ipa_equal(new->next_hop_addr, old->next_hop_addr) ||
2108: (new->next_hop_self != old->next_hop_self) ||
2109: (new->next_hop_keep != old->next_hop_keep) ||
2110: (new->missing_lladdr != old->missing_lladdr) ||
2111: (new->aigp != old->aigp) ||
2112: (new->aigp_originate != old->aigp_originate))
2113: *export_changed = 1;
2114:
2115: c->cf = new;
2116: return 1;
2117: }
2118:
2119: static void
2120: bgp_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUSED)
2121: {
2122: /* Just a shallow copy */
2123: }
2124:
2125:
2126: /**
2127: * bgp_error - report a protocol error
2128: * @c: connection
2129: * @code: error code (according to the RFC)
2130: * @subcode: error sub-code
2131: * @data: data to be passed in the Notification message
2132: * @len: length of the data
2133: *
2134: * bgp_error() sends a notification packet to tell the other side that a protocol
2135: * error has occurred (including the data considered erroneous if possible) and
2136: * closes the connection.
2137: */
2138: void
2139: bgp_error(struct bgp_conn *c, uint code, uint subcode, byte *data, int len)
2140: {
2141: struct bgp_proto *p = c->bgp;
2142:
2143: if (c->state == BS_CLOSE)
2144: return;
2145:
2146: bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, ABS(len));
2147: bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode);
2148: bgp_conn_enter_close_state(c);
2149:
2150: c->notify_code = code;
2151: c->notify_subcode = subcode;
2152: c->notify_data = data;
2153: c->notify_size = (len > 0) ? len : 0;
2154: bgp_schedule_packet(c, NULL, PKT_NOTIFICATION);
2155:
2156: if (code != 6)
2157: {
2158: bgp_update_startup_delay(p);
2159: bgp_stop(p, 0, NULL, 0);
2160: }
2161: }
2162:
2163: /**
2164: * bgp_store_error - store last error for status report
2165: * @p: BGP instance
2166: * @c: connection
2167: * @class: error class (BE_xxx constants)
2168: * @code: error code (class specific)
2169: *
2170: * bgp_store_error() decides whether given error is interesting enough
2171: * and store that error to last_error variables of @p
2172: */
2173: void
2174: bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code)
2175: {
2176: /* During PS_UP, we ignore errors on secondary connection */
2177: if ((p->p.proto_state == PS_UP) && c && (c != p->conn))
2178: return;
2179:
2180: /* During PS_STOP, we ignore any errors, as we want to report
2181: * the error that caused transition to PS_STOP
2182: */
2183: if (p->p.proto_state == PS_STOP)
2184: return;
2185:
2186: p->last_error_class = class;
2187: p->last_error_code = code;
2188: }
2189:
/*
 * Read-only message tables used by the status and CLI output below.
 * Both the strings and the pointer arrays are const, as nothing is supposed
 * to modify them at run time.
 */

/* Connection state names, indexed by the connection state in bgp_state_dsc() */
static const char * const bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established", "Close" };

/* Prefixes of error messages, indexed by last_error_class */
static const char * const bgp_err_classes[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""};

/* Messages for class BE_MISC, indexed by last_error_code */
static const char * const bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket", "Link down", "BFD session down", "Graceful restart"};

/* Messages for class BE_AUTO_DOWN, indexed by last_error_code */
static const char * const bgp_auto_errors[] = { "", "Route limit exceeded"};

/* Names of per-channel graceful restart states, indexed by the channel's gr_active */
static const char * const bgp_gr_states[] = { "None", "Regular", "Long-lived"};
2195:
2196: static const char *
2197: bgp_last_errmsg(struct bgp_proto *p)
2198: {
2199: switch (p->last_error_class)
2200: {
2201: case BE_MISC:
2202: return bgp_misc_errors[p->last_error_code];
2203: case BE_SOCKET:
2204: return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code);
2205: case BE_BGP_RX:
2206: case BE_BGP_TX:
2207: return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF);
2208: case BE_AUTO_DOWN:
2209: return bgp_auto_errors[p->last_error_code];
2210: default:
2211: return "";
2212: }
2213: }
2214:
2215: static const char *
2216: bgp_state_dsc(struct bgp_proto *p)
2217: {
2218: if (p->p.proto_state == PS_DOWN)
2219: return "Down";
2220:
2221: int state = MAX(p->incoming_conn.state, p->outgoing_conn.state);
2222: if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->passive)
2223: return "Passive";
2224:
2225: return bgp_state_names[state];
2226: }
2227:
2228: static void
2229: bgp_get_status(struct proto *P, byte *buf)
2230: {
2231: struct bgp_proto *p = (struct bgp_proto *) P;
2232:
2233: const char *err1 = bgp_err_classes[p->last_error_class];
2234: const char *err2 = bgp_last_errmsg(p);
2235:
2236: if (P->proto_state == PS_DOWN)
2237: bsprintf(buf, "%s%s", err1, err2);
2238: else
2239: bsprintf(buf, "%-14s%s%s", bgp_state_dsc(p), err1, err2);
2240: }
2241:
2242: static void
2243: bgp_show_afis(int code, char *s, u32 *afis, uint count)
2244: {
2245: buffer b;
2246: LOG_BUFFER_INIT(b);
2247:
2248: buffer_puts(&b, s);
2249:
2250: for (u32 *af = afis; af < (afis + count); af++)
2251: {
2252: const struct bgp_af_desc *desc = bgp_get_af_desc(*af);
2253: if (desc)
2254: buffer_print(&b, " %s", desc->name);
2255: else
2256: buffer_print(&b, " <%u/%u>", BGP_AFI(*af), BGP_SAFI(*af));
2257: }
2258:
2259: if (b.pos == b.end)
2260: strcpy(b.end - 32, " ... <too long>");
2261:
2262: cli_msg(code, b.start);
2263: }
2264:
2265: static void
2266: bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps)
2267: {
2268: struct bgp_af_caps *ac;
2269: uint any_mp_bgp = 0;
2270: uint any_gr_able = 0;
2271: uint any_add_path = 0;
2272: uint any_ext_next_hop = 0;
2273: uint any_llgr_able = 0;
2274: u32 *afl1 = alloca(caps->af_count * sizeof(u32));
2275: u32 *afl2 = alloca(caps->af_count * sizeof(u32));
2276: uint afn1, afn2;
2277:
2278: WALK_AF_CAPS(caps, ac)
2279: {
2280: any_mp_bgp |= ac->ready;
2281: any_gr_able |= ac->gr_able;
2282: any_add_path |= ac->add_path;
2283: any_ext_next_hop |= ac->ext_next_hop;
2284: any_llgr_able |= ac->llgr_able;
2285: }
2286:
2287: if (any_mp_bgp)
2288: {
2289: cli_msg(-1006, " Multiprotocol");
2290:
2291: afn1 = 0;
2292: WALK_AF_CAPS(caps, ac)
2293: if (ac->ready)
2294: afl1[afn1++] = ac->afi;
2295:
2296: bgp_show_afis(-1006, " AF announced:", afl1, afn1);
2297: }
2298:
2299: if (caps->route_refresh)
2300: cli_msg(-1006, " Route refresh");
2301:
2302: if (any_ext_next_hop)
2303: {
2304: cli_msg(-1006, " Extended next hop");
2305:
2306: afn1 = 0;
2307: WALK_AF_CAPS(caps, ac)
2308: if (ac->ext_next_hop)
2309: afl1[afn1++] = ac->afi;
2310:
2311: bgp_show_afis(-1006, " IPv6 nexthop:", afl1, afn1);
2312: }
2313:
2314: if (caps->ext_messages)
2315: cli_msg(-1006, " Extended message");
2316:
2317: if (caps->gr_aware)
2318: cli_msg(-1006, " Graceful restart");
2319:
2320: if (any_gr_able)
2321: {
2322: /* Continues from gr_aware */
2323: cli_msg(-1006, " Restart time: %u", caps->gr_time);
2324: if (caps->gr_flags & BGP_GRF_RESTART)
2325: cli_msg(-1006, " Restart recovery");
2326:
2327: afn1 = afn2 = 0;
2328: WALK_AF_CAPS(caps, ac)
2329: {
2330: if (ac->gr_able)
2331: afl1[afn1++] = ac->afi;
2332:
2333: if (ac->gr_af_flags & BGP_GRF_FORWARDING)
2334: afl2[afn2++] = ac->afi;
2335: }
2336:
2337: bgp_show_afis(-1006, " AF supported:", afl1, afn1);
2338: bgp_show_afis(-1006, " AF preserved:", afl2, afn2);
2339: }
2340:
2341: if (caps->as4_support)
2342: cli_msg(-1006, " 4-octet AS numbers");
2343:
2344: if (any_add_path)
2345: {
2346: cli_msg(-1006, " ADD-PATH");
2347:
2348: afn1 = afn2 = 0;
2349: WALK_AF_CAPS(caps, ac)
2350: {
2351: if (ac->add_path & BGP_ADD_PATH_RX)
2352: afl1[afn1++] = ac->afi;
2353:
2354: if (ac->add_path & BGP_ADD_PATH_TX)
2355: afl2[afn2++] = ac->afi;
2356: }
2357:
2358: bgp_show_afis(-1006, " RX:", afl1, afn1);
2359: bgp_show_afis(-1006, " TX:", afl2, afn2);
2360: }
2361:
2362: if (caps->enhanced_refresh)
2363: cli_msg(-1006, " Enhanced refresh");
2364:
2365: if (caps->llgr_aware)
2366: cli_msg(-1006, " Long-lived graceful restart");
2367:
2368: if (any_llgr_able)
2369: {
2370: u32 stale_time = 0;
2371:
2372: afn1 = afn2 = 0;
2373: WALK_AF_CAPS(caps, ac)
2374: {
2375: stale_time = MAX(stale_time, ac->llgr_time);
2376:
2377: if (ac->llgr_able && ac->llgr_time)
2378: afl1[afn1++] = ac->afi;
2379:
2380: if (ac->llgr_flags & BGP_GRF_FORWARDING)
2381: afl2[afn2++] = ac->afi;
2382: }
2383:
2384: /* Continues from llgr_aware */
2385: cli_msg(-1006, " LL stale time: %u", stale_time);
2386:
2387: bgp_show_afis(-1006, " AF supported:", afl1, afn1);
2388: bgp_show_afis(-1006, " AF preserved:", afl2, afn2);
2389: }
2390: }
2391:
2392: static void
2393: bgp_show_proto_info(struct proto *P)
2394: {
2395: struct bgp_proto *p = (struct bgp_proto *) P;
2396:
2397: cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p));
2398:
2399: if (bgp_is_dynamic(p) && p->cf->remote_range)
2400: cli_msg(-1006, " Neighbor range: %N", p->cf->remote_range);
2401: else
2402: cli_msg(-1006, " Neighbor address: %I%J", p->remote_ip, p->cf->iface);
2403:
2404: cli_msg(-1006, " Neighbor AS: %u", p->remote_as);
2405: cli_msg(-1006, " Local AS: %u", p->cf->local_as);
2406:
2407: if (p->gr_active_num)
2408: cli_msg(-1006, " Neighbor graceful restart active");
2409:
2410: if (P->proto_state == PS_START)
2411: {
2412: struct bgp_conn *oc = &p->outgoing_conn;
2413:
2414: if ((p->start_state < BSS_CONNECT) &&
2415: (tm_active(p->startup_timer)))
2416: cli_msg(-1006, " Error wait: %t/%u",
2417: tm_remains(p->startup_timer), p->startup_delay);
2418:
2419: if ((oc->state == BS_ACTIVE) &&
2420: (tm_active(oc->connect_timer)))
2421: cli_msg(-1006, " Connect delay: %t/%u",
2422: tm_remains(oc->connect_timer), p->cf->connect_delay_time);
2423:
2424: if (p->gr_active_num && tm_active(p->gr_timer))
2425: cli_msg(-1006, " Restart timer: %t/-",
2426: tm_remains(p->gr_timer));
2427: }
2428: else if (P->proto_state == PS_UP)
2429: {
2430: cli_msg(-1006, " Neighbor ID: %R", p->remote_id);
2431: cli_msg(-1006, " Local capabilities");
2432: bgp_show_capabilities(p, p->conn->local_caps);
2433: cli_msg(-1006, " Neighbor capabilities");
2434: bgp_show_capabilities(p, p->conn->remote_caps);
2435: cli_msg(-1006, " Session: %s%s%s%s%s",
2436: p->is_internal ? "internal" : "external",
2437: p->cf->multihop ? " multihop" : "",
2438: p->rr_client ? " route-reflector" : "",
2439: p->rs_client ? " route-server" : "",
2440: p->as4_session ? " AS4" : "");
2441: cli_msg(-1006, " Source address: %I", p->local_ip);
2442: cli_msg(-1006, " Hold timer: %t/%u",
2443: tm_remains(p->conn->hold_timer), p->conn->hold_time);
2444: cli_msg(-1006, " Keepalive timer: %t/%u",
2445: tm_remains(p->conn->keepalive_timer), p->conn->keepalive_time);
2446: }
2447:
2448: if ((p->last_error_class != BE_NONE) &&
2449: (p->last_error_class != BE_MAN_DOWN))
2450: {
2451: const char *err1 = bgp_err_classes[p->last_error_class];
2452: const char *err2 = bgp_last_errmsg(p);
2453: cli_msg(-1006, " Last error: %s%s", err1, err2);
2454: }
2455:
2456: {
2457: struct bgp_channel *c;
2458: WALK_LIST(c, p->p.channels)
2459: {
2460: channel_show_info(&c->c);
2461:
2462: if (p->gr_active_num)
2463: cli_msg(-1006, " Neighbor GR: %s", bgp_gr_states[c->gr_active]);
2464:
2465: if (c->stale_timer && tm_active(c->stale_timer))
2466: cli_msg(-1006, " LL stale timer: %t/-", tm_remains(c->stale_timer));
2467:
2468: if (c->c.channel_state == CS_UP)
2469: {
2470: if (ipa_zero(c->link_addr))
2471: cli_msg(-1006, " BGP Next hop: %I", c->next_hop_addr);
2472: else
2473: cli_msg(-1006, " BGP Next hop: %I %I", c->next_hop_addr, c->link_addr);
2474: }
2475:
2476: if (c->igp_table_ip4)
2477: cli_msg(-1006, " IGP IPv4 table: %s", c->igp_table_ip4->name);
2478:
2479: if (c->igp_table_ip6)
2480: cli_msg(-1006, " IGP IPv6 table: %s", c->igp_table_ip6->name);
2481: }
2482: }
2483: }
2484:
2485: struct channel_class channel_bgp = {
2486: .channel_size = sizeof(struct bgp_channel),
2487: .config_size = sizeof(struct bgp_channel_config),
2488: .init = bgp_channel_init,
2489: .start = bgp_channel_start,
2490: .shutdown = bgp_channel_shutdown,
2491: .cleanup = bgp_channel_cleanup,
2492: .reconfigure = bgp_channel_reconfigure,
2493: };
2494:
2495: struct protocol proto_bgp = {
2496: .name = "BGP",
2497: .template = "bgp%d",
2498: .class = PROTOCOL_BGP,
2499: .preference = DEF_PREF_BGP,
2500: .channel_mask = NB_IP | NB_VPN | NB_FLOW,
2501: .proto_size = sizeof(struct bgp_proto),
2502: .config_size = sizeof(struct bgp_config),
2503: .postconfig = bgp_postconfig,
2504: .init = bgp_init,
2505: .start = bgp_start,
2506: .shutdown = bgp_shutdown,
2507: .reconfigure = bgp_reconfigure,
2508: .copy_config = bgp_copy_config,
2509: .get_status = bgp_get_status,
2510: .get_attr = bgp_get_attr,
2511: .get_route_info = bgp_get_route_info,
2512: .show_proto_info = bgp_show_proto_info
2513: };
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>