1: /*
2: * BIRD -- The Border Gateway Protocol
3: *
4: * (c) 2000 Martin Mares <mj@ucw.cz>
5: *
6: * Can be freely distributed and used under the terms of the GNU GPL.
7: */
8:
9: /**
10: * DOC: Border Gateway Protocol
11: *
12: * The BGP protocol is implemented in three parts: |bgp.c| which takes care of the
13: * connection and most of the interface with BIRD core, |packets.c| handling
14: * both incoming and outgoing BGP packets and |attrs.c| containing functions for
15: * manipulation with BGP attribute lists.
16: *
17: * As opposed to the other existing routing daemons, BIRD has a sophisticated core
18: * architecture which is able to keep all the information needed by BGP in the
19: * primary routing table, therefore no complex data structures like a central
20: * BGP table are needed. This increases memory footprint of a BGP router with
21: * many connections, but not too much and, which is more important, it makes
22: * BGP much easier to implement.
23: *
24: * Each instance of BGP (corresponding to a single BGP peer) is described by a &bgp_proto
 * structure to which are attached individual connections represented by &bgp_conn
26: * (usually, there exists only one connection, but during BGP session setup, there
27: * can be more of them). The connections are handled according to the BGP state machine
28: * defined in the RFC with all the timers and all the parameters configurable.
29: *
30: * In incoming direction, we listen on the connection's socket and each time we receive
31: * some input, we pass it to bgp_rx(). It decodes packet headers and the markers and
32: * passes complete packets to bgp_rx_packet() which distributes the packet according
33: * to its type.
34: *
35: * In outgoing direction, we gather all the routing updates and sort them to buckets
36: * (&bgp_bucket) according to their attributes (we keep a hash table for fast comparison
37: * of &rta's and a &fib which helps us to find if we already have another route for
38: * the same destination queued for sending, so that we can replace it with the new one
39: * immediately instead of sending both updates). There also exists a special bucket holding
40: * all the route withdrawals which cannot be queued anywhere else as they don't have any
41: * attributes. If we have any packet to send (due to either new routes or the connection
 * tracking code wanting to send an Open, Keepalive or Notification message), we call
 * bgp_schedule_packet() which sets the corresponding bit in the @packets_to_send
 * bit field in &bgp_conn and as soon as the transmit socket buffer becomes empty,
45: * we call bgp_fire_tx(). It inspects state of all the packet type bits and calls
46: * the corresponding bgp_create_xx() functions, eventually rescheduling the same packet
47: * type if we have more data of the same type to send.
48: *
49: * The processing of attributes consists of two functions: bgp_decode_attrs() for checking
50: * of the attribute blocks and translating them to the language of BIRD's extended attributes
51: * and bgp_encode_attrs() which does the converse. Both functions are built around a
52: * @bgp_attr_table array describing all important characteristics of all known attributes.
53: * Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
54: *
55: * BGP protocol implements graceful restart in both restarting (local restart)
56: * and receiving (neighbor restart) roles. The first is handled mostly by the
57: * graceful restart code in the nest, BGP protocol just handles capabilities,
58: * sets @gr_wait and locks graceful restart until end-of-RIB mark is received.
59: * The second is implemented by internal restart of the BGP state to %BS_IDLE
60: * and protocol state to %PS_START, but keeping the protocol up from the core
61: * point of view and therefore maintaining received routes. Routing table
62: * refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing
63: * stale routes after reestablishment of BGP session during graceful restart.
64: */
65:
66: #undef LOCAL_DEBUG
67:
68: #include "nest/bird.h"
69: #include "nest/iface.h"
70: #include "nest/protocol.h"
71: #include "nest/route.h"
72: #include "nest/cli.h"
73: #include "nest/locks.h"
74: #include "conf/conf.h"
75: #include "lib/socket.h"
76: #include "lib/resource.h"
77: #include "lib/string.h"
78:
79: #include "bgp.h"
80:
81:
82: struct linpool *bgp_linpool; /* Global temporary pool */
83: static sock *bgp_listen_sk; /* Global listening socket */
84: static int bgp_counter; /* Number of protocol instances using the listening socket */
85:
86: static void bgp_close(struct bgp_proto *p, int apply_md5);
87: static void bgp_connect(struct bgp_proto *p);
88: static void bgp_active(struct bgp_proto *p);
89: static sock *bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags);
90: static void bgp_update_bfd(struct bgp_proto *p, int use_bfd);
91:
92:
93: /**
94: * bgp_open - open a BGP instance
95: * @p: BGP instance
96: *
97: * This function allocates and configures shared BGP resources.
98: * Should be called as the last step during initialization
99: * (when lock is acquired and neighbor is ready).
100: * When error, state changed to PS_DOWN, -1 is returned and caller
101: * should return immediately.
102: */
103: static int
104: bgp_open(struct bgp_proto *p)
105: {
106: struct config *cfg = p->cf->c.global;
107: int errcode;
108:
109: if (!bgp_listen_sk)
110: bgp_listen_sk = bgp_setup_listen_sk(cfg->listen_bgp_addr, cfg->listen_bgp_port, cfg->listen_bgp_flags);
111:
112: if (!bgp_listen_sk)
113: {
114: errcode = BEM_NO_SOCKET;
115: goto err;
116: }
117:
118: if (!bgp_linpool)
119: bgp_linpool = lp_new(&root_pool, 4080);
120:
121: bgp_counter++;
122:
123: if (p->cf->password)
124: if (sk_set_md5_auth(bgp_listen_sk, p->cf->source_addr, p->cf->remote_ip,
125: p->cf->iface, p->cf->password, p->cf->setkey) < 0)
126: {
127: sk_log_error(bgp_listen_sk, p->p.name);
128: bgp_close(p, 0);
129: errcode = BEM_INVALID_MD5;
130: goto err;
131: }
132:
133: return 0;
134:
135: err:
136: p->p.disabled = 1;
137: bgp_store_error(p, NULL, BE_MISC, errcode);
138: proto_notify_state(&p->p, PS_DOWN);
139: return -1;
140: }
141:
142: static void
143: bgp_startup(struct bgp_proto *p)
144: {
145: BGP_TRACE(D_EVENTS, "Started");
146: p->start_state = p->cf->capabilities ? BSS_CONNECT : BSS_CONNECT_NOCAP;
147:
148: if (!p->cf->passive)
149: bgp_active(p);
150: }
151:
152: static void
153: bgp_startup_timeout(timer *t)
154: {
155: bgp_startup(t->data);
156: }
157:
158:
159: static void
160: bgp_initiate(struct bgp_proto *p)
161: {
162: int rv = bgp_open(p);
163: if (rv < 0)
164: return;
165:
166: if (p->cf->bfd)
167: bgp_update_bfd(p, p->cf->bfd);
168:
169: if (p->startup_delay)
170: {
171: p->start_state = BSS_DELAY;
172: BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay);
173: bgp_start_timer(p->startup_timer, p->startup_delay);
174: }
175: else
176: bgp_startup(p);
177: }
178:
179: /**
180: * bgp_close - close a BGP instance
181: * @p: BGP instance
182: * @apply_md5: 0 to disable unsetting MD5 auth
183: *
184: * This function frees and deconfigures shared BGP resources.
185: * @apply_md5 is set to 0 when bgp_close is called as a cleanup
186: * from failed bgp_open().
187: */
188: static void
189: bgp_close(struct bgp_proto *p, int apply_md5)
190: {
191: ASSERT(bgp_counter);
192: bgp_counter--;
193:
194: if (p->cf->password && apply_md5)
195: if (sk_set_md5_auth(bgp_listen_sk, p->cf->source_addr, p->cf->remote_ip,
196: p->cf->iface, NULL, p->cf->setkey) < 0)
197: sk_log_error(bgp_listen_sk, p->p.name);
198:
199: if (!bgp_counter)
200: {
201: rfree(bgp_listen_sk);
202: bgp_listen_sk = NULL;
203: rfree(bgp_linpool);
204: bgp_linpool = NULL;
205: }
206: }
207:
208: /**
209: * bgp_start_timer - start a BGP timer
210: * @t: timer
211: * @value: time to fire (0 to disable the timer)
212: *
213: * This functions calls tm_start() on @t with time @value and the
214: * amount of randomization suggested by the BGP standard. Please use
215: * it for all BGP timers.
216: */
217: void
218: bgp_start_timer(timer *t, int value)
219: {
220: if (value)
221: {
222: /* The randomization procedure is specified in RFC 1771: 9.2.3.3 */
223: t->randomize = value / 4;
224: tm_start(t, value - t->randomize);
225: }
226: else
227: tm_stop(t);
228: }
229:
230: /**
231: * bgp_close_conn - close a BGP connection
232: * @conn: connection to close
233: *
234: * This function takes a connection described by the &bgp_conn structure,
235: * closes its socket and frees all resources associated with it.
236: */
237: void
238: bgp_close_conn(struct bgp_conn *conn)
239: {
240: // struct bgp_proto *p = conn->bgp;
241:
242: DBG("BGP: Closing connection\n");
243: conn->packets_to_send = 0;
244: rfree(conn->connect_retry_timer);
245: conn->connect_retry_timer = NULL;
246: rfree(conn->keepalive_timer);
247: conn->keepalive_timer = NULL;
248: rfree(conn->hold_timer);
249: conn->hold_timer = NULL;
250: rfree(conn->sk);
251: conn->sk = NULL;
252: rfree(conn->tx_ev);
253: conn->tx_ev = NULL;
254: }
255:
256:
257: /**
258: * bgp_update_startup_delay - update a startup delay
259: * @p: BGP instance
260: *
261: * This function updates a startup delay that is used to postpone next BGP connect.
262: * It also handles disable_after_error and might stop BGP instance when error
263: * happened and disable_after_error is on.
264: *
265: * It should be called when BGP protocol error happened.
266: */
267: void
268: bgp_update_startup_delay(struct bgp_proto *p)
269: {
270: struct bgp_config *cf = p->cf;
271:
272: DBG("BGP: Updating startup delay\n");
273:
274: if (p->last_proto_error && ((now - p->last_proto_error) >= (int) cf->error_amnesia_time))
275: p->startup_delay = 0;
276:
277: p->last_proto_error = now;
278:
279: if (cf->disable_after_error)
280: {
281: p->startup_delay = 0;
282: p->p.disabled = 1;
283: return;
284: }
285:
286: if (!p->startup_delay)
287: p->startup_delay = cf->error_delay_time_min;
288: else
289: p->startup_delay = MIN(2 * p->startup_delay, cf->error_delay_time_max);
290: }
291:
292: static void
293: bgp_graceful_close_conn(struct bgp_conn *conn, uint subcode, byte *data, uint len)
294: {
295: switch (conn->state)
296: {
297: case BS_IDLE:
298: case BS_CLOSE:
299: return;
300: case BS_CONNECT:
301: case BS_ACTIVE:
302: bgp_conn_enter_idle_state(conn);
303: return;
304: case BS_OPENSENT:
305: case BS_OPENCONFIRM:
306: case BS_ESTABLISHED:
307: bgp_error(conn, 6, subcode, data, len);
308: return;
309: default:
310: bug("bgp_graceful_close_conn: Unknown state %d", conn->state);
311: }
312: }
313:
314: static void
315: bgp_down(struct bgp_proto *p)
316: {
317: if (p->start_state > BSS_PREPARE)
318: bgp_close(p, 1);
319:
320: BGP_TRACE(D_EVENTS, "Down");
321: proto_notify_state(&p->p, PS_DOWN);
322: }
323:
324: static void
325: bgp_decision(void *vp)
326: {
327: struct bgp_proto *p = vp;
328:
329: DBG("BGP: Decision start\n");
330: if ((p->p.proto_state == PS_START)
331: && (p->outgoing_conn.state == BS_IDLE)
332: && (p->incoming_conn.state != BS_OPENCONFIRM)
333: && (!p->cf->passive))
334: bgp_active(p);
335:
336: if ((p->p.proto_state == PS_STOP)
337: && (p->outgoing_conn.state == BS_IDLE)
338: && (p->incoming_conn.state == BS_IDLE))
339: bgp_down(p);
340: }
341:
342: void
343: bgp_stop(struct bgp_proto *p, uint subcode, byte *data, uint len)
344: {
345: proto_notify_state(&p->p, PS_STOP);
346: bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len);
347: bgp_graceful_close_conn(&p->incoming_conn, subcode, data, len);
348: ev_schedule(p->event);
349: }
350:
351: static inline void
352: bgp_conn_set_state(struct bgp_conn *conn, unsigned new_state)
353: {
354: if (conn->bgp->p.mrtdump & MD_STATES)
355: bgp_dump_state_change(conn, conn->state, new_state);
356:
357: conn->state = new_state;
358: }
359:
360: void
361: bgp_conn_enter_openconfirm_state(struct bgp_conn *conn)
362: {
363: /* Really, most of the work is done in bgp_rx_open(). */
364: bgp_conn_set_state(conn, BS_OPENCONFIRM);
365: }
366:
367: void
368: bgp_conn_enter_established_state(struct bgp_conn *conn)
369: {
370: struct bgp_proto *p = conn->bgp;
371:
372: BGP_TRACE(D_EVENTS, "BGP session established");
373: DBG("BGP: UP!!!\n");
374:
375: /* For multi-hop BGP sessions */
376: if (ipa_zero(p->source_addr))
377: p->source_addr = conn->sk->saddr;
378:
379: conn->sk->fast_rx = 0;
380:
381: p->conn = conn;
382: p->last_error_class = 0;
383: p->last_error_code = 0;
384: p->feed_state = BFS_NONE;
385: p->load_state = BFS_NONE;
386: bgp_init_bucket_table(p);
387: bgp_init_prefix_table(p, 8);
388:
389: int peer_gr_ready = conn->peer_gr_aware && !(conn->peer_gr_flags & BGP_GRF_RESTART);
390:
391: if (p->p.gr_recovery && !peer_gr_ready)
392: proto_graceful_restart_unlock(&p->p);
393:
394: if (p->p.gr_recovery && (p->cf->gr_mode == BGP_GR_ABLE) && peer_gr_ready)
395: p->p.gr_wait = 1;
396:
397: if (p->gr_active == BGP_GRS_ACTIVE)
398: tm_stop(p->gr_timer);
399:
400: /* Check F-bit for regular graceful restart */
401: if ((p->gr_active == BGP_GRS_ACTIVE) &&
402: (!conn->peer_gr_able || !(conn->peer_gr_aflags & BGP_GRF_FORWARDING)))
403: bgp_graceful_restart_done(p);
404:
405: /* Check F-bit for long-lived graceful restart */
406: if (((p->gr_active == BGP_GRS_LLGR_1) || (p->gr_active == BGP_GRS_LLGR_2)) &&
407: (!conn->peer_llgr_able || !(conn->peer_llgr_aflags & BGP_LLGRF_FORWARDING)))
408: bgp_graceful_restart_done(p);
409:
410: /* GR capability implies that neighbor will send End-of-RIB */
411: if (conn->peer_gr_aware)
412: p->load_state = BFS_LOADING;
413:
414: /* proto_notify_state() will likely call bgp_feed_begin(), setting p->feed_state */
415:
416: bgp_conn_set_state(conn, BS_ESTABLISHED);
417: proto_notify_state(&p->p, PS_UP);
418: }
419:
420: static void
421: bgp_conn_leave_established_state(struct bgp_proto *p)
422: {
423: BGP_TRACE(D_EVENTS, "BGP session closed");
424: p->conn = NULL;
425:
426: bgp_free_prefix_table(p);
427: bgp_free_bucket_table(p);
428:
429: if (p->p.proto_state == PS_UP)
430: bgp_stop(p, 0, NULL, 0);
431: }
432:
433: void
434: bgp_conn_enter_close_state(struct bgp_conn *conn)
435: {
436: struct bgp_proto *p = conn->bgp;
437: int os = conn->state;
438:
439: bgp_conn_set_state(conn, BS_CLOSE);
440: tm_stop(conn->keepalive_timer);
441: conn->sk->rx_hook = NULL;
442:
443: /* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */
444: bgp_start_timer(conn->hold_timer, 10);
445:
446: if (os == BS_ESTABLISHED)
447: bgp_conn_leave_established_state(p);
448: }
449:
450: void
451: bgp_conn_enter_idle_state(struct bgp_conn *conn)
452: {
453: struct bgp_proto *p = conn->bgp;
454: int os = conn->state;
455:
456: bgp_close_conn(conn);
457: bgp_conn_set_state(conn, BS_IDLE);
458: ev_schedule(p->event);
459:
460: if (os == BS_ESTABLISHED)
461: bgp_conn_leave_established_state(p);
462: }
463:
464: /**
465: * bgp_handle_graceful_restart - handle detected BGP graceful restart
466: * @p: BGP instance
467: *
468: * This function is called when a BGP graceful restart of the neighbor is
469: * detected (when the TCP connection fails or when a new TCP connection
470: * appears). The function activates processing of the restart - starts routing
471: * table refresh cycle and activates BGP restart timer. The protocol state goes
472: * back to %PS_START, but changing BGP state back to %BS_IDLE is left for the
473: * caller.
474: */
475: void
476: bgp_handle_graceful_restart(struct bgp_proto *p)
477: {
478: ASSERT(p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready);
479:
480: BGP_TRACE(D_EVENTS, "Neighbor graceful restart detected%s",
481: p->gr_active ? " - already pending" : "");
482: proto_notify_state(&p->p, PS_START);
483:
484: switch (p->gr_active)
485: {
486: case BGP_GRS_ACTIVE:
487: rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
488: break;
489:
490: case BGP_GRS_LLGR_1:
491: rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
492: return;
493:
494: case BGP_GRS_LLGR_2:
495: rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
496: rt_modify_stale(p->p.main_ahook->table, p->p.main_ahook);
497: return;
498: }
499:
500: p->stale_time = p->cf->llgr_mode ? p->conn->peer_llgr_time : 0;
501: p->gr_active = !p->stale_time ? BGP_GRS_ACTIVE : BGP_GRS_LLGR_1;
502: tm_start(p->gr_timer, p->conn->peer_gr_time);
503: rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
504: }
505:
506: /**
507: * bgp_graceful_restart_done - finish active BGP graceful restart
508: * @p: BGP instance
509: *
510: * This function is called when the active BGP graceful restart of the neighbor
511: * should be finished - either successfully (the neighbor sends all paths and
512: * reports end-of-RIB on the new session) or unsuccessfully (the neighbor does
513: * not support BGP graceful restart on the new session). The function ends
514: * routing table refresh cycle and stops BGP restart timer.
515: */
516: void
517: bgp_graceful_restart_done(struct bgp_proto *p)
518: {
519: BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
520: p->gr_active = 0;
521: tm_stop(p->gr_timer);
522: rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
523: }
524:
525: /**
526: * bgp_graceful_restart_timeout - timeout of graceful restart 'restart timer'
527: * @t: timer
528: *
529: * This function is a timeout hook for @gr_timer, implementing BGP restart time
530: * limit for reestablisment of the BGP session after the graceful restart. When
531: * fired, we just proceed with the usual protocol restart.
532: */
533:
534: static void
535: bgp_graceful_restart_timeout(timer *t)
536: {
537: struct bgp_proto *p = t->data;
538:
539: switch (p->gr_active)
540: {
541: case BGP_GRS_ACTIVE:
542: BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
543: bgp_stop(p, 0, NULL, 0);
544: return;
545:
546: case BGP_GRS_LLGR_1:
547: BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
548: p->gr_active = BGP_GRS_LLGR_2;
549: tm_start(p->gr_timer, p->stale_time);
550: rt_modify_stale(p->p.main_ahook->table, p->p.main_ahook);
551: return;
552:
553: case BGP_GRS_LLGR_2:
554: BGP_TRACE(D_EVENTS, "Long-lived graceful restart timeout");
555: p->gr_active = 0;
556: rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
557: return;
558: }
559: }
560:
561: /**
562: * bgp_refresh_begin - start incoming enhanced route refresh sequence
563: * @p: BGP instance
564: *
565: * This function is called when an incoming enhanced route refresh sequence is
566: * started by the neighbor, demarcated by the BoRR packet. The function updates
567: * the load state and starts the routing table refresh cycle. Note that graceful
568: * restart also uses routing table refresh cycle, but RFC 7313 and load states
569: * ensure that these two sequences do not overlap.
570: */
571: void
572: bgp_refresh_begin(struct bgp_proto *p)
573: {
574: if (p->load_state == BFS_LOADING)
575: { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }
576:
577: p->load_state = BFS_REFRESHING;
578: rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
579: }
580:
581: /**
582: * bgp_refresh_end - finish incoming enhanced route refresh sequence
583: * @p: BGP instance
584: *
585: * This function is called when an incoming enhanced route refresh sequence is
586: * finished by the neighbor, demarcated by the EoRR packet. The function updates
587: * the load state and ends the routing table refresh cycle. Routes not received
588: * during the sequence are removed by the nest.
589: */
590: void
591: bgp_refresh_end(struct bgp_proto *p)
592: {
593: if (p->load_state != BFS_REFRESHING)
594: { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }
595:
596: p->load_state = BFS_NONE;
597: rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
598: }
599:
600:
601: static void
602: bgp_send_open(struct bgp_conn *conn)
603: {
604: conn->start_state = conn->bgp->start_state;
605:
606: // Default values, possibly changed by receiving capabilities.
607: conn->advertised_as = 0;
608: conn->peer_refresh_support = 0;
609: conn->peer_as4_support = 0;
610: conn->peer_add_path = 0;
611: conn->peer_enhanced_refresh_support = 0;
612: conn->peer_gr_aware = 0;
613: conn->peer_gr_able = 0;
614: conn->peer_gr_time = 0;
615: conn->peer_gr_flags = 0;
616: conn->peer_gr_aflags = 0;
617: conn->peer_llgr_aware = 0;
618: conn->peer_llgr_able = 0;
619: conn->peer_llgr_time = 0;
620: conn->peer_llgr_aflags = 0;
621: conn->peer_ext_messages_support = 0;
622:
623: DBG("BGP: Sending open\n");
624: conn->sk->rx_hook = bgp_rx;
625: conn->sk->tx_hook = bgp_tx;
626: tm_stop(conn->connect_retry_timer);
627: bgp_schedule_packet(conn, PKT_OPEN);
628: bgp_conn_set_state(conn, BS_OPENSENT);
629: bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
630: }
631:
632: static void
633: bgp_connected(sock *sk)
634: {
635: struct bgp_conn *conn = sk->data;
636: struct bgp_proto *p = conn->bgp;
637:
638: BGP_TRACE(D_EVENTS, "Connected");
639: bgp_send_open(conn);
640: }
641:
642: static void
643: bgp_connect_timeout(timer *t)
644: {
645: struct bgp_conn *conn = t->data;
646: struct bgp_proto *p = conn->bgp;
647:
648: DBG("BGP: connect_timeout\n");
649: if (p->p.proto_state == PS_START)
650: {
651: bgp_close_conn(conn);
652: bgp_connect(p);
653: }
654: else
655: bgp_conn_enter_idle_state(conn);
656: }
657:
658: static void
659: bgp_sock_err(sock *sk, int err)
660: {
661: struct bgp_conn *conn = sk->data;
662: struct bgp_proto *p = conn->bgp;
663:
664: /*
665: * This error hook may be called either asynchronously from main
666: * loop, or synchronously from sk_send(). But sk_send() is called
667: * only from bgp_tx() and bgp_kick_tx(), which are both called
668: * asynchronously from main loop. Moreover, they end if err hook is
669: * called. Therefore, we could suppose that it is always called
670: * asynchronously.
671: */
672:
673: bgp_store_error(p, conn, BE_SOCKET, err);
674:
675: if (err)
676: BGP_TRACE(D_EVENTS, "Connection lost (%M)", err);
677: else
678: BGP_TRACE(D_EVENTS, "Connection closed");
679:
680: if ((conn->state == BS_ESTABLISHED) && p->gr_ready)
681: bgp_handle_graceful_restart(p);
682:
683: bgp_conn_enter_idle_state(conn);
684: }
685:
686: static void
687: bgp_hold_timeout(timer *t)
688: {
689: struct bgp_conn *conn = t->data;
690: struct bgp_proto *p = conn->bgp;
691:
692: DBG("BGP: Hold timeout\n");
693:
694: /* We are already closing the connection - just do hangup */
695: if (conn->state == BS_CLOSE)
696: {
697: BGP_TRACE(D_EVENTS, "Connection stalled");
698: bgp_conn_enter_idle_state(conn);
699: return;
700: }
701:
702: /* If there is something in input queue, we are probably congested
703: and perhaps just not processed BGP packets in time. */
704:
705: if (sk_rx_ready(conn->sk) > 0)
706: bgp_start_timer(conn->hold_timer, 10);
707: else if ((conn->state == BS_ESTABLISHED) && p->gr_ready && conn->peer_llgr_able)
708: {
709: BGP_TRACE(D_EVENTS, "Hold timer expired");
710: bgp_handle_graceful_restart(p);
711: bgp_conn_enter_idle_state(conn);
712: }
713: else
714: bgp_error(conn, 4, 0, NULL, 0);
715: }
716:
717: static void
718: bgp_keepalive_timeout(timer *t)
719: {
720: struct bgp_conn *conn = t->data;
721:
722: DBG("BGP: Keepalive timer\n");
723: bgp_schedule_packet(conn, PKT_KEEPALIVE);
724:
725: /* Kick TX a bit faster */
726: if (ev_active(conn->tx_ev))
727: ev_run(conn->tx_ev);
728: }
729:
730: static void
731: bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
732: {
733: timer *t;
734:
735: conn->sk = NULL;
736: conn->bgp = p;
737: conn->packets_to_send = 0;
738:
739: t = conn->connect_retry_timer = tm_new(p->p.pool);
740: t->hook = bgp_connect_timeout;
741: t->data = conn;
742: t = conn->hold_timer = tm_new(p->p.pool);
743: t->hook = bgp_hold_timeout;
744: t->data = conn;
745: t = conn->keepalive_timer = tm_new(p->p.pool);
746: t->hook = bgp_keepalive_timeout;
747: t->data = conn;
748: conn->tx_ev = ev_new(p->p.pool);
749: conn->tx_ev->hook = bgp_kick_tx;
750: conn->tx_ev->data = conn;
751: }
752:
753: static void
754: bgp_setup_sk(struct bgp_conn *conn, sock *s)
755: {
756: s->data = conn;
757: s->err_hook = bgp_sock_err;
758: s->fast_rx = 1;
759: conn->sk = s;
760: }
761:
762: static void
763: bgp_active(struct bgp_proto *p)
764: {
765: int delay = MAX(1, p->cf->connect_delay_time);
766: struct bgp_conn *conn = &p->outgoing_conn;
767:
768: BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
769: bgp_setup_conn(p, conn);
770: bgp_conn_set_state(conn, BS_ACTIVE);
771: bgp_start_timer(conn->connect_retry_timer, delay);
772: }
773:
774: /**
775: * bgp_connect - initiate an outgoing connection
776: * @p: BGP instance
777: *
778: * The bgp_connect() function creates a new &bgp_conn and initiates
779: * a TCP connection to the peer. The rest of connection setup is governed
780: * by the BGP state machine as described in the standard.
781: */
782: static void
783: bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing connection */
784: {
785: sock *s;
786: struct bgp_conn *conn = &p->outgoing_conn;
787: int hops = p->cf->multihop ? : 1;
788:
789: DBG("BGP: Connecting\n");
790: s = sk_new(p->p.pool);
791: s->type = SK_TCP_ACTIVE;
792: s->saddr = p->source_addr;
793: s->daddr = p->cf->remote_ip;
794: s->dport = p->cf->remote_port;
795: s->iface = p->neigh ? p->neigh->iface : NULL;
796: s->vrf = p->p.vrf;
797: s->ttl = p->cf->ttl_security ? 255 : hops;
798: s->rbsize = p->cf->enable_extended_messages ? BGP_RX_BUFFER_EXT_SIZE : BGP_RX_BUFFER_SIZE;
799: s->tbsize = p->cf->enable_extended_messages ? BGP_TX_BUFFER_EXT_SIZE : BGP_TX_BUFFER_SIZE;
800: s->tos = IP_PREC_INTERNET_CONTROL;
801: s->password = p->cf->password;
802: s->tx_hook = bgp_connected;
803: BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J", s->daddr, p->cf->iface,
804: s->saddr, ipa_is_link_local(s->saddr) ? s->iface : NULL);
805: bgp_setup_conn(p, conn);
806: bgp_setup_sk(conn, s);
807: bgp_conn_set_state(conn, BS_CONNECT);
808:
809: if (sk_open(s) < 0)
810: goto err;
811:
812: /* Set minimal receive TTL if needed */
813: if (p->cf->ttl_security)
814: if (sk_set_min_ttl(s, 256 - hops) < 0)
815: goto err;
816:
817: DBG("BGP: Waiting for connect success\n");
818: bgp_start_timer(conn->connect_retry_timer, p->cf->connect_retry_time);
819: return;
820:
821: err:
822: sk_log_error(s, p->p.name);
823: bgp_sock_err(s, 0);
824: return;
825: }
826:
827: /**
828: * bgp_find_proto - find existing proto for incoming connection
829: * @sk: TCP socket
830: *
831: */
832: static struct bgp_proto *
833: bgp_find_proto(sock *sk)
834: {
835: struct proto_config *pc;
836:
837: WALK_LIST(pc, config->protos)
838: if ((pc->protocol == &proto_bgp) && pc->proto)
839: {
840: struct bgp_proto *p = (struct bgp_proto *) pc->proto;
841: if (ipa_equal(p->cf->remote_ip, sk->daddr) &&
842: (!p->cf->iface || (p->cf->iface == sk->iface)))
843: return p;
844: }
845:
846: return NULL;
847: }
848:
849: /**
850: * bgp_incoming_connection - handle an incoming connection
851: * @sk: TCP socket
852: * @dummy: unused
853: *
854: * This function serves as a socket hook for accepting of new BGP
855: * connections. It searches a BGP instance corresponding to the peer
856: * which has connected and if such an instance exists, it creates a
857: * &bgp_conn structure, attaches it to the instance and either sends
858: * an Open message or (if there already is an active connection) it
859: * closes the new connection by sending a Notification message.
860: */
861: static int
862: bgp_incoming_connection(sock *sk, uint dummy UNUSED)
863: {
864: struct bgp_proto *p;
865: int acc, hops;
866:
867: DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport);
868: p = bgp_find_proto(sk);
869: if (!p)
870: {
871: log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)",
872: sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport);
873: rfree(sk);
874: return 0;
875: }
876:
877: /*
878: * BIRD should keep multiple incoming connections in OpenSent state (for
879: * details RFC 4271 8.2.1 par 3), but it keeps just one. Duplicate incoming
880: * connections are rejected istead. The exception is the case where an
881: * incoming connection triggers a graceful restart.
882: */
883:
884: acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) &&
885: (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);
886:
887: if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
888: {
889: bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART);
890: bgp_handle_graceful_restart(p);
891: bgp_conn_enter_idle_state(p->conn);
892: acc = 1;
893:
894: /* There might be separate incoming connection in OpenSent state */
895: if (p->incoming_conn.state > BS_ACTIVE)
896: bgp_close_conn(&p->incoming_conn);
897: }
898:
899: BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s",
900: sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL,
901: sk->dport, acc ? "accepted" : "rejected");
902:
903: if (!acc)
904: {
905: rfree(sk);
906: return 0;
907: }
908:
909: hops = p->cf->multihop ? : 1;
910:
911: if (sk_set_ttl(sk, p->cf->ttl_security ? 255 : hops) < 0)
912: goto err;
913:
914: if (p->cf->ttl_security)
915: if (sk_set_min_ttl(sk, 256 - hops) < 0)
916: goto err;
917:
918: if (p->cf->enable_extended_messages)
919: {
920: sk->rbsize = BGP_RX_BUFFER_EXT_SIZE;
921: sk->tbsize = BGP_TX_BUFFER_EXT_SIZE;
922: sk_reallocate(sk);
923: }
924:
925: bgp_setup_conn(p, &p->incoming_conn);
926: bgp_setup_sk(&p->incoming_conn, sk);
927: bgp_send_open(&p->incoming_conn);
928: return 0;
929:
930: err:
931: sk_log_error(sk, p->p.name);
932: log(L_ERR "%s: Incoming connection aborted", p->p.name);
933: rfree(sk);
934: return 0;
935: }
936:
937: static void
938: bgp_listen_sock_err(sock *sk UNUSED, int err)
939: {
940: if (err == ECONNABORTED)
941: log(L_WARN "BGP: Incoming connection aborted");
942: else
943: log(L_ERR "BGP: Error on listening socket: %M", err);
944: }
945:
946: static sock *
947: bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags)
948: {
949: sock *s = sk_new(&root_pool);
950: DBG("BGP: Creating listening socket\n");
951: s->type = SK_TCP_PASSIVE;
952: s->ttl = 255;
953: s->saddr = addr;
954: s->sport = port ? port : BGP_PORT;
955: s->flags = flags ? 0 : SKF_V6ONLY;
956: s->tos = IP_PREC_INTERNET_CONTROL;
957: s->rbsize = BGP_RX_BUFFER_SIZE;
958: s->tbsize = BGP_TX_BUFFER_SIZE;
959: s->rx_hook = bgp_incoming_connection;
960: s->err_hook = bgp_listen_sock_err;
961:
962: if (sk_open(s) < 0)
963: goto err;
964:
965: return s;
966:
967: err:
968: sk_log_error(s, "BGP");
969: log(L_ERR "BGP: Cannot open listening socket");
970: rfree(s);
971: return NULL;
972: }
973:
974: static void
975: bgp_start_neighbor(struct bgp_proto *p)
976: {
977: /* Called only for single-hop BGP sessions */
978:
979: if (ipa_zero(p->source_addr))
980: p->source_addr = p->neigh->ifa->ip;
981:
982: #ifdef IPV6
983: {
984: struct ifa *a;
985: p->local_link = IPA_NONE;
986: WALK_LIST(a, p->neigh->iface->addrs)
987: if (a->scope == SCOPE_LINK)
988: {
989: p->local_link = a->ip;
990: break;
991: }
992:
993: if (! ipa_nonzero(p->local_link))
994: log(L_WARN "%s: Missing link local address on interface %s", p->p.name, p->neigh->iface->name);
995:
996: DBG("BGP: Selected link-level address %I\n", p->local_link);
997: }
998: #endif
999:
1000: bgp_initiate(p);
1001: }
1002:
1003: static void
1004: bgp_neigh_notify(neighbor *n)
1005: {
1006: struct bgp_proto *p = (struct bgp_proto *) n->proto;
1007: int ps = p->p.proto_state;
1008:
1009: if (n != p->neigh)
1010: return;
1011:
1012: if ((ps == PS_DOWN) || (ps == PS_STOP))
1013: return;
1014:
1015: int prepare = (ps == PS_START) && (p->start_state == BSS_PREPARE);
1016:
1017: if (n->scope <= 0)
1018: {
1019: if (!prepare)
1020: {
1021: BGP_TRACE(D_EVENTS, "Neighbor lost");
1022: bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST);
1023: /* Perhaps also run bgp_update_startup_delay(p)? */
1024: bgp_stop(p, 0, NULL, 0);
1025: }
1026: }
1027: else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
1028: {
1029: if (!prepare)
1030: {
1031: BGP_TRACE(D_EVENTS, "Link down");
1032: bgp_store_error(p, NULL, BE_MISC, BEM_LINK_DOWN);
1033: if (ps == PS_UP)
1034: bgp_update_startup_delay(p);
1035: bgp_stop(p, 0, NULL, 0);
1036: }
1037: }
1038: else
1039: {
1040: if (prepare)
1041: {
1042: BGP_TRACE(D_EVENTS, "Neighbor ready");
1043: bgp_start_neighbor(p);
1044: }
1045: }
1046: }
1047:
1048: static void
1049: bgp_bfd_notify(struct bfd_request *req)
1050: {
1051: struct bgp_proto *p = req->data;
1052: int ps = p->p.proto_state;
1053:
1054: if (req->down && ((ps == PS_START) || (ps == PS_UP)))
1055: {
1056: BGP_TRACE(D_EVENTS, "BFD session down");
1057: bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN);
1058:
1059: if (p->cf->bfd == BGP_BFD_GRACEFUL)
1060: {
1061: /* Trigger graceful restart */
1062: if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
1063: bgp_handle_graceful_restart(p);
1064:
1065: if (p->incoming_conn.state > BS_IDLE)
1066: bgp_conn_enter_idle_state(&p->incoming_conn);
1067:
1068: if (p->outgoing_conn.state > BS_IDLE)
1069: bgp_conn_enter_idle_state(&p->outgoing_conn);
1070: }
1071: else
1072: {
1073: /* Trigger session down */
1074: if (ps == PS_UP)
1075: bgp_update_startup_delay(p);
1076: bgp_stop(p, 0, NULL, 0);
1077: }
1078: }
1079: }
1080:
1081: static void
1082: bgp_update_bfd(struct bgp_proto *p, int use_bfd)
1083: {
1084: if (use_bfd && !p->bfd_req)
1085: p->bfd_req = bfd_request_session(p->p.pool, p->cf->remote_ip, p->source_addr,
1086: p->cf->multihop ? NULL : p->neigh->iface,
1087: p->p.vrf, bgp_bfd_notify, p);
1088:
1089: if (!use_bfd && p->bfd_req)
1090: {
1091: rfree(p->bfd_req);
1092: p->bfd_req = NULL;
1093: }
1094: }
1095:
1096: static int
1097: bgp_reload_routes(struct proto *P)
1098: {
1099: struct bgp_proto *p = (struct bgp_proto *) P;
1100: if (!p->conn || !p->conn->peer_refresh_support)
1101: return 0;
1102:
1103: bgp_schedule_packet(p->conn, PKT_ROUTE_REFRESH);
1104: return 1;
1105: }
1106:
1107: static void
1108: bgp_feed_begin(struct proto *P, int initial)
1109: {
1110: struct bgp_proto *p = (struct bgp_proto *) P;
1111:
1112: /* This should not happen */
1113: if (!p->conn)
1114: return;
1115:
1116: if (initial && p->cf->gr_mode)
1117: p->feed_state = BFS_LOADING;
1118:
1119: /* It is refeed and both sides support enhanced route refresh */
1120: if (!initial && p->cf->enable_refresh &&
1121: p->conn->peer_enhanced_refresh_support)
1122: {
1123: /* BoRR must not be sent before End-of-RIB */
1124: if (p->feed_state == BFS_LOADING || p->feed_state == BFS_LOADED)
1125: return;
1126:
1127: p->feed_state = BFS_REFRESHING;
1128: bgp_schedule_packet(p->conn, PKT_BEGIN_REFRESH);
1129: }
1130: }
1131:
1132: static void
1133: bgp_feed_end(struct proto *P)
1134: {
1135: struct bgp_proto *p = (struct bgp_proto *) P;
1136:
1137: /* This should not happen */
1138: if (!p->conn)
1139: return;
1140:
1141: /* Non-demarcated feed ended, nothing to do */
1142: if (p->feed_state == BFS_NONE)
1143: return;
1144:
1145: /* Schedule End-of-RIB packet */
1146: if (p->feed_state == BFS_LOADING)
1147: p->feed_state = BFS_LOADED;
1148:
1149: /* Schedule EoRR packet */
1150: if (p->feed_state == BFS_REFRESHING)
1151: p->feed_state = BFS_REFRESHED;
1152:
1153: /* Kick TX hook */
1154: bgp_schedule_packet(p->conn, PKT_UPDATE);
1155: }
1156:
1157:
1158: static void
1159: bgp_start_locked(struct object_lock *lock)
1160: {
1161: struct bgp_proto *p = lock->data;
1162: struct bgp_config *cf = p->cf;
1163:
1164: if (p->p.proto_state != PS_START)
1165: {
1166: DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
1167: return;
1168: }
1169:
1170: DBG("BGP: Got lock\n");
1171:
1172: if (cf->multihop)
1173: {
1174: /* Multi-hop sessions do not use neighbor entries */
1175: bgp_initiate(p);
1176: return;
1177: }
1178:
1179: neighbor *n = neigh_find2(&p->p, &cf->remote_ip, cf->iface, NEF_STICKY);
1180: if (!n)
1181: {
1182: log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface);
1183: /* As we do not start yet, we can just disable protocol */
1184: p->p.disabled = 1;
1185: bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
1186: proto_notify_state(&p->p, PS_DOWN);
1187: return;
1188: }
1189:
1190: p->neigh = n;
1191:
1192: if (n->scope <= 0)
1193: BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", cf->remote_ip, cf->iface);
1194: else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
1195: BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name);
1196: else
1197: bgp_start_neighbor(p);
1198: }
1199:
1200: static int
1201: bgp_start(struct proto *P)
1202: {
1203: struct bgp_proto *p = (struct bgp_proto *) P;
1204: struct object_lock *lock;
1205:
1206: DBG("BGP: Startup.\n");
1207: p->start_state = BSS_PREPARE;
1208: p->outgoing_conn.state = BS_IDLE;
1209: p->incoming_conn.state = BS_IDLE;
1210: p->neigh = NULL;
1211: p->bfd_req = NULL;
1212: p->gr_ready = 0;
1213: p->gr_active = 0;
1214:
1215: rt_lock_table(p->igp_table);
1216:
1217: p->event = ev_new(p->p.pool);
1218: p->event->hook = bgp_decision;
1219: p->event->data = p;
1220:
1221: p->startup_timer = tm_new(p->p.pool);
1222: p->startup_timer->hook = bgp_startup_timeout;
1223: p->startup_timer->data = p;
1224:
1225: p->gr_timer = tm_new(p->p.pool);
1226: p->gr_timer->hook = bgp_graceful_restart_timeout;
1227: p->gr_timer->data = p;
1228:
1229: p->local_id = proto_get_router_id(P->cf);
1230: if (p->rr_client)
1231: p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
1232:
1233: p->remote_id = 0;
1234: p->source_addr = p->cf->source_addr;
1235:
1236: if (p->p.gr_recovery && p->cf->gr_mode)
1237: proto_graceful_restart_lock(P);
1238:
1239: /*
1240: * Before attempting to create the connection, we need to lock the
1241: * port, so that are sure we're the only instance attempting to talk
1242: * with that neighbor.
1243: */
1244:
1245: lock = p->lock = olock_new(P->pool);
1246: lock->addr = p->cf->remote_ip;
1247: lock->port = p->cf->remote_port;
1248: lock->iface = p->cf->iface;
1249: lock->vrf = p->cf->iface ? NULL : p->p.vrf;
1250: lock->type = OBJLOCK_TCP;
1251: lock->hook = bgp_start_locked;
1252: lock->data = p;
1253: olock_acquire(lock);
1254:
1255: return PS_START;
1256: }
1257:
1258: extern int proto_restart;
1259:
1260: static int
1261: bgp_shutdown(struct proto *P)
1262: {
1263: struct bgp_proto *p = (struct bgp_proto *) P;
1264: uint subcode = 0;
1265:
1266: char *message = NULL;
1267: byte *data = NULL;
1268: uint len = 0;
1269:
1270: BGP_TRACE(D_EVENTS, "Shutdown requested");
1271:
1272: switch (P->down_code)
1273: {
1274: case PDC_CF_REMOVE:
1275: case PDC_CF_DISABLE:
1276: subcode = 3; // Errcode 6, 3 - peer de-configured
1277: break;
1278:
1279: case PDC_CF_RESTART:
1280: subcode = 6; // Errcode 6, 6 - other configuration change
1281: break;
1282:
1283: case PDC_CMD_DISABLE:
1284: case PDC_CMD_SHUTDOWN:
1285: subcode = 2; // Errcode 6, 2 - administrative shutdown
1286: message = P->message;
1287: break;
1288:
1289: case PDC_CMD_RESTART:
1290: subcode = 4; // Errcode 6, 4 - administrative reset
1291: message = P->message;
1292: break;
1293:
1294: case PDC_RX_LIMIT_HIT:
1295: case PDC_IN_LIMIT_HIT:
1296: subcode = 1; // Errcode 6, 1 - max number of prefixes reached
1297: /* log message for compatibility */
1298: log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name);
1299: goto limit;
1300:
1301: case PDC_OUT_LIMIT_HIT:
1302: subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown
1303:
1304: limit:
1305: bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED);
1306: if (proto_restart)
1307: bgp_update_startup_delay(p);
1308: else
1309: p->startup_delay = 0;
1310: goto done;
1311: }
1312:
1313: bgp_store_error(p, NULL, BE_MAN_DOWN, 0);
1314: p->startup_delay = 0;
1315:
1316: /* RFC 8203 - shutdown communication */
1317: if (message)
1318: {
1319: uint msg_len = strlen(message);
1320: msg_len = MIN(msg_len, 255);
1321:
1322: /* Buffer will be freed automatically by protocol shutdown */
1323: data = mb_alloc(p->p.pool, msg_len + 1);
1324: len = msg_len + 1;
1325:
1326: data[0] = msg_len;
1327: memcpy(data+1, message, msg_len);
1328: }
1329:
1330: done:
1331: bgp_stop(p, subcode, data, len);
1332: return p->p.proto_state;
1333: }
1334:
1335: static void
1336: bgp_cleanup(struct proto *P)
1337: {
1338: struct bgp_proto *p = (struct bgp_proto *) P;
1339: rt_unlock_table(p->igp_table);
1340: }
1341:
1342: static rtable *
1343: get_igp_table(struct bgp_config *cf)
1344: {
1345: return cf->igp_table ? cf->igp_table->table : cf->c.table->table;
1346: }
1347:
1348: static struct proto *
1349: bgp_init(struct proto_config *C)
1350: {
1351: struct proto *P = proto_new(C, sizeof(struct bgp_proto));
1352: struct bgp_config *c = (struct bgp_config *) C;
1353: struct bgp_proto *p = (struct bgp_proto *) P;
1354:
1355: P->accept_ra_types = c->secondary ? RA_ACCEPTED : RA_OPTIMAL;
1356: P->rt_notify = bgp_rt_notify;
1357: P->import_control = bgp_import_control;
1358: P->neigh_notify = bgp_neigh_notify;
1359: P->reload_routes = bgp_reload_routes;
1360: P->feed_begin = bgp_feed_begin;
1361: P->feed_end = bgp_feed_end;
1362: P->rte_better = bgp_rte_better;
1363: P->rte_mergable = bgp_rte_mergable;
1364: P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL;
1365: P->rte_modify = bgp_rte_modify_stale;
1366:
1367: p->cf = c;
1368: p->local_as = c->local_as;
1369: p->remote_as = c->remote_as;
1370: p->is_internal = (c->local_as == c->remote_as);
1371: p->rs_client = c->rs_client;
1372: p->rr_client = c->rr_client;
1373: p->igp_table = get_igp_table(c);
1374:
1375: return P;
1376: }
1377:
1378:
1379: void
1380: bgp_check_config(struct bgp_config *c)
1381: {
1382: int internal = (c->local_as == c->remote_as);
1383:
1384: /* Do not check templates at all */
1385: if (c->c.class == SYM_TEMPLATE)
1386: return;
1387:
1388:
1389: /* EBGP direct by default, IBGP multihop by default */
1390: if (c->multihop < 0)
1391: c->multihop = internal ? 64 : 0;
1392:
1393: /* Different default for gw_mode */
1394: if (!c->gw_mode)
1395: c->gw_mode = c->multihop ? GW_RECURSIVE : GW_DIRECT;
1396:
1397: /* Different default based on rs_client */
1398: if (!c->missing_lladdr)
1399: c->missing_lladdr = c->rs_client ? MLL_IGNORE : MLL_SELF;
1400:
1401: /* LLGR mode default based on GR mode */
1402: if (c->llgr_mode < 0)
1403: c->llgr_mode = c->gr_mode ? BGP_LLGR_AWARE : 0;
1404:
1405: /* Disable after error incompatible with restart limit action */
1406: if (c->c.in_limit && (c->c.in_limit->action == PLA_RESTART) && c->disable_after_error)
1407: c->c.in_limit->action = PLA_DISABLE;
1408:
1409:
1410: if (!c->local_as)
1411: cf_error("Local AS number must be set");
1412:
1413: if (ipa_zero(c->remote_ip))
1414: cf_error("Neighbor must be configured");
1415:
1416: if (!c->remote_as)
1417: cf_error("Remote AS number must be set");
1418:
1419: if (ipa_is_link_local(c->remote_ip) && !c->iface)
1420: cf_error("Link-local neighbor address requires specified interface");
1421:
1422: if (!(c->capabilities && c->enable_as4) && (c->remote_as > 0xFFFF))
1423: cf_error("Neighbor AS number out of range (AS4 not available)");
1424:
1425: if (!internal && c->rr_client)
1426: cf_error("Only internal neighbor can be RR client");
1427:
1428: if (internal && c->rs_client)
1429: cf_error("Only external neighbor can be RS client");
1430:
1431: if (c->multihop && (c->gw_mode == GW_DIRECT))
1432: cf_error("Multihop BGP cannot use direct gateway mode");
1433:
1434: if (c->multihop && (ipa_is_link_local(c->remote_ip) ||
1435: ipa_is_link_local(c->source_addr)))
1436: cf_error("Multihop BGP cannot be used with link-local addresses");
1437:
1438: if (c->multihop && c->iface)
1439: cf_error("Multihop BGP cannot be bound to interface");
1440:
1441: if (c->multihop && c->check_link)
1442: cf_error("Multihop BGP cannot depend on link state");
1443:
1444: if (c->multihop && c->bfd && ipa_zero(c->source_addr))
1445: cf_error("Multihop BGP with BFD requires specified source address");
1446:
1447: if ((c->gw_mode == GW_RECURSIVE) && c->c.table->sorted)
1448: cf_error("BGP in recursive mode prohibits sorted table");
1449:
1450: if (c->deterministic_med && c->c.table->sorted)
1451: cf_error("BGP with deterministic MED prohibits sorted table");
1452:
1453: if (c->secondary && !c->c.table->sorted)
1454: cf_error("BGP with secondary option requires sorted table");
1455:
1456: if (!c->gr_mode && c->llgr_mode)
1457: cf_error("Long-lived graceful restart requires basic graceful restart");
1458: }
1459:
1460: static int
1461: bgp_reconfigure(struct proto *P, struct proto_config *C)
1462: {
1463: struct bgp_config *new = (struct bgp_config *) C;
1464: struct bgp_proto *p = (struct bgp_proto *) P;
1465: struct bgp_config *old = p->cf;
1466:
1467: if (proto_get_router_id(C) != p->local_id)
1468: return 0;
1469:
1470: int same = !memcmp(((byte *) old) + sizeof(struct proto_config),
1471: ((byte *) new) + sizeof(struct proto_config),
1472: // password item is last and must be checked separately
1473: OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
1474: && ((!old->password && !new->password)
1475: || (old->password && new->password && !strcmp(old->password, new->password)))
1476: && (get_igp_table(old) == get_igp_table(new));
1477:
1478: if (same && (p->start_state > BSS_PREPARE))
1479: bgp_update_bfd(p, new->bfd);
1480:
1481: /* We should update our copy of configuration ptr as old configuration will be freed */
1482: if (same)
1483: p->cf = new;
1484:
1485: return same;
1486: }
1487:
1488: static void
1489: bgp_copy_config(struct proto_config *dest, struct proto_config *src)
1490: {
1491: /* Just a shallow copy */
1492: proto_copy_rest(dest, src, sizeof(struct bgp_config));
1493: }
1494:
1495:
1496: /**
1497: * bgp_error - report a protocol error
1498: * @c: connection
1499: * @code: error code (according to the RFC)
1500: * @subcode: error sub-code
1501: * @data: data to be passed in the Notification message
1502: * @len: length of the data
1503: *
1504: * bgp_error() sends a notification packet to tell the other side that a protocol
1505: * error has occurred (including the data considered erroneous if possible) and
1506: * closes the connection.
1507: */
1508: void
1509: bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len)
1510: {
1511: struct bgp_proto *p = c->bgp;
1512:
1513: if (c->state == BS_CLOSE)
1514: return;
1515:
1516: bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, (len > 0) ? len : -len);
1517: bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode);
1518: bgp_conn_enter_close_state(c);
1519:
1520: c->notify_code = code;
1521: c->notify_subcode = subcode;
1522: c->notify_data = data;
1523: c->notify_size = (len > 0) ? len : 0;
1524: bgp_schedule_packet(c, PKT_NOTIFICATION);
1525:
1526: if (code != 6)
1527: {
1528: bgp_update_startup_delay(p);
1529: bgp_stop(p, 0, NULL, 0);
1530: }
1531: }
1532:
1533: /**
1534: * bgp_store_error - store last error for status report
1535: * @p: BGP instance
1536: * @c: connection
1537: * @class: error class (BE_xxx constants)
1538: * @code: error code (class specific)
1539: *
1540: * bgp_store_error() decides whether given error is interesting enough
1541: * and store that error to last_error variables of @p
1542: */
1543: void
1544: bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code)
1545: {
1546: /* During PS_UP, we ignore errors on secondary connection */
1547: if ((p->p.proto_state == PS_UP) && c && (c != p->conn))
1548: return;
1549:
1550: /* During PS_STOP, we ignore any errors, as we want to report
1551: * the error that caused transition to PS_STOP
1552: */
1553: if (p->p.proto_state == PS_STOP)
1554: return;
1555:
1556: p->last_error_class = class;
1557: p->last_error_code = code;
1558: }
1559:
/* Connection state names, indexed by connection state in bgp_state_dsc() */
static char *bgp_state_names[] = {
  "Idle",
  "Connect",
  "Active",
  "OpenSent",
  "OpenConfirm",
  "Established",
  "Close"
};

/* Status prefixes, indexed by last error class */
static char *bgp_err_classes[] = {
  "",
  "Error: ",
  "Socket: ",
  "Received: ",
  "BGP Error: ",
  "Automatic shutdown: ",
  ""
};

/* Messages for errors of class BE_MISC, indexed by error code */
static char *bgp_misc_errors[] = {
  "",
  "Neighbor lost",
  "Invalid next hop",
  "Kernel MD5 auth failed",
  "No listening socket",
  "Link down",
  "BFD session down",
  "Graceful restart"
};

/* Messages for errors of class BE_AUTO_DOWN, indexed by error code */
static char *bgp_auto_errors[] = {
  "",
  "Route limit exceeded"
};
1564:
1565: static const char *
1566: bgp_last_errmsg(struct bgp_proto *p)
1567: {
1568: switch (p->last_error_class)
1569: {
1570: case BE_MISC:
1571: return bgp_misc_errors[p->last_error_code];
1572: case BE_SOCKET:
1573: return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code);
1574: case BE_BGP_RX:
1575: case BE_BGP_TX:
1576: return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF);
1577: case BE_AUTO_DOWN:
1578: return bgp_auto_errors[p->last_error_code];
1579: default:
1580: return "";
1581: }
1582: }
1583:
1584: static const char *
1585: bgp_state_dsc(struct bgp_proto *p)
1586: {
1587: if (p->p.proto_state == PS_DOWN)
1588: return "Down";
1589:
1590: int state = MAX(p->incoming_conn.state, p->outgoing_conn.state);
1591: if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->cf->passive)
1592: return "Passive";
1593:
1594: return bgp_state_names[state];
1595: }
1596:
1597: static void
1598: bgp_get_status(struct proto *P, byte *buf)
1599: {
1600: struct bgp_proto *p = (struct bgp_proto *) P;
1601:
1602: const char *err1 = bgp_err_classes[p->last_error_class];
1603: const char *err2 = bgp_last_errmsg(p);
1604:
1605: if (P->proto_state == PS_DOWN)
1606: bsprintf(buf, "%s%s", err1, err2);
1607: else
1608: bsprintf(buf, "%-14s%s%s", bgp_state_dsc(p), err1, err2);
1609: }
1610:
1611: static void
1612: bgp_show_proto_info(struct proto *P)
1613: {
1614: struct bgp_proto *p = (struct bgp_proto *) P;
1615: struct bgp_conn *c = p->conn;
1616:
1617: proto_show_basic_info(P);
1618:
1619: cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p));
1620: cli_msg(-1006, " Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface);
1621: cli_msg(-1006, " Neighbor AS: %u", p->remote_as);
1622:
1623: if (p->gr_active)
1624: cli_msg(-1006, " Neighbor graceful restart active");
1625:
1626: if (p->gr_active && p->gr_timer->expires)
1627: cli_msg(-1006, " %-15s %d/-",
1628: (p->gr_active != BGP_GRS_LLGR_2) ? "Restart timer:" : "LL stale timer:",
1629: p->gr_timer->expires - now);
1630:
1631: if (P->proto_state == PS_START)
1632: {
1633: struct bgp_conn *oc = &p->outgoing_conn;
1634:
1635: if ((p->start_state < BSS_CONNECT) &&
1636: (p->startup_timer->expires))
1637: cli_msg(-1006, " Error wait: %d/%d",
1638: p->startup_timer->expires - now, p->startup_delay);
1639:
1640: if ((oc->state == BS_ACTIVE) &&
1641: (oc->connect_retry_timer->expires))
1642: cli_msg(-1006, " Connect delay: %d/%d",
1643: oc->connect_retry_timer->expires - now, p->cf->connect_delay_time);
1644: }
1645: else if (P->proto_state == PS_UP)
1646: {
1647: cli_msg(-1006, " Neighbor ID: %R", p->remote_id);
1648: cli_msg(-1006, " Neighbor caps: %s%s%s%s%s%s%s",
1649: c->peer_refresh_support ? " refresh" : "",
1650: c->peer_enhanced_refresh_support ? " enhanced-refresh" : "",
1651: c->peer_gr_able ? " restart-able" : (c->peer_gr_aware ? " restart-aware" : ""),
1652: c->peer_llgr_able ? " llgr-able" : (c->peer_llgr_aware ? " llgr-aware" : ""),
1653: c->peer_as4_support ? " AS4" : "",
1654: (c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "",
1655: (c->peer_add_path & ADD_PATH_TX) ? " add-path-tx" : "",
1656: c->peer_ext_messages_support ? " ext-messages" : "");
1657: cli_msg(-1006, " Session: %s%s%s%s%s%s%s%s",
1658: p->is_internal ? "internal" : "external",
1659: p->cf->multihop ? " multihop" : "",
1660: p->rr_client ? " route-reflector" : "",
1661: p->rs_client ? " route-server" : "",
1662: p->as4_session ? " AS4" : "",
1663: p->add_path_rx ? " add-path-rx" : "",
1664: p->add_path_tx ? " add-path-tx" : "",
1665: p->ext_messages ? " ext-messages" : "");
1666: cli_msg(-1006, " Source address: %I", p->source_addr);
1667: if (P->cf->in_limit)
1668: cli_msg(-1006, " Route limit: %d/%d",
1669: p->p.stats.imp_routes + p->p.stats.filt_routes, P->cf->in_limit->limit);
1670: cli_msg(-1006, " Hold timer: %d/%d",
1671: tm_remains(c->hold_timer), c->hold_time);
1672: cli_msg(-1006, " Keepalive timer: %d/%d",
1673: tm_remains(c->keepalive_timer), c->keepalive_time);
1674: }
1675:
1676: if ((p->last_error_class != BE_NONE) &&
1677: (p->last_error_class != BE_MAN_DOWN))
1678: {
1679: const char *err1 = bgp_err_classes[p->last_error_class];
1680: const char *err2 = bgp_last_errmsg(p);
1681: cli_msg(-1006, " Last error: %s%s", err1, err2);
1682: }
1683: }
1684:
1685: struct protocol proto_bgp = {
1686: .name = "BGP",
1687: .template = "bgp%d",
1688: .attr_class = EAP_BGP,
1689: .preference = DEF_PREF_BGP,
1690: .config_size = sizeof(struct bgp_config),
1691: .init = bgp_init,
1692: .start = bgp_start,
1693: .shutdown = bgp_shutdown,
1694: .cleanup = bgp_cleanup,
1695: .reconfigure = bgp_reconfigure,
1696: .copy_config = bgp_copy_config,
1697: .get_status = bgp_get_status,
1698: .get_attr = bgp_get_attr,
1699: .get_route_info = bgp_get_route_info,
1700: .show_proto_info = bgp_show_proto_info
1701: };
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>