File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / bird / proto / bgp / bgp.c
Revision 1.1.1.2 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Wed Mar 17 19:50:23 2021 UTC (4 years ago) by misho
Branches: bird, MAIN
CVS tags: v1_6_8p3, HEAD
bird 1.6.8

    1: /*
    2:  *	BIRD -- The Border Gateway Protocol
    3:  *
    4:  *	(c) 2000 Martin Mares <mj@ucw.cz>
    5:  *
    6:  *	Can be freely distributed and used under the terms of the GNU GPL.
    7:  */
    8: 
    9: /**
   10:  * DOC: Border Gateway Protocol
   11:  *
   12:  * The BGP protocol is implemented in three parts: |bgp.c| which takes care of the
   13:  * connection and most of the interface with BIRD core, |packets.c| handling
   14:  * both incoming and outgoing BGP packets and |attrs.c| containing functions for
   15:  * manipulation with BGP attribute lists.
   16:  *
   17:  * As opposed to the other existing routing daemons, BIRD has a sophisticated core
   18:  * architecture which is able to keep all the information needed by BGP in the
   19:  * primary routing table, therefore no complex data structures like a central
   20:  * BGP table are needed. This increases memory footprint of a BGP router with
   21:  * many connections, but not too much and, which is more important, it makes
   22:  * BGP much easier to implement.
   23:  *
   24:  * Each instance of BGP (corresponding to a single BGP peer) is described by a &bgp_proto
   25:  * structure to which are attached individual connections represented by &bgp_connection
   26:  * (usually, there exists only one connection, but during BGP session setup, there
   27:  * can be more of them). The connections are handled according to the BGP state machine
   28:  * defined in the RFC with all the timers and all the parameters configurable.
   29:  *
   30:  * In incoming direction, we listen on the connection's socket and each time we receive
   31:  * some input, we pass it to bgp_rx(). It decodes packet headers and the markers and
   32:  * passes complete packets to bgp_rx_packet() which distributes the packet according
   33:  * to its type.
   34:  *
   35:  * In outgoing direction, we gather all the routing updates and sort them to buckets
   36:  * (&bgp_bucket) according to their attributes (we keep a hash table for fast comparison
   37:  * of &rta's and a &fib which helps us to find if we already have another route for
   38:  * the same destination queued for sending, so that we can replace it with the new one
   39:  * immediately instead of sending both updates). There also exists a special bucket holding
   40:  * all the route withdrawals which cannot be queued anywhere else as they don't have any
   41:  * attributes. If we have any packet to send (due to either new routes or the connection
   42:  * tracking code wanting to send an Open, Keepalive or Notification message), we call
   43:  * bgp_schedule_packet() which sets the corresponding bit in a @packets_to_send
   44:  * bit field in &bgp_conn and as soon as the transmit socket buffer becomes empty,
   45:  * we call bgp_fire_tx(). It inspects state of all the packet type bits and calls
   46:  * the corresponding bgp_create_xx() functions, eventually rescheduling the same packet
   47:  * type if we have more data of the same type to send.
   48:  *
   49:  * The processing of attributes consists of two functions: bgp_decode_attrs() for checking
   50:  * of the attribute blocks and translating them to the language of BIRD's extended attributes
   51:  * and bgp_encode_attrs() which does the converse. Both functions are built around a
   52:  * @bgp_attr_table array describing all important characteristics of all known attributes.
   53:  * Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
   54:  *
   55:  * BGP protocol implements graceful restart in both restarting (local restart)
   56:  * and receiving (neighbor restart) roles. The first is handled mostly by the
   57:  * graceful restart code in the nest, BGP protocol just handles capabilities,
   58:  * sets @gr_wait and locks graceful restart until end-of-RIB mark is received.
   59:  * The second is implemented by internal restart of the BGP state to %BS_IDLE
   60:  * and protocol state to %PS_START, but keeping the protocol up from the core
   61:  * point of view and therefore maintaining received routes. Routing table
   62:  * refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing
   63:  * stale routes after reestablishment of BGP session during graceful restart.
   64:  */
   65: 
   66: #undef LOCAL_DEBUG
   67: 
   68: #include "nest/bird.h"
   69: #include "nest/iface.h"
   70: #include "nest/protocol.h"
   71: #include "nest/route.h"
   72: #include "nest/cli.h"
   73: #include "nest/locks.h"
   74: #include "conf/conf.h"
   75: #include "lib/socket.h"
   76: #include "lib/resource.h"
   77: #include "lib/string.h"
   78: 
   79: #include "bgp.h"
   80: 
   81: 
   82: struct linpool *bgp_linpool;		/* Global temporary pool */
   83: static sock *bgp_listen_sk;		/* Global listening socket */
   84: static int bgp_counter;			/* Number of protocol instances using the listening socket */
   85: 
   86: static void bgp_close(struct bgp_proto *p, int apply_md5);
   87: static void bgp_connect(struct bgp_proto *p);
   88: static void bgp_active(struct bgp_proto *p);
   89: static sock *bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags);
   90: static void bgp_update_bfd(struct bgp_proto *p, int use_bfd);
   91: 
   92: 
   93: /**
   94:  * bgp_open - open a BGP instance
   95:  * @p: BGP instance
   96:  *
   97:  * This function allocates and configures shared BGP resources.
   98:  * Should be called as the last step during initialization
   99:  * (when lock is acquired and neighbor is ready).
  100:  * When error, state changed to PS_DOWN, -1 is returned and caller
  101:  * should return immediately.
  102:  */
  103: static int
  104: bgp_open(struct bgp_proto *p)
  105: {
  106:   struct config *cfg = p->cf->c.global;
  107:   int errcode;
  108: 
  109:   if (!bgp_listen_sk)
  110:     bgp_listen_sk = bgp_setup_listen_sk(cfg->listen_bgp_addr, cfg->listen_bgp_port, cfg->listen_bgp_flags);
  111: 
  112:   if (!bgp_listen_sk)
  113:     {
  114:       errcode = BEM_NO_SOCKET;
  115:       goto err;
  116:     }
  117: 
  118:   if (!bgp_linpool)
  119:     bgp_linpool = lp_new(&root_pool, 4080);
  120: 
  121:   bgp_counter++;
  122: 
  123:   if (p->cf->password)
  124:     if (sk_set_md5_auth(bgp_listen_sk, p->cf->source_addr, p->cf->remote_ip,
  125: 			p->cf->iface, p->cf->password, p->cf->setkey) < 0)
  126:       {
  127: 	sk_log_error(bgp_listen_sk, p->p.name);
  128: 	bgp_close(p, 0);
  129: 	errcode = BEM_INVALID_MD5;
  130: 	goto err;
  131:       }
  132: 
  133:   return 0;
  134: 
  135: err:
  136:   p->p.disabled = 1;
  137:   bgp_store_error(p, NULL, BE_MISC, errcode);
  138:   proto_notify_state(&p->p, PS_DOWN);
  139:   return -1;
  140: }
  141: 
  142: static void
  143: bgp_startup(struct bgp_proto *p)
  144: {
  145:   BGP_TRACE(D_EVENTS, "Started");
  146:   p->start_state = p->cf->capabilities ? BSS_CONNECT : BSS_CONNECT_NOCAP;
  147: 
  148:   if (!p->cf->passive)
  149:     bgp_active(p);
  150: }
  151: 
  152: static void
  153: bgp_startup_timeout(timer *t)
  154: {
  155:   bgp_startup(t->data);
  156: }
  157: 
  158: 
  159: static void
  160: bgp_initiate(struct bgp_proto *p)
  161: {
  162:   int rv = bgp_open(p);
  163:   if (rv < 0)
  164:     return;
  165: 
  166:   if (p->cf->bfd)
  167:     bgp_update_bfd(p, p->cf->bfd);
  168: 
  169:   if (p->startup_delay)
  170:     {
  171:       p->start_state = BSS_DELAY;
  172:       BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay);
  173:       bgp_start_timer(p->startup_timer, p->startup_delay);
  174:     }
  175:   else
  176:     bgp_startup(p);
  177: }
  178: 
  179: /**
  180:  * bgp_close - close a BGP instance
  181:  * @p: BGP instance
  182:  * @apply_md5: 0 to disable unsetting MD5 auth
  183:  *
  184:  * This function frees and deconfigures shared BGP resources.
  185:  * @apply_md5 is set to 0 when bgp_close is called as a cleanup
  186:  * from failed bgp_open().
  187:  */
  188: static void
  189: bgp_close(struct bgp_proto *p, int apply_md5)
  190: {
  191:   ASSERT(bgp_counter);
  192:   bgp_counter--;
  193: 
  194:   if (p->cf->password && apply_md5)
  195:     if (sk_set_md5_auth(bgp_listen_sk, p->cf->source_addr, p->cf->remote_ip,
  196: 			p->cf->iface, NULL, p->cf->setkey) < 0)
  197:       sk_log_error(bgp_listen_sk, p->p.name);
  198: 
  199:   if (!bgp_counter)
  200:     {
  201:       rfree(bgp_listen_sk);
  202:       bgp_listen_sk = NULL;
  203:       rfree(bgp_linpool);
  204:       bgp_linpool = NULL;
  205:     }
  206: }
  207: 
  208: /**
  209:  * bgp_start_timer - start a BGP timer
  210:  * @t: timer
  211:  * @value: time to fire (0 to disable the timer)
  212:  *
  213:  * This functions calls tm_start() on @t with time @value and the
  214:  * amount of randomization suggested by the BGP standard. Please use
  215:  * it for all BGP timers.
  216:  */
  217: void
  218: bgp_start_timer(timer *t, int value)
  219: {
  220:   if (value)
  221:     {
  222:       /* The randomization procedure is specified in RFC 1771: 9.2.3.3 */
  223:       t->randomize = value / 4;
  224:       tm_start(t, value - t->randomize);
  225:     }
  226:   else
  227:     tm_stop(t);
  228: }
  229: 
  230: /**
  231:  * bgp_close_conn - close a BGP connection
  232:  * @conn: connection to close
  233:  *
  234:  * This function takes a connection described by the &bgp_conn structure,
  235:  * closes its socket and frees all resources associated with it.
  236:  */
  237: void
  238: bgp_close_conn(struct bgp_conn *conn)
  239: {
  240:   // struct bgp_proto *p = conn->bgp;
  241: 
  242:   DBG("BGP: Closing connection\n");
  243:   conn->packets_to_send = 0;
  244:   rfree(conn->connect_retry_timer);
  245:   conn->connect_retry_timer = NULL;
  246:   rfree(conn->keepalive_timer);
  247:   conn->keepalive_timer = NULL;
  248:   rfree(conn->hold_timer);
  249:   conn->hold_timer = NULL;
  250:   rfree(conn->sk);
  251:   conn->sk = NULL;
  252:   rfree(conn->tx_ev);
  253:   conn->tx_ev = NULL;
  254: }
  255: 
  256: 
  257: /**
  258:  * bgp_update_startup_delay - update a startup delay
  259:  * @p: BGP instance
  260:  *
  261:  * This function updates a startup delay that is used to postpone next BGP connect.
  262:  * It also handles disable_after_error and might stop BGP instance when error
  263:  * happened and disable_after_error is on.
  264:  *
  265:  * It should be called when BGP protocol error happened.
  266:  */
  267: void
  268: bgp_update_startup_delay(struct bgp_proto *p)
  269: {
  270:   struct bgp_config *cf = p->cf;
  271: 
  272:   DBG("BGP: Updating startup delay\n");
  273: 
  274:   if (p->last_proto_error && ((now - p->last_proto_error) >= (int) cf->error_amnesia_time))
  275:     p->startup_delay = 0;
  276: 
  277:   p->last_proto_error = now;
  278: 
  279:   if (cf->disable_after_error)
  280:     {
  281:       p->startup_delay = 0;
  282:       p->p.disabled = 1;
  283:       return;
  284:     }
  285: 
  286:   if (!p->startup_delay)
  287:     p->startup_delay = cf->error_delay_time_min;
  288:   else
  289:     p->startup_delay = MIN(2 * p->startup_delay, cf->error_delay_time_max);
  290: }
  291: 
  292: static void
  293: bgp_graceful_close_conn(struct bgp_conn *conn, uint subcode, byte *data, uint len)
  294: {
  295:   switch (conn->state)
  296:     {
  297:     case BS_IDLE:
  298:     case BS_CLOSE:
  299:       return;
  300:     case BS_CONNECT:
  301:     case BS_ACTIVE:
  302:       bgp_conn_enter_idle_state(conn);
  303:       return;
  304:     case BS_OPENSENT:
  305:     case BS_OPENCONFIRM:
  306:     case BS_ESTABLISHED:
  307:       bgp_error(conn, 6, subcode, data, len);
  308:       return;
  309:     default:
  310:       bug("bgp_graceful_close_conn: Unknown state %d", conn->state);
  311:     }
  312: }
  313: 
  314: static void
  315: bgp_down(struct bgp_proto *p)
  316: {
  317:   if (p->start_state > BSS_PREPARE)
  318:     bgp_close(p, 1);
  319: 
  320:   BGP_TRACE(D_EVENTS, "Down");
  321:   proto_notify_state(&p->p, PS_DOWN);
  322: }
  323: 
  324: static void
  325: bgp_decision(void *vp)
  326: {
  327:   struct bgp_proto *p = vp;
  328: 
  329:   DBG("BGP: Decision start\n");
  330:   if ((p->p.proto_state == PS_START)
  331:       && (p->outgoing_conn.state == BS_IDLE)
  332:       && (p->incoming_conn.state != BS_OPENCONFIRM)
  333:       && (!p->cf->passive))
  334:     bgp_active(p);
  335: 
  336:   if ((p->p.proto_state == PS_STOP)
  337:       && (p->outgoing_conn.state == BS_IDLE)
  338:       && (p->incoming_conn.state == BS_IDLE))
  339:     bgp_down(p);
  340: }
  341: 
  342: void
  343: bgp_stop(struct bgp_proto *p, uint subcode, byte *data, uint len)
  344: {
  345:   proto_notify_state(&p->p, PS_STOP);
  346:   bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len);
  347:   bgp_graceful_close_conn(&p->incoming_conn, subcode, data, len);
  348:   ev_schedule(p->event);
  349: }
  350: 
  351: static inline void
  352: bgp_conn_set_state(struct bgp_conn *conn, unsigned new_state)
  353: {
  354:   if (conn->bgp->p.mrtdump & MD_STATES)
  355:     bgp_dump_state_change(conn, conn->state, new_state);
  356: 
  357:   conn->state = new_state;
  358: }
  359: 
  360: void
  361: bgp_conn_enter_openconfirm_state(struct bgp_conn *conn)
  362: {
  363:   /* Really, most of the work is done in bgp_rx_open(). */
  364:   bgp_conn_set_state(conn, BS_OPENCONFIRM);
  365: }
  366: 
  367: void
  368: bgp_conn_enter_established_state(struct bgp_conn *conn)
  369: {
  370:   struct bgp_proto *p = conn->bgp;
  371: 
  372:   BGP_TRACE(D_EVENTS, "BGP session established");
  373:   DBG("BGP: UP!!!\n");
  374: 
  375:   /* For multi-hop BGP sessions */
  376:   if (ipa_zero(p->source_addr))
  377:     p->source_addr = conn->sk->saddr;
  378: 
  379:   conn->sk->fast_rx = 0;
  380: 
  381:   p->conn = conn;
  382:   p->last_error_class = 0;
  383:   p->last_error_code = 0;
  384:   p->feed_state = BFS_NONE;
  385:   p->load_state = BFS_NONE;
  386:   bgp_init_bucket_table(p);
  387:   bgp_init_prefix_table(p, 8);
  388: 
  389:   int peer_gr_ready = conn->peer_gr_aware && !(conn->peer_gr_flags & BGP_GRF_RESTART);
  390: 
  391:   if (p->p.gr_recovery && !peer_gr_ready)
  392:     proto_graceful_restart_unlock(&p->p);
  393: 
  394:   if (p->p.gr_recovery && (p->cf->gr_mode == BGP_GR_ABLE) && peer_gr_ready)
  395:     p->p.gr_wait = 1;
  396: 
  397:   if (p->gr_active == BGP_GRS_ACTIVE)
  398:     tm_stop(p->gr_timer);
  399: 
  400:   /* Check F-bit for regular graceful restart */
  401:   if ((p->gr_active == BGP_GRS_ACTIVE) &&
  402:       (!conn->peer_gr_able || !(conn->peer_gr_aflags & BGP_GRF_FORWARDING)))
  403:     bgp_graceful_restart_done(p);
  404: 
  405:   /* Check F-bit for long-lived graceful restart */
  406:   if (((p->gr_active == BGP_GRS_LLGR_1) || (p->gr_active == BGP_GRS_LLGR_2)) &&
  407:       (!conn->peer_llgr_able || !(conn->peer_llgr_aflags & BGP_LLGRF_FORWARDING)))
  408:     bgp_graceful_restart_done(p);
  409: 
  410:   /* GR capability implies that neighbor will send End-of-RIB */
  411:   if (conn->peer_gr_aware)
  412:     p->load_state = BFS_LOADING;
  413: 
  414:   /* proto_notify_state() will likely call bgp_feed_begin(), setting p->feed_state */
  415: 
  416:   bgp_conn_set_state(conn, BS_ESTABLISHED);
  417:   proto_notify_state(&p->p, PS_UP);
  418: }
  419: 
  420: static void
  421: bgp_conn_leave_established_state(struct bgp_proto *p)
  422: {
  423:   BGP_TRACE(D_EVENTS, "BGP session closed");
  424:   p->conn = NULL;
  425: 
  426:   bgp_free_prefix_table(p);
  427:   bgp_free_bucket_table(p);
  428: 
  429:   if (p->p.proto_state == PS_UP)
  430:     bgp_stop(p, 0, NULL, 0);
  431: }
  432: 
  433: void
  434: bgp_conn_enter_close_state(struct bgp_conn *conn)
  435: {
  436:   struct bgp_proto *p = conn->bgp;
  437:   int os = conn->state;
  438: 
  439:   bgp_conn_set_state(conn, BS_CLOSE);
  440:   tm_stop(conn->keepalive_timer);
  441:   conn->sk->rx_hook = NULL;
  442: 
  443:   /* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */
  444:   bgp_start_timer(conn->hold_timer, 10);
  445: 
  446:   if (os == BS_ESTABLISHED)
  447:     bgp_conn_leave_established_state(p);
  448: }
  449: 
  450: void
  451: bgp_conn_enter_idle_state(struct bgp_conn *conn)
  452: {
  453:   struct bgp_proto *p = conn->bgp;
  454:   int os = conn->state;
  455: 
  456:   bgp_close_conn(conn);
  457:   bgp_conn_set_state(conn, BS_IDLE);
  458:   ev_schedule(p->event);
  459: 
  460:   if (os == BS_ESTABLISHED)
  461:     bgp_conn_leave_established_state(p);
  462: }
  463: 
  464: /**
  465:  * bgp_handle_graceful_restart - handle detected BGP graceful restart
  466:  * @p: BGP instance
  467:  *
  468:  * This function is called when a BGP graceful restart of the neighbor is
  469:  * detected (when the TCP connection fails or when a new TCP connection
  470:  * appears). The function activates processing of the restart - starts routing
  471:  * table refresh cycle and activates BGP restart timer. The protocol state goes
  472:  * back to %PS_START, but changing BGP state back to %BS_IDLE is left for the
  473:  * caller.
  474:  */
  475: void
  476: bgp_handle_graceful_restart(struct bgp_proto *p)
  477: {
  478:   ASSERT(p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready);
  479: 
  480:   BGP_TRACE(D_EVENTS, "Neighbor graceful restart detected%s",
  481: 	    p->gr_active ? " - already pending" : "");
  482:   proto_notify_state(&p->p, PS_START);
  483: 
  484:   switch (p->gr_active)
  485:   {
  486:   case BGP_GRS_ACTIVE:
  487:     rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
  488:     break;
  489: 
  490:   case BGP_GRS_LLGR_1:
  491:     rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
  492:     return;
  493: 
  494:   case BGP_GRS_LLGR_2:
  495:     rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
  496:     rt_modify_stale(p->p.main_ahook->table, p->p.main_ahook);
  497:     return;
  498:   }
  499: 
  500:   p->stale_time = p->cf->llgr_mode ? p->conn->peer_llgr_time : 0;
  501:   p->gr_active = !p->stale_time ? BGP_GRS_ACTIVE : BGP_GRS_LLGR_1;
  502:   tm_start(p->gr_timer, p->conn->peer_gr_time);
  503:   rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
  504: }
  505: 
  506: /**
  507:  * bgp_graceful_restart_done - finish active BGP graceful restart
  508:  * @p: BGP instance
  509:  *
  510:  * This function is called when the active BGP graceful restart of the neighbor
  511:  * should be finished - either successfully (the neighbor sends all paths and
  512:  * reports end-of-RIB on the new session) or unsuccessfully (the neighbor does
  513:  * not support BGP graceful restart on the new session). The function ends
  514:  * routing table refresh cycle and stops BGP restart timer.
  515:  */
  516: void
  517: bgp_graceful_restart_done(struct bgp_proto *p)
  518: {
  519:   BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
  520:   p->gr_active = 0;
  521:   tm_stop(p->gr_timer);
  522:   rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
  523: }
  524: 
  525: /**
  526:  * bgp_graceful_restart_timeout - timeout of graceful restart 'restart timer'
  527:  * @t: timer
  528:  *
  529:  * This function is a timeout hook for @gr_timer, implementing BGP restart time
  530:  * limit for reestablisment of the BGP session after the graceful restart. When
  531:  * fired, we just proceed with the usual protocol restart.
  532:  */
  533: 
  534: static void
  535: bgp_graceful_restart_timeout(timer *t)
  536: {
  537:   struct bgp_proto *p = t->data;
  538: 
  539:   switch (p->gr_active)
  540:   {
  541:   case BGP_GRS_ACTIVE:
  542:     BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
  543:     bgp_stop(p, 0, NULL, 0);
  544:     return;
  545: 
  546:   case BGP_GRS_LLGR_1:
  547:     BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
  548:     p->gr_active = BGP_GRS_LLGR_2;
  549:     tm_start(p->gr_timer, p->stale_time);
  550:     rt_modify_stale(p->p.main_ahook->table, p->p.main_ahook);
  551:     return;
  552: 
  553:   case BGP_GRS_LLGR_2:
  554:     BGP_TRACE(D_EVENTS, "Long-lived graceful restart timeout");
  555:     p->gr_active = 0;
  556:     rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
  557:     return;
  558:   }
  559: }
  560: 
  561: /**
  562:  * bgp_refresh_begin - start incoming enhanced route refresh sequence
  563:  * @p: BGP instance
  564:  *
  565:  * This function is called when an incoming enhanced route refresh sequence is
  566:  * started by the neighbor, demarcated by the BoRR packet. The function updates
  567:  * the load state and starts the routing table refresh cycle. Note that graceful
  568:  * restart also uses routing table refresh cycle, but RFC 7313 and load states
  569:  * ensure that these two sequences do not overlap.
  570:  */
  571: void
  572: bgp_refresh_begin(struct bgp_proto *p)
  573: {
  574:   if (p->load_state == BFS_LOADING)
  575:     { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }
  576: 
  577:   p->load_state = BFS_REFRESHING;
  578:   rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
  579: }
  580: 
  581: /**
  582:  * bgp_refresh_end - finish incoming enhanced route refresh sequence
  583:  * @p: BGP instance
  584:  *
  585:  * This function is called when an incoming enhanced route refresh sequence is
  586:  * finished by the neighbor, demarcated by the EoRR packet. The function updates
  587:  * the load state and ends the routing table refresh cycle. Routes not received
  588:  * during the sequence are removed by the nest.
  589:  */
  590: void
  591: bgp_refresh_end(struct bgp_proto *p)
  592: {
  593:   if (p->load_state != BFS_REFRESHING)
  594:     { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }
  595: 
  596:   p->load_state = BFS_NONE;
  597:   rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
  598: }
  599: 
  600: 
  601: static void
  602: bgp_send_open(struct bgp_conn *conn)
  603: {
  604:   conn->start_state = conn->bgp->start_state;
  605: 
  606:   // Default values, possibly changed by receiving capabilities.
  607:   conn->advertised_as = 0;
  608:   conn->peer_refresh_support = 0;
  609:   conn->peer_as4_support = 0;
  610:   conn->peer_add_path = 0;
  611:   conn->peer_enhanced_refresh_support = 0;
  612:   conn->peer_gr_aware = 0;
  613:   conn->peer_gr_able = 0;
  614:   conn->peer_gr_time = 0;
  615:   conn->peer_gr_flags = 0;
  616:   conn->peer_gr_aflags = 0;
  617:   conn->peer_llgr_aware = 0;
  618:   conn->peer_llgr_able = 0;
  619:   conn->peer_llgr_time = 0;
  620:   conn->peer_llgr_aflags = 0;
  621:   conn->peer_ext_messages_support = 0;
  622: 
  623:   DBG("BGP: Sending open\n");
  624:   conn->sk->rx_hook = bgp_rx;
  625:   conn->sk->tx_hook = bgp_tx;
  626:   tm_stop(conn->connect_retry_timer);
  627:   bgp_schedule_packet(conn, PKT_OPEN);
  628:   bgp_conn_set_state(conn, BS_OPENSENT);
  629:   bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
  630: }
  631: 
  632: static void
  633: bgp_connected(sock *sk)
  634: {
  635:   struct bgp_conn *conn = sk->data;
  636:   struct bgp_proto *p = conn->bgp;
  637: 
  638:   BGP_TRACE(D_EVENTS, "Connected");
  639:   bgp_send_open(conn);
  640: }
  641: 
  642: static void
  643: bgp_connect_timeout(timer *t)
  644: {
  645:   struct bgp_conn *conn = t->data;
  646:   struct bgp_proto *p = conn->bgp;
  647: 
  648:   DBG("BGP: connect_timeout\n");
  649:   if (p->p.proto_state == PS_START)
  650:     {
  651:       bgp_close_conn(conn);
  652:       bgp_connect(p);
  653:     }
  654:   else
  655:     bgp_conn_enter_idle_state(conn);
  656: }
  657: 
  658: static void
  659: bgp_sock_err(sock *sk, int err)
  660: {
  661:   struct bgp_conn *conn = sk->data;
  662:   struct bgp_proto *p = conn->bgp;
  663: 
  664:   /*
  665:    * This error hook may be called either asynchronously from main
  666:    * loop, or synchronously from sk_send().  But sk_send() is called
  667:    * only from bgp_tx() and bgp_kick_tx(), which are both called
  668:    * asynchronously from main loop. Moreover, they end if err hook is
  669:    * called. Therefore, we could suppose that it is always called
  670:    * asynchronously.
  671:    */
  672: 
  673:   bgp_store_error(p, conn, BE_SOCKET, err);
  674: 
  675:   if (err)
  676:     BGP_TRACE(D_EVENTS, "Connection lost (%M)", err);
  677:   else
  678:     BGP_TRACE(D_EVENTS, "Connection closed");
  679: 
  680:   if ((conn->state == BS_ESTABLISHED) && p->gr_ready)
  681:     bgp_handle_graceful_restart(p);
  682: 
  683:   bgp_conn_enter_idle_state(conn);
  684: }
  685: 
  686: static void
  687: bgp_hold_timeout(timer *t)
  688: {
  689:   struct bgp_conn *conn = t->data;
  690:   struct bgp_proto *p = conn->bgp;
  691: 
  692:   DBG("BGP: Hold timeout\n");
  693: 
  694:   /* We are already closing the connection - just do hangup */
  695:   if (conn->state == BS_CLOSE)
  696:   {
  697:     BGP_TRACE(D_EVENTS, "Connection stalled");
  698:     bgp_conn_enter_idle_state(conn);
  699:     return;
  700:   }
  701: 
  702:   /* If there is something in input queue, we are probably congested
  703:      and perhaps just not processed BGP packets in time. */
  704: 
  705:   if (sk_rx_ready(conn->sk) > 0)
  706:     bgp_start_timer(conn->hold_timer, 10);
  707:   else if ((conn->state == BS_ESTABLISHED) && p->gr_ready && conn->peer_llgr_able)
  708:   {
  709:     BGP_TRACE(D_EVENTS, "Hold timer expired");
  710:     bgp_handle_graceful_restart(p);
  711:     bgp_conn_enter_idle_state(conn);
  712:   }
  713:   else
  714:     bgp_error(conn, 4, 0, NULL, 0);
  715: }
  716: 
  717: static void
  718: bgp_keepalive_timeout(timer *t)
  719: {
  720:   struct bgp_conn *conn = t->data;
  721: 
  722:   DBG("BGP: Keepalive timer\n");
  723:   bgp_schedule_packet(conn, PKT_KEEPALIVE);
  724: 
  725:   /* Kick TX a bit faster */
  726:   if (ev_active(conn->tx_ev))
  727:     ev_run(conn->tx_ev);
  728: }
  729: 
  730: static void
  731: bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
  732: {
  733:   timer *t;
  734: 
  735:   conn->sk = NULL;
  736:   conn->bgp = p;
  737:   conn->packets_to_send = 0;
  738: 
  739:   t = conn->connect_retry_timer = tm_new(p->p.pool);
  740:   t->hook = bgp_connect_timeout;
  741:   t->data = conn;
  742:   t = conn->hold_timer = tm_new(p->p.pool);
  743:   t->hook = bgp_hold_timeout;
  744:   t->data = conn;
  745:   t = conn->keepalive_timer = tm_new(p->p.pool);
  746:   t->hook = bgp_keepalive_timeout;
  747:   t->data = conn;
  748:   conn->tx_ev = ev_new(p->p.pool);
  749:   conn->tx_ev->hook = bgp_kick_tx;
  750:   conn->tx_ev->data = conn;
  751: }
  752: 
  753: static void
  754: bgp_setup_sk(struct bgp_conn *conn, sock *s)
  755: {
  756:   s->data = conn;
  757:   s->err_hook = bgp_sock_err;
  758:   s->fast_rx = 1;
  759:   conn->sk = s;
  760: }
  761: 
  762: static void
  763: bgp_active(struct bgp_proto *p)
  764: {
  765:   int delay = MAX(1, p->cf->connect_delay_time);
  766:   struct bgp_conn *conn = &p->outgoing_conn;
  767: 
  768:   BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
  769:   bgp_setup_conn(p, conn);
  770:   bgp_conn_set_state(conn, BS_ACTIVE);
  771:   bgp_start_timer(conn->connect_retry_timer, delay);
  772: }
  773: 
  774: /**
  775:  * bgp_connect - initiate an outgoing connection
  776:  * @p: BGP instance
  777:  *
  778:  * The bgp_connect() function creates a new &bgp_conn and initiates
  779:  * a TCP connection to the peer. The rest of connection setup is governed
  780:  * by the BGP state machine as described in the standard.
  781:  */
  782: static void
  783: bgp_connect(struct bgp_proto *p)	/* Enter Connect state and start establishing connection */
  784: {
  785:   sock *s;
  786:   struct bgp_conn *conn = &p->outgoing_conn;
  787:   int hops = p->cf->multihop ? : 1;
  788: 
  789:   DBG("BGP: Connecting\n");
  790:   s = sk_new(p->p.pool);
  791:   s->type = SK_TCP_ACTIVE;
  792:   s->saddr = p->source_addr;
  793:   s->daddr = p->cf->remote_ip;
  794:   s->dport = p->cf->remote_port;
  795:   s->iface = p->neigh ? p->neigh->iface : NULL;
  796:   s->vrf = p->p.vrf;
  797:   s->ttl = p->cf->ttl_security ? 255 : hops;
  798:   s->rbsize = p->cf->enable_extended_messages ? BGP_RX_BUFFER_EXT_SIZE : BGP_RX_BUFFER_SIZE;
  799:   s->tbsize = p->cf->enable_extended_messages ? BGP_TX_BUFFER_EXT_SIZE : BGP_TX_BUFFER_SIZE;
  800:   s->tos = IP_PREC_INTERNET_CONTROL;
  801:   s->password = p->cf->password;
  802:   s->tx_hook = bgp_connected;
  803:   BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J", s->daddr, p->cf->iface,
  804: 	    s->saddr, ipa_is_link_local(s->saddr) ? s->iface : NULL);
  805:   bgp_setup_conn(p, conn);
  806:   bgp_setup_sk(conn, s);
  807:   bgp_conn_set_state(conn, BS_CONNECT);
  808: 
  809:   if (sk_open(s) < 0)
  810:     goto err;
  811: 
  812:   /* Set minimal receive TTL if needed */
  813:   if (p->cf->ttl_security)
  814:     if (sk_set_min_ttl(s, 256 - hops) < 0)
  815:       goto err;
  816: 
  817:   DBG("BGP: Waiting for connect success\n");
  818:   bgp_start_timer(conn->connect_retry_timer, p->cf->connect_retry_time);
  819:   return;
  820: 
  821:  err:
  822:   sk_log_error(s, p->p.name);
  823:   bgp_sock_err(s, 0);
  824:   return;
  825: }
  826: 
  827: /**
  828:  * bgp_find_proto - find existing proto for incoming connection
  829:  * @sk: TCP socket
  830:  *
  831:  */
  832: static struct bgp_proto *
  833: bgp_find_proto(sock *sk)
  834: {
  835:   struct proto_config *pc;
  836: 
  837:   WALK_LIST(pc, config->protos)
  838:     if ((pc->protocol == &proto_bgp) && pc->proto)
  839:       {
  840: 	struct bgp_proto *p = (struct bgp_proto *) pc->proto;
  841: 	if (ipa_equal(p->cf->remote_ip, sk->daddr) &&
  842: 	    (!p->cf->iface || (p->cf->iface == sk->iface)))
  843: 	  return p;
  844:       }
  845: 
  846:   return NULL;
  847: }
  848: 
  849: /**
  850:  * bgp_incoming_connection - handle an incoming connection
  851:  * @sk: TCP socket
  852:  * @dummy: unused
  853:  *
  854:  * This function serves as a socket hook for accepting of new BGP
  855:  * connections. It searches a BGP instance corresponding to the peer
  856:  * which has connected and if such an instance exists, it creates a
  857:  * &bgp_conn structure, attaches it to the instance and either sends
  858:  * an Open message or (if there already is an active connection) it
  859:  * closes the new connection by sending a Notification message.
  860:  */
  861: static int
  862: bgp_incoming_connection(sock *sk, uint dummy UNUSED)
  863: {
  864:   struct bgp_proto *p;
  865:   int acc, hops;
  866: 
  867:   DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport);
  868:   p = bgp_find_proto(sk);
  869:   if (!p)
  870:     {
  871:       log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)",
  872: 	  sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport);
  873:       rfree(sk);
  874:       return 0;
  875:     }
  876: 
  877:   /*
  878:    * BIRD should keep multiple incoming connections in OpenSent state (for
  879:    * details RFC 4271 8.2.1 par 3), but it keeps just one. Duplicate incoming
  880:    * connections are rejected istead. The exception is the case where an
  881:    * incoming connection triggers a graceful restart.
  882:    */
  883: 
  884:   acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) &&
  885:     (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);
  886: 
  887:   if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
  888:     {
  889:       bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART);
  890:       bgp_handle_graceful_restart(p);
  891:       bgp_conn_enter_idle_state(p->conn);
  892:       acc = 1;
  893: 
  894:       /* There might be separate incoming connection in OpenSent state */
  895:       if (p->incoming_conn.state > BS_ACTIVE)
  896: 	bgp_close_conn(&p->incoming_conn);
  897:     }
  898: 
  899:   BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s",
  900: 	    sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL,
  901: 	    sk->dport, acc ? "accepted" : "rejected");
  902: 
  903:   if (!acc)
  904:     {
  905:       rfree(sk);
  906:       return 0;
  907:     }
  908: 
  909:   hops = p->cf->multihop ? : 1;
  910: 
  911:   if (sk_set_ttl(sk, p->cf->ttl_security ? 255 : hops) < 0)
  912:     goto err;
  913: 
  914:   if (p->cf->ttl_security)
  915:     if (sk_set_min_ttl(sk, 256 - hops) < 0)
  916:       goto err;
  917: 
  918:   if (p->cf->enable_extended_messages)
  919:     {
  920:       sk->rbsize = BGP_RX_BUFFER_EXT_SIZE;
  921:       sk->tbsize = BGP_TX_BUFFER_EXT_SIZE;
  922:       sk_reallocate(sk);
  923:     }
  924: 
  925:   bgp_setup_conn(p, &p->incoming_conn);
  926:   bgp_setup_sk(&p->incoming_conn, sk);
  927:   bgp_send_open(&p->incoming_conn);
  928:   return 0;
  929: 
  930: err:
  931:   sk_log_error(sk, p->p.name);
  932:   log(L_ERR "%s: Incoming connection aborted", p->p.name);
  933:   rfree(sk);
  934:   return 0;
  935: }
  936: 
  937: static void
  938: bgp_listen_sock_err(sock *sk UNUSED, int err)
  939: {
  940:   if (err == ECONNABORTED)
  941:     log(L_WARN "BGP: Incoming connection aborted");
  942:   else
  943:     log(L_ERR "BGP: Error on listening socket: %M", err);
  944: }
  945: 
  946: static sock *
  947: bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags)
  948: {
  949:   sock *s = sk_new(&root_pool);
  950:   DBG("BGP: Creating listening socket\n");
  951:   s->type = SK_TCP_PASSIVE;
  952:   s->ttl = 255;
  953:   s->saddr = addr;
  954:   s->sport = port ? port : BGP_PORT;
  955:   s->flags = flags ? 0 : SKF_V6ONLY;
  956:   s->tos = IP_PREC_INTERNET_CONTROL;
  957:   s->rbsize = BGP_RX_BUFFER_SIZE;
  958:   s->tbsize = BGP_TX_BUFFER_SIZE;
  959:   s->rx_hook = bgp_incoming_connection;
  960:   s->err_hook = bgp_listen_sock_err;
  961: 
  962:   if (sk_open(s) < 0)
  963:     goto err;
  964: 
  965:   return s;
  966: 
  967:  err:
  968:   sk_log_error(s, "BGP");
  969:   log(L_ERR "BGP: Cannot open listening socket");
  970:   rfree(s);
  971:   return NULL;
  972: }
  973: 
  974: static void
  975: bgp_start_neighbor(struct bgp_proto *p)
  976: {
  977:   /* Called only for single-hop BGP sessions */
  978: 
  979:   if (ipa_zero(p->source_addr))
  980:     p->source_addr = p->neigh->ifa->ip;
  981: 
  982: #ifdef IPV6
  983:   {
  984:     struct ifa *a;
  985:     p->local_link = IPA_NONE;
  986:     WALK_LIST(a, p->neigh->iface->addrs)
  987:       if (a->scope == SCOPE_LINK)
  988:         {
  989: 	  p->local_link = a->ip;
  990: 	  break;
  991: 	}
  992: 
  993:     if (! ipa_nonzero(p->local_link))
  994:       log(L_WARN "%s: Missing link local address on interface %s", p->p.name,  p->neigh->iface->name);
  995: 
  996:     DBG("BGP: Selected link-level address %I\n", p->local_link);
  997:   }
  998: #endif
  999: 
 1000:   bgp_initiate(p);
 1001: }
 1002: 
 1003: static void
 1004: bgp_neigh_notify(neighbor *n)
 1005: {
 1006:   struct bgp_proto *p = (struct bgp_proto *) n->proto;
 1007:   int ps = p->p.proto_state;
 1008: 
 1009:   if (n != p->neigh)
 1010:     return;
 1011: 
 1012:   if ((ps == PS_DOWN) || (ps == PS_STOP))
 1013:     return;
 1014: 
 1015:   int prepare = (ps == PS_START) && (p->start_state == BSS_PREPARE);
 1016: 
 1017:   if (n->scope <= 0)
 1018:     {
 1019:       if (!prepare)
 1020:         {
 1021: 	  BGP_TRACE(D_EVENTS, "Neighbor lost");
 1022: 	  bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST);
 1023: 	  /* Perhaps also run bgp_update_startup_delay(p)? */
 1024: 	  bgp_stop(p, 0, NULL, 0);
 1025: 	}
 1026:     }
 1027:   else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
 1028:     {
 1029:       if (!prepare)
 1030:         {
 1031: 	  BGP_TRACE(D_EVENTS, "Link down");
 1032: 	  bgp_store_error(p, NULL, BE_MISC, BEM_LINK_DOWN);
 1033: 	  if (ps == PS_UP)
 1034: 	    bgp_update_startup_delay(p);
 1035: 	  bgp_stop(p, 0, NULL, 0);
 1036: 	}
 1037:     }
 1038:   else
 1039:     {
 1040:       if (prepare)
 1041: 	{
 1042: 	  BGP_TRACE(D_EVENTS, "Neighbor ready");
 1043: 	  bgp_start_neighbor(p);
 1044: 	}
 1045:     }
 1046: }
 1047: 
 1048: static void
 1049: bgp_bfd_notify(struct bfd_request *req)
 1050: {
 1051:   struct bgp_proto *p = req->data;
 1052:   int ps = p->p.proto_state;
 1053: 
 1054:   if (req->down && ((ps == PS_START) || (ps == PS_UP)))
 1055:   {
 1056:     BGP_TRACE(D_EVENTS, "BFD session down");
 1057:     bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN);
 1058: 
 1059:     if (p->cf->bfd == BGP_BFD_GRACEFUL)
 1060:     {
 1061:       /* Trigger graceful restart */
 1062:       if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
 1063: 	bgp_handle_graceful_restart(p);
 1064: 
 1065:       if (p->incoming_conn.state > BS_IDLE)
 1066: 	bgp_conn_enter_idle_state(&p->incoming_conn);
 1067: 
 1068:       if (p->outgoing_conn.state > BS_IDLE)
 1069: 	bgp_conn_enter_idle_state(&p->outgoing_conn);
 1070:     }
 1071:     else
 1072:     {
 1073:       /* Trigger session down */
 1074:       if (ps == PS_UP)
 1075: 	bgp_update_startup_delay(p);
 1076:       bgp_stop(p, 0, NULL, 0);
 1077:     }
 1078:   }
 1079: }
 1080: 
 1081: static void
 1082: bgp_update_bfd(struct bgp_proto *p, int use_bfd)
 1083: {
 1084:   if (use_bfd && !p->bfd_req)
 1085:     p->bfd_req = bfd_request_session(p->p.pool, p->cf->remote_ip, p->source_addr,
 1086: 				     p->cf->multihop ? NULL : p->neigh->iface,
 1087: 				     p->p.vrf, bgp_bfd_notify, p);
 1088: 
 1089:   if (!use_bfd && p->bfd_req)
 1090:     {
 1091:       rfree(p->bfd_req);
 1092:       p->bfd_req = NULL;
 1093:     }
 1094: }
 1095: 
 1096: static int
 1097: bgp_reload_routes(struct proto *P)
 1098: {
 1099:   struct bgp_proto *p = (struct bgp_proto *) P;
 1100:   if (!p->conn || !p->conn->peer_refresh_support)
 1101:     return 0;
 1102: 
 1103:   bgp_schedule_packet(p->conn, PKT_ROUTE_REFRESH);
 1104:   return 1;
 1105: }
 1106: 
 1107: static void
 1108: bgp_feed_begin(struct proto *P, int initial)
 1109: {
 1110:   struct bgp_proto *p = (struct bgp_proto *) P;
 1111: 
 1112:   /* This should not happen */
 1113:   if (!p->conn)
 1114:     return;
 1115: 
 1116:   if (initial && p->cf->gr_mode)
 1117:     p->feed_state = BFS_LOADING;
 1118: 
 1119:   /* It is refeed and both sides support enhanced route refresh */
 1120:   if (!initial && p->cf->enable_refresh &&
 1121:       p->conn->peer_enhanced_refresh_support)
 1122:     {
 1123:       /* BoRR must not be sent before End-of-RIB */
 1124:       if (p->feed_state == BFS_LOADING || p->feed_state == BFS_LOADED)
 1125: 	return;
 1126: 
 1127:       p->feed_state = BFS_REFRESHING;
 1128:       bgp_schedule_packet(p->conn, PKT_BEGIN_REFRESH);
 1129:     }
 1130: }
 1131: 
 1132: static void
 1133: bgp_feed_end(struct proto *P)
 1134: {
 1135:   struct bgp_proto *p = (struct bgp_proto *) P;
 1136: 
 1137:   /* This should not happen */
 1138:   if (!p->conn)
 1139:     return;
 1140: 
 1141:   /* Non-demarcated feed ended, nothing to do */
 1142:   if (p->feed_state == BFS_NONE)
 1143:     return;
 1144: 
 1145:   /* Schedule End-of-RIB packet */
 1146:   if (p->feed_state == BFS_LOADING)
 1147:     p->feed_state = BFS_LOADED;
 1148: 
 1149:   /* Schedule EoRR packet */
 1150:   if (p->feed_state == BFS_REFRESHING)
 1151:     p->feed_state = BFS_REFRESHED;
 1152: 
 1153:   /* Kick TX hook */
 1154:   bgp_schedule_packet(p->conn, PKT_UPDATE);
 1155: }
 1156: 
 1157: 
 1158: static void
 1159: bgp_start_locked(struct object_lock *lock)
 1160: {
 1161:   struct bgp_proto *p = lock->data;
 1162:   struct bgp_config *cf = p->cf;
 1163: 
 1164:   if (p->p.proto_state != PS_START)
 1165:     {
 1166:       DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
 1167:       return;
 1168:     }
 1169: 
 1170:   DBG("BGP: Got lock\n");
 1171: 
 1172:   if (cf->multihop)
 1173:     {
 1174:       /* Multi-hop sessions do not use neighbor entries */
 1175:       bgp_initiate(p);
 1176:       return;
 1177:     }
 1178: 
 1179:   neighbor *n = neigh_find2(&p->p, &cf->remote_ip, cf->iface, NEF_STICKY);
 1180:   if (!n)
 1181:     {
 1182:       log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface);
 1183:       /* As we do not start yet, we can just disable protocol */
 1184:       p->p.disabled = 1;
 1185:       bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
 1186:       proto_notify_state(&p->p, PS_DOWN);
 1187:       return;
 1188:     }
 1189: 
 1190:   p->neigh = n;
 1191: 
 1192:   if (n->scope <= 0)
 1193:     BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", cf->remote_ip, cf->iface);
 1194:   else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
 1195:     BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name);
 1196:   else
 1197:     bgp_start_neighbor(p);
 1198: }
 1199: 
 1200: static int
 1201: bgp_start(struct proto *P)
 1202: {
 1203:   struct bgp_proto *p = (struct bgp_proto *) P;
 1204:   struct object_lock *lock;
 1205: 
 1206:   DBG("BGP: Startup.\n");
 1207:   p->start_state = BSS_PREPARE;
 1208:   p->outgoing_conn.state = BS_IDLE;
 1209:   p->incoming_conn.state = BS_IDLE;
 1210:   p->neigh = NULL;
 1211:   p->bfd_req = NULL;
 1212:   p->gr_ready = 0;
 1213:   p->gr_active = 0;
 1214: 
 1215:   rt_lock_table(p->igp_table);
 1216: 
 1217:   p->event = ev_new(p->p.pool);
 1218:   p->event->hook = bgp_decision;
 1219:   p->event->data = p;
 1220: 
 1221:   p->startup_timer = tm_new(p->p.pool);
 1222:   p->startup_timer->hook = bgp_startup_timeout;
 1223:   p->startup_timer->data = p;
 1224: 
 1225:   p->gr_timer = tm_new(p->p.pool);
 1226:   p->gr_timer->hook = bgp_graceful_restart_timeout;
 1227:   p->gr_timer->data = p;
 1228: 
 1229:   p->local_id = proto_get_router_id(P->cf);
 1230:   if (p->rr_client)
 1231:     p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
 1232: 
 1233:   p->remote_id = 0;
 1234:   p->source_addr = p->cf->source_addr;
 1235: 
 1236:   if (p->p.gr_recovery && p->cf->gr_mode)
 1237:     proto_graceful_restart_lock(P);
 1238: 
 1239:   /*
 1240:    *  Before attempting to create the connection, we need to lock the
 1241:    *  port, so that are sure we're the only instance attempting to talk
 1242:    *  with that neighbor.
 1243:    */
 1244: 
 1245:   lock = p->lock = olock_new(P->pool);
 1246:   lock->addr = p->cf->remote_ip;
 1247:   lock->port = p->cf->remote_port;
 1248:   lock->iface = p->cf->iface;
 1249:   lock->vrf = p->cf->iface ? NULL : p->p.vrf;
 1250:   lock->type = OBJLOCK_TCP;
 1251:   lock->hook = bgp_start_locked;
 1252:   lock->data = p;
 1253:   olock_acquire(lock);
 1254: 
 1255:   return PS_START;
 1256: }
 1257: 
 1258: extern int proto_restart;
 1259: 
 1260: static int
 1261: bgp_shutdown(struct proto *P)
 1262: {
 1263:   struct bgp_proto *p = (struct bgp_proto *) P;
 1264:   uint subcode = 0;
 1265: 
 1266:   char *message = NULL;
 1267:   byte *data = NULL;
 1268:   uint len = 0;
 1269: 
 1270:   BGP_TRACE(D_EVENTS, "Shutdown requested");
 1271: 
 1272:   switch (P->down_code)
 1273:     {
 1274:     case PDC_CF_REMOVE:
 1275:     case PDC_CF_DISABLE:
 1276:       subcode = 3; // Errcode 6, 3 - peer de-configured
 1277:       break;
 1278: 
 1279:     case PDC_CF_RESTART:
 1280:       subcode = 6; // Errcode 6, 6 - other configuration change
 1281:       break;
 1282: 
 1283:     case PDC_CMD_DISABLE:
 1284:     case PDC_CMD_SHUTDOWN:
 1285:       subcode = 2; // Errcode 6, 2 - administrative shutdown
 1286:       message = P->message;
 1287:       break;
 1288: 
 1289:     case PDC_CMD_RESTART:
 1290:       subcode = 4; // Errcode 6, 4 - administrative reset
 1291:       message = P->message;
 1292:       break;
 1293: 
 1294:     case PDC_RX_LIMIT_HIT:
 1295:     case PDC_IN_LIMIT_HIT:
 1296:       subcode = 1; // Errcode 6, 1 - max number of prefixes reached
 1297:       /* log message for compatibility */
 1298:       log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name);
 1299:       goto limit;
 1300: 
 1301:     case PDC_OUT_LIMIT_HIT:
 1302:       subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown
 1303: 
 1304:     limit:
 1305:       bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED);
 1306:       if (proto_restart)
 1307: 	bgp_update_startup_delay(p);
 1308:       else
 1309: 	p->startup_delay = 0;
 1310:       goto done;
 1311:     }
 1312: 
 1313:   bgp_store_error(p, NULL, BE_MAN_DOWN, 0);
 1314:   p->startup_delay = 0;
 1315: 
 1316:   /* RFC 8203 - shutdown communication */
 1317:   if (message)
 1318:   {
 1319:     uint msg_len = strlen(message);
 1320:     msg_len = MIN(msg_len, 255);
 1321: 
 1322:     /* Buffer will be freed automatically by protocol shutdown */
 1323:     data = mb_alloc(p->p.pool, msg_len + 1);
 1324:     len = msg_len + 1;
 1325: 
 1326:     data[0] = msg_len;
 1327:     memcpy(data+1, message, msg_len);
 1328:   }
 1329: 
 1330: done:
 1331:   bgp_stop(p, subcode, data, len);
 1332:   return p->p.proto_state;
 1333: }
 1334: 
 1335: static void
 1336: bgp_cleanup(struct proto *P)
 1337: {
 1338:   struct bgp_proto *p = (struct bgp_proto *) P;
 1339:   rt_unlock_table(p->igp_table);
 1340: }
 1341: 
 1342: static rtable *
 1343: get_igp_table(struct bgp_config *cf)
 1344: {
 1345:   return cf->igp_table ? cf->igp_table->table : cf->c.table->table;
 1346: }
 1347: 
 1348: static struct proto *
 1349: bgp_init(struct proto_config *C)
 1350: {
 1351:   struct proto *P = proto_new(C, sizeof(struct bgp_proto));
 1352:   struct bgp_config *c = (struct bgp_config *) C;
 1353:   struct bgp_proto *p = (struct bgp_proto *) P;
 1354: 
 1355:   P->accept_ra_types = c->secondary ? RA_ACCEPTED : RA_OPTIMAL;
 1356:   P->rt_notify = bgp_rt_notify;
 1357:   P->import_control = bgp_import_control;
 1358:   P->neigh_notify = bgp_neigh_notify;
 1359:   P->reload_routes = bgp_reload_routes;
 1360:   P->feed_begin = bgp_feed_begin;
 1361:   P->feed_end = bgp_feed_end;
 1362:   P->rte_better = bgp_rte_better;
 1363:   P->rte_mergable = bgp_rte_mergable;
 1364:   P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL;
 1365:   P->rte_modify = bgp_rte_modify_stale;
 1366: 
 1367:   p->cf = c;
 1368:   p->local_as = c->local_as;
 1369:   p->remote_as = c->remote_as;
 1370:   p->is_internal = (c->local_as == c->remote_as);
 1371:   p->rs_client = c->rs_client;
 1372:   p->rr_client = c->rr_client;
 1373:   p->igp_table = get_igp_table(c);
 1374: 
 1375:   return P;
 1376: }
 1377: 
 1378: 
 1379: void
 1380: bgp_check_config(struct bgp_config *c)
 1381: {
 1382:   int internal = (c->local_as == c->remote_as);
 1383: 
 1384:   /* Do not check templates at all */
 1385:   if (c->c.class == SYM_TEMPLATE)
 1386:     return;
 1387: 
 1388: 
 1389:   /* EBGP direct by default, IBGP multihop by default */
 1390:   if (c->multihop < 0)
 1391:     c->multihop = internal ? 64 : 0;
 1392: 
 1393:   /* Different default for gw_mode */
 1394:   if (!c->gw_mode)
 1395:     c->gw_mode = c->multihop ? GW_RECURSIVE : GW_DIRECT;
 1396: 
 1397:   /* Different default based on rs_client */
 1398:   if (!c->missing_lladdr)
 1399:     c->missing_lladdr = c->rs_client ? MLL_IGNORE : MLL_SELF;
 1400: 
 1401:   /* LLGR mode default based on GR mode */
 1402:   if (c->llgr_mode < 0)
 1403:     c->llgr_mode = c->gr_mode ? BGP_LLGR_AWARE : 0;
 1404: 
 1405:   /* Disable after error incompatible with restart limit action */
 1406:   if (c->c.in_limit && (c->c.in_limit->action == PLA_RESTART) && c->disable_after_error)
 1407:     c->c.in_limit->action = PLA_DISABLE;
 1408: 
 1409: 
 1410:   if (!c->local_as)
 1411:     cf_error("Local AS number must be set");
 1412: 
 1413:   if (ipa_zero(c->remote_ip))
 1414:     cf_error("Neighbor must be configured");
 1415: 
 1416:   if (!c->remote_as)
 1417:     cf_error("Remote AS number must be set");
 1418: 
 1419:   if (ipa_is_link_local(c->remote_ip) && !c->iface)
 1420:     cf_error("Link-local neighbor address requires specified interface");
 1421: 
 1422:   if (!(c->capabilities && c->enable_as4) && (c->remote_as > 0xFFFF))
 1423:     cf_error("Neighbor AS number out of range (AS4 not available)");
 1424: 
 1425:   if (!internal && c->rr_client)
 1426:     cf_error("Only internal neighbor can be RR client");
 1427: 
 1428:   if (internal && c->rs_client)
 1429:     cf_error("Only external neighbor can be RS client");
 1430: 
 1431:   if (c->multihop && (c->gw_mode == GW_DIRECT))
 1432:     cf_error("Multihop BGP cannot use direct gateway mode");
 1433: 
 1434:   if (c->multihop && (ipa_is_link_local(c->remote_ip) ||
 1435: 		      ipa_is_link_local(c->source_addr)))
 1436:     cf_error("Multihop BGP cannot be used with link-local addresses");
 1437: 
 1438:   if (c->multihop && c->iface)
 1439:     cf_error("Multihop BGP cannot be bound to interface");
 1440: 
 1441:   if (c->multihop && c->check_link)
 1442:     cf_error("Multihop BGP cannot depend on link state");
 1443: 
 1444:   if (c->multihop && c->bfd && ipa_zero(c->source_addr))
 1445:     cf_error("Multihop BGP with BFD requires specified source address");
 1446: 
 1447:   if ((c->gw_mode == GW_RECURSIVE) && c->c.table->sorted)
 1448:     cf_error("BGP in recursive mode prohibits sorted table");
 1449: 
 1450:   if (c->deterministic_med && c->c.table->sorted)
 1451:     cf_error("BGP with deterministic MED prohibits sorted table");
 1452: 
 1453:   if (c->secondary && !c->c.table->sorted)
 1454:     cf_error("BGP with secondary option requires sorted table");
 1455: 
 1456:   if (!c->gr_mode && c->llgr_mode)
 1457:     cf_error("Long-lived graceful restart requires basic graceful restart");
 1458: }
 1459: 
 1460: static int
 1461: bgp_reconfigure(struct proto *P, struct proto_config *C)
 1462: {
 1463:   struct bgp_config *new = (struct bgp_config *) C;
 1464:   struct bgp_proto *p = (struct bgp_proto *) P;
 1465:   struct bgp_config *old = p->cf;
 1466: 
 1467:   if (proto_get_router_id(C) != p->local_id)
 1468:     return 0;
 1469: 
 1470:   int same = !memcmp(((byte *) old) + sizeof(struct proto_config),
 1471: 		     ((byte *) new) + sizeof(struct proto_config),
 1472: 		     // password item is last and must be checked separately
 1473: 		     OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
 1474:     && ((!old->password && !new->password)
 1475: 	|| (old->password && new->password && !strcmp(old->password, new->password)))
 1476:     && (get_igp_table(old) == get_igp_table(new));
 1477: 
 1478:   if (same && (p->start_state > BSS_PREPARE))
 1479:     bgp_update_bfd(p, new->bfd);
 1480: 
 1481:   /* We should update our copy of configuration ptr as old configuration will be freed */
 1482:   if (same)
 1483:     p->cf = new;
 1484: 
 1485:   return same;
 1486: }
 1487: 
 1488: static void
 1489: bgp_copy_config(struct proto_config *dest, struct proto_config *src)
 1490: {
 1491:   /* Just a shallow copy */
 1492:   proto_copy_rest(dest, src, sizeof(struct bgp_config));
 1493: }
 1494: 
 1495: 
 1496: /**
 1497:  * bgp_error - report a protocol error
 1498:  * @c: connection
 1499:  * @code: error code (according to the RFC)
 1500:  * @subcode: error sub-code
 1501:  * @data: data to be passed in the Notification message
 1502:  * @len: length of the data
 1503:  *
 1504:  * bgp_error() sends a notification packet to tell the other side that a protocol
 1505:  * error has occurred (including the data considered erroneous if possible) and
 1506:  * closes the connection.
 1507:  */
 1508: void
 1509: bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len)
 1510: {
 1511:   struct bgp_proto *p = c->bgp;
 1512: 
 1513:   if (c->state == BS_CLOSE)
 1514:     return;
 1515: 
 1516:   bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, (len > 0) ? len : -len);
 1517:   bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode);
 1518:   bgp_conn_enter_close_state(c);
 1519: 
 1520:   c->notify_code = code;
 1521:   c->notify_subcode = subcode;
 1522:   c->notify_data = data;
 1523:   c->notify_size = (len > 0) ? len : 0;
 1524:   bgp_schedule_packet(c, PKT_NOTIFICATION);
 1525: 
 1526:   if (code != 6)
 1527:     {
 1528:       bgp_update_startup_delay(p);
 1529:       bgp_stop(p, 0, NULL, 0);
 1530:     }
 1531: }
 1532: 
 1533: /**
 1534:  * bgp_store_error - store last error for status report
 1535:  * @p: BGP instance
 1536:  * @c: connection
 1537:  * @class: error class (BE_xxx constants)
 1538:  * @code: error code (class specific)
 1539:  *
 1540:  * bgp_store_error() decides whether given error is interesting enough
 1541:  * and store that error to last_error variables of @p
 1542:  */
 1543: void
 1544: bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code)
 1545: {
 1546:   /* During PS_UP, we ignore errors on secondary connection */
 1547:   if ((p->p.proto_state == PS_UP) && c && (c != p->conn))
 1548:     return;
 1549: 
 1550:   /* During PS_STOP, we ignore any errors, as we want to report
 1551:    * the error that caused transition to PS_STOP
 1552:    */
 1553:   if (p->p.proto_state == PS_STOP)
 1554:     return;
 1555: 
 1556:   p->last_error_class = class;
 1557:   p->last_error_code = code;
 1558: }
 1559: 
 1560: static char *bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established", "Close" };
 1561: static char *bgp_err_classes[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""};
 1562: static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket", "Link down", "BFD session down", "Graceful restart"};
 1563: static char *bgp_auto_errors[] = { "", "Route limit exceeded"};
 1564: 
 1565: static const char *
 1566: bgp_last_errmsg(struct bgp_proto *p)
 1567: {
 1568:   switch (p->last_error_class)
 1569:     {
 1570:     case BE_MISC:
 1571:       return bgp_misc_errors[p->last_error_code];
 1572:     case BE_SOCKET:
 1573:       return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code);
 1574:     case BE_BGP_RX:
 1575:     case BE_BGP_TX:
 1576:       return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF);
 1577:     case BE_AUTO_DOWN:
 1578:       return bgp_auto_errors[p->last_error_code];
 1579:     default:
 1580:       return "";
 1581:     }
 1582: }
 1583: 
 1584: static const char *
 1585: bgp_state_dsc(struct bgp_proto *p)
 1586: {
 1587:   if (p->p.proto_state == PS_DOWN)
 1588:     return "Down";
 1589: 
 1590:   int state = MAX(p->incoming_conn.state, p->outgoing_conn.state);
 1591:   if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->cf->passive)
 1592:     return "Passive";
 1593: 
 1594:   return bgp_state_names[state];
 1595: }
 1596: 
 1597: static void
 1598: bgp_get_status(struct proto *P, byte *buf)
 1599: {
 1600:   struct bgp_proto *p = (struct bgp_proto *) P;
 1601: 
 1602:   const char *err1 = bgp_err_classes[p->last_error_class];
 1603:   const char *err2 = bgp_last_errmsg(p);
 1604: 
 1605:   if (P->proto_state == PS_DOWN)
 1606:     bsprintf(buf, "%s%s", err1, err2);
 1607:   else
 1608:     bsprintf(buf, "%-14s%s%s", bgp_state_dsc(p), err1, err2);
 1609: }
 1610: 
 1611: static void
 1612: bgp_show_proto_info(struct proto *P)
 1613: {
 1614:   struct bgp_proto *p = (struct bgp_proto *) P;
 1615:   struct bgp_conn *c = p->conn;
 1616: 
 1617:   proto_show_basic_info(P);
 1618: 
 1619:   cli_msg(-1006, "  BGP state:          %s", bgp_state_dsc(p));
 1620:   cli_msg(-1006, "    Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface);
 1621:   cli_msg(-1006, "    Neighbor AS:      %u", p->remote_as);
 1622: 
 1623:   if (p->gr_active)
 1624:     cli_msg(-1006, "    Neighbor graceful restart active");
 1625: 
 1626:   if (p->gr_active && p->gr_timer->expires)
 1627:     cli_msg(-1006, "    %-15s   %d/-",
 1628: 	    (p->gr_active != BGP_GRS_LLGR_2) ? "Restart timer:" : "LL stale timer:",
 1629: 	    p->gr_timer->expires - now);
 1630: 
 1631:   if (P->proto_state == PS_START)
 1632:     {
 1633:       struct bgp_conn *oc = &p->outgoing_conn;
 1634: 
 1635:       if ((p->start_state < BSS_CONNECT) &&
 1636: 	  (p->startup_timer->expires))
 1637: 	cli_msg(-1006, "    Error wait:       %d/%d",
 1638: 		p->startup_timer->expires - now, p->startup_delay);
 1639: 
 1640:       if ((oc->state == BS_ACTIVE) &&
 1641: 	  (oc->connect_retry_timer->expires))
 1642: 	cli_msg(-1006, "    Connect delay:    %d/%d",
 1643: 		oc->connect_retry_timer->expires - now, p->cf->connect_delay_time);
 1644:     }
 1645:   else if (P->proto_state == PS_UP)
 1646:     {
 1647:       cli_msg(-1006, "    Neighbor ID:      %R", p->remote_id);
 1648:       cli_msg(-1006, "    Neighbor caps:   %s%s%s%s%s%s%s",
 1649: 	      c->peer_refresh_support ? " refresh" : "",
 1650: 	      c->peer_enhanced_refresh_support ? " enhanced-refresh" : "",
 1651: 	      c->peer_gr_able ? " restart-able" : (c->peer_gr_aware ? " restart-aware" : ""),
 1652: 	      c->peer_llgr_able ? " llgr-able" : (c->peer_llgr_aware ? " llgr-aware" : ""),
 1653: 	      c->peer_as4_support ? " AS4" : "",
 1654: 	      (c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "",
 1655: 	      (c->peer_add_path & ADD_PATH_TX) ? " add-path-tx" : "",
 1656: 	      c->peer_ext_messages_support ? " ext-messages" : "");
 1657:       cli_msg(-1006, "    Session:          %s%s%s%s%s%s%s%s",
 1658: 	      p->is_internal ? "internal" : "external",
 1659: 	      p->cf->multihop ? " multihop" : "",
 1660: 	      p->rr_client ? " route-reflector" : "",
 1661: 	      p->rs_client ? " route-server" : "",
 1662: 	      p->as4_session ? " AS4" : "",
 1663: 	      p->add_path_rx ? " add-path-rx" : "",
 1664: 	      p->add_path_tx ? " add-path-tx" : "",
 1665: 	      p->ext_messages ? " ext-messages" : "");
 1666:       cli_msg(-1006, "    Source address:   %I", p->source_addr);
 1667:       if (P->cf->in_limit)
 1668: 	cli_msg(-1006, "    Route limit:      %d/%d",
 1669: 		p->p.stats.imp_routes + p->p.stats.filt_routes, P->cf->in_limit->limit);
 1670:       cli_msg(-1006, "    Hold timer:       %d/%d",
 1671: 	      tm_remains(c->hold_timer), c->hold_time);
 1672:       cli_msg(-1006, "    Keepalive timer:  %d/%d",
 1673: 	      tm_remains(c->keepalive_timer), c->keepalive_time);
 1674:     }
 1675: 
 1676:   if ((p->last_error_class != BE_NONE) &&
 1677:       (p->last_error_class != BE_MAN_DOWN))
 1678:     {
 1679:       const char *err1 = bgp_err_classes[p->last_error_class];
 1680:       const char *err2 = bgp_last_errmsg(p);
 1681:       cli_msg(-1006, "    Last error:       %s%s", err1, err2);
 1682:     }
 1683: }
 1684: 
 1685: struct protocol proto_bgp = {
 1686:   .name = 		"BGP",
 1687:   .template = 		"bgp%d",
 1688:   .attr_class = 	EAP_BGP,
 1689:   .preference = 	DEF_PREF_BGP,
 1690:   .config_size =	sizeof(struct bgp_config),
 1691:   .init = 		bgp_init,
 1692:   .start = 		bgp_start,
 1693:   .shutdown = 		bgp_shutdown,
 1694:   .cleanup = 		bgp_cleanup,
 1695:   .reconfigure = 	bgp_reconfigure,
 1696:   .copy_config = 	bgp_copy_config,
 1697:   .get_status = 	bgp_get_status,
 1698:   .get_attr = 		bgp_get_attr,
 1699:   .get_route_info = 	bgp_get_route_info,
 1700:   .show_proto_info = 	bgp_show_proto_info
 1701: };

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>