File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / bird / proto / bgp / bgp.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Aug 22 12:33:54 2017 UTC (6 years, 11 months ago) by misho
Branches: bird, MAIN
CVS tags: v1_6_3p0, v1_6_3, HEAD
bird 1.6.3

    1: /*
    2:  *	BIRD -- The Border Gateway Protocol
    3:  *
    4:  *	(c) 2000 Martin Mares <mj@ucw.cz>
    5:  *
    6:  *	Can be freely distributed and used under the terms of the GNU GPL.
    7:  */
    8: 
    9: /**
   10:  * DOC: Border Gateway Protocol
   11:  *
   12:  * The BGP protocol is implemented in three parts: |bgp.c| which takes care of the
   13:  * connection and most of the interface with BIRD core, |packets.c| handling
   14:  * both incoming and outgoing BGP packets and |attrs.c| containing functions for
   15:  * manipulation with BGP attribute lists.
   16:  *
   17:  * As opposed to the other existing routing daemons, BIRD has a sophisticated core
   18:  * architecture which is able to keep all the information needed by BGP in the
   19:  * primary routing table, therefore no complex data structures like a central
   20:  * BGP table are needed. This increases memory footprint of a BGP router with
   21:  * many connections, but not too much and, which is more important, it makes
   22:  * BGP much easier to implement.
   23:  *
   24:  * Each instance of BGP (corresponding to a single BGP peer) is described by a &bgp_proto
   25:  * structure to which are attached individual connections represented by &bgp_connection
   26:  * (usually, there exists only one connection, but during BGP session setup, there
   27:  * can be more of them). The connections are handled according to the BGP state machine
   28:  * defined in the RFC with all the timers and all the parameters configurable.
   29:  *
   30:  * In incoming direction, we listen on the connection's socket and each time we receive
   31:  * some input, we pass it to bgp_rx(). It decodes packet headers and the markers and
   32:  * passes complete packets to bgp_rx_packet() which distributes the packet according
   33:  * to its type.
   34:  *
   35:  * In outgoing direction, we gather all the routing updates and sort them to buckets
   36:  * (&bgp_bucket) according to their attributes (we keep a hash table for fast comparison
   37:  * of &rta's and a &fib which helps us to find if we already have another route for
   38:  * the same destination queued for sending, so that we can replace it with the new one
   39:  * immediately instead of sending both updates). There also exists a special bucket holding
   40:  * all the route withdrawals which cannot be queued anywhere else as they don't have any
 * attributes. If we have any packet to send (due to either new routes or the connection
 * tracking code wanting to send an Open, Keepalive or Notification message), we call
 * bgp_schedule_packet() which sets the corresponding bit in a @packets_to_send
 * bit field in &bgp_conn and as soon as the transmit socket buffer becomes empty,
   45:  * we call bgp_fire_tx(). It inspects state of all the packet type bits and calls
   46:  * the corresponding bgp_create_xx() functions, eventually rescheduling the same packet
   47:  * type if we have more data of the same type to send.
   48:  *
   49:  * The processing of attributes consists of two functions: bgp_decode_attrs() for checking
   50:  * of the attribute blocks and translating them to the language of BIRD's extended attributes
   51:  * and bgp_encode_attrs() which does the converse. Both functions are built around a
   52:  * @bgp_attr_table array describing all important characteristics of all known attributes.
   53:  * Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
   54:  *
   55:  * BGP protocol implements graceful restart in both restarting (local restart)
   56:  * and receiving (neighbor restart) roles. The first is handled mostly by the
   57:  * graceful restart code in the nest, BGP protocol just handles capabilities,
   58:  * sets @gr_wait and locks graceful restart until end-of-RIB mark is received.
   59:  * The second is implemented by internal restart of the BGP state to %BS_IDLE
   60:  * and protocol state to %PS_START, but keeping the protocol up from the core
   61:  * point of view and therefore maintaining received routes. Routing table
   62:  * refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing
   63:  * stale routes after reestablishment of BGP session during graceful restart.
   64:  */
   65: 
   66: #undef LOCAL_DEBUG
   67: 
   68: #include "nest/bird.h"
   69: #include "nest/iface.h"
   70: #include "nest/protocol.h"
   71: #include "nest/route.h"
   72: #include "nest/cli.h"
   73: #include "nest/locks.h"
   74: #include "conf/conf.h"
   75: #include "lib/socket.h"
   76: #include "lib/resource.h"
   77: #include "lib/string.h"
   78: 
   79: #include "bgp.h"
   80: 
   81: 
   82: struct linpool *bgp_linpool;		/* Global temporary pool */
   83: static sock *bgp_listen_sk;		/* Global listening socket */
   84: static int bgp_counter;			/* Number of protocol instances using the listening socket */
   85: 
   86: static void bgp_close(struct bgp_proto *p, int apply_md5);
   87: static void bgp_connect(struct bgp_proto *p);
   88: static void bgp_active(struct bgp_proto *p);
   89: static sock *bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags);
   90: static void bgp_update_bfd(struct bgp_proto *p, int use_bfd);
   91: 
   92: 
   93: /**
   94:  * bgp_open - open a BGP instance
   95:  * @p: BGP instance
   96:  *
   97:  * This function allocates and configures shared BGP resources.
   98:  * Should be called as the last step during initialization
   99:  * (when lock is acquired and neighbor is ready).
  100:  * When error, state changed to PS_DOWN, -1 is returned and caller
  101:  * should return immediately.
  102:  */
  103: static int
  104: bgp_open(struct bgp_proto *p)
  105: {
  106:   struct config *cfg = p->cf->c.global;
  107:   int errcode;
  108: 
  109:   if (!bgp_listen_sk)
  110:     bgp_listen_sk = bgp_setup_listen_sk(cfg->listen_bgp_addr, cfg->listen_bgp_port, cfg->listen_bgp_flags);
  111: 
  112:   if (!bgp_listen_sk)
  113:     {
  114:       errcode = BEM_NO_SOCKET;
  115:       goto err;
  116:     }
  117: 
  118:   if (!bgp_linpool)
  119:     bgp_linpool = lp_new(&root_pool, 4080);
  120: 
  121:   bgp_counter++;
  122: 
  123:   if (p->cf->password)
  124:     if (sk_set_md5_auth(bgp_listen_sk, p->cf->source_addr, p->cf->remote_ip,
  125: 			p->cf->iface, p->cf->password, p->cf->setkey) < 0)
  126:       {
  127: 	sk_log_error(bgp_listen_sk, p->p.name);
  128: 	bgp_close(p, 0);
  129: 	errcode = BEM_INVALID_MD5;
  130: 	goto err;
  131:       }
  132: 
  133:   return 0;
  134: 
  135: err:
  136:   p->p.disabled = 1;
  137:   bgp_store_error(p, NULL, BE_MISC, errcode);
  138:   proto_notify_state(&p->p, PS_DOWN);
  139:   return -1;
  140: }
  141: 
  142: static void
  143: bgp_startup(struct bgp_proto *p)
  144: {
  145:   BGP_TRACE(D_EVENTS, "Started");
  146:   p->start_state = p->cf->capabilities ? BSS_CONNECT : BSS_CONNECT_NOCAP;
  147: 
  148:   if (!p->cf->passive)
  149:     bgp_active(p);
  150: }
  151: 
  152: static void
  153: bgp_startup_timeout(timer *t)
  154: {
  155:   bgp_startup(t->data);
  156: }
  157: 
  158: 
  159: static void
  160: bgp_initiate(struct bgp_proto *p)
  161: {
  162:   int rv = bgp_open(p);
  163:   if (rv < 0)
  164:     return;
  165: 
  166:   if (p->cf->bfd)
  167:     bgp_update_bfd(p, p->cf->bfd);
  168: 
  169:   if (p->startup_delay)
  170:     {
  171:       p->start_state = BSS_DELAY;
  172:       BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay);
  173:       bgp_start_timer(p->startup_timer, p->startup_delay);
  174:     }
  175:   else
  176:     bgp_startup(p);
  177: }
  178: 
  179: /**
  180:  * bgp_close - close a BGP instance
  181:  * @p: BGP instance
  182:  * @apply_md5: 0 to disable unsetting MD5 auth
  183:  *
  184:  * This function frees and deconfigures shared BGP resources.
  185:  * @apply_md5 is set to 0 when bgp_close is called as a cleanup
  186:  * from failed bgp_open().
  187:  */
  188: static void
  189: bgp_close(struct bgp_proto *p, int apply_md5)
  190: {
  191:   ASSERT(bgp_counter);
  192:   bgp_counter--;
  193: 
  194:   if (p->cf->password && apply_md5)
  195:     if (sk_set_md5_auth(bgp_listen_sk, p->cf->source_addr, p->cf->remote_ip,
  196: 			p->cf->iface, NULL, p->cf->setkey) < 0)
  197:       sk_log_error(bgp_listen_sk, p->p.name);
  198: 
  199:   if (!bgp_counter)
  200:     {
  201:       rfree(bgp_listen_sk);
  202:       bgp_listen_sk = NULL;
  203:       rfree(bgp_linpool);
  204:       bgp_linpool = NULL;
  205:     }
  206: }
  207: 
  208: /**
  209:  * bgp_start_timer - start a BGP timer
  210:  * @t: timer
  211:  * @value: time to fire (0 to disable the timer)
  212:  *
  213:  * This functions calls tm_start() on @t with time @value and the
  214:  * amount of randomization suggested by the BGP standard. Please use
  215:  * it for all BGP timers.
  216:  */
  217: void
  218: bgp_start_timer(timer *t, int value)
  219: {
  220:   if (value)
  221:     {
  222:       /* The randomization procedure is specified in RFC 1771: 9.2.3.3 */
  223:       t->randomize = value / 4;
  224:       tm_start(t, value - t->randomize);
  225:     }
  226:   else
  227:     tm_stop(t);
  228: }
  229: 
/**
 * bgp_close_conn - close a BGP connection
 * @conn: connection to close
 *
 * This function takes a connection described by the &bgp_conn structure,
 * clears its pending packet bits and frees its three timers, its socket and
 * its TX event. Every freed pointer is reset to NULL. The &bgp_conn structure
 * itself is not freed and the connection state is left untouched.
 */
void
bgp_close_conn(struct bgp_conn *conn)
{
  // struct bgp_proto *p = conn->bgp;

  DBG("BGP: Closing connection\n");
  /* Nothing more will be sent on this connection */
  conn->packets_to_send = 0;
  rfree(conn->connect_retry_timer);
  conn->connect_retry_timer = NULL;
  rfree(conn->keepalive_timer);
  conn->keepalive_timer = NULL;
  rfree(conn->hold_timer);
  conn->hold_timer = NULL;
  rfree(conn->sk);
  conn->sk = NULL;
  rfree(conn->tx_ev);
  conn->tx_ev = NULL;
}
  255: 
  256: 
  257: /**
  258:  * bgp_update_startup_delay - update a startup delay
  259:  * @p: BGP instance
  260:  *
  261:  * This function updates a startup delay that is used to postpone next BGP connect.
  262:  * It also handles disable_after_error and might stop BGP instance when error
  263:  * happened and disable_after_error is on.
  264:  *
  265:  * It should be called when BGP protocol error happened.
  266:  */
  267: void
  268: bgp_update_startup_delay(struct bgp_proto *p)
  269: {
  270:   struct bgp_config *cf = p->cf;
  271: 
  272:   DBG("BGP: Updating startup delay\n");
  273: 
  274:   if (p->last_proto_error && ((now - p->last_proto_error) >= (int) cf->error_amnesia_time))
  275:     p->startup_delay = 0;
  276: 
  277:   p->last_proto_error = now;
  278: 
  279:   if (cf->disable_after_error)
  280:     {
  281:       p->startup_delay = 0;
  282:       p->p.disabled = 1;
  283:       return;
  284:     }
  285: 
  286:   if (!p->startup_delay)
  287:     p->startup_delay = cf->error_delay_time_min;
  288:   else
  289:     p->startup_delay = MIN(2 * p->startup_delay, cf->error_delay_time_max);
  290: }
  291: 
  292: static void
  293: bgp_graceful_close_conn(struct bgp_conn *conn, unsigned subcode)
  294: {
  295:   switch (conn->state)
  296:     {
  297:     case BS_IDLE:
  298:     case BS_CLOSE:
  299:       return;
  300:     case BS_CONNECT:
  301:     case BS_ACTIVE:
  302:       bgp_conn_enter_idle_state(conn);
  303:       return;
  304:     case BS_OPENSENT:
  305:     case BS_OPENCONFIRM:
  306:     case BS_ESTABLISHED:
  307:       bgp_error(conn, 6, subcode, NULL, 0);
  308:       return;
  309:     default:
  310:       bug("bgp_graceful_close_conn: Unknown state %d", conn->state);
  311:     }
  312: }
  313: 
  314: static void
  315: bgp_down(struct bgp_proto *p)
  316: {
  317:   if (p->start_state > BSS_PREPARE)
  318:     bgp_close(p, 1);
  319: 
  320:   BGP_TRACE(D_EVENTS, "Down");
  321:   proto_notify_state(&p->p, PS_DOWN);
  322: }
  323: 
  324: static void
  325: bgp_decision(void *vp)
  326: {
  327:   struct bgp_proto *p = vp;
  328: 
  329:   DBG("BGP: Decision start\n");
  330:   if ((p->p.proto_state == PS_START)
  331:       && (p->outgoing_conn.state == BS_IDLE)
  332:       && (p->incoming_conn.state != BS_OPENCONFIRM)
  333:       && (!p->cf->passive))
  334:     bgp_active(p);
  335: 
  336:   if ((p->p.proto_state == PS_STOP)
  337:       && (p->outgoing_conn.state == BS_IDLE)
  338:       && (p->incoming_conn.state == BS_IDLE))
  339:     bgp_down(p);
  340: }
  341: 
/**
 * bgp_stop - begin shutting down a BGP instance
 * @p: BGP instance
 * @subcode: subcode passed to bgp_graceful_close_conn() for both connections
 *
 * Moves the protocol to %PS_STOP, gracefully closes both the outgoing and
 * the incoming connection and schedules the instance event.
 */
void
bgp_stop(struct bgp_proto *p, unsigned subcode)
{
  proto_notify_state(&p->p, PS_STOP);
  bgp_graceful_close_conn(&p->outgoing_conn, subcode);
  bgp_graceful_close_conn(&p->incoming_conn, subcode);
  /* NOTE(review): presumably runs bgp_decision(); the hook is set outside this section */
  ev_schedule(p->event);
}
  350: 
  351: static inline void
  352: bgp_conn_set_state(struct bgp_conn *conn, unsigned new_state)
  353: {
  354:   if (conn->bgp->p.mrtdump & MD_STATES)
  355:     mrt_dump_bgp_state_change(conn, conn->state, new_state);
  356: 
  357:   conn->state = new_state;
  358: }
  359: 
/**
 * bgp_conn_enter_openconfirm_state - move a connection to OpenConfirm
 * @conn: connection
 *
 * Only records the state change; no timers or socket hooks are touched here.
 */
void
bgp_conn_enter_openconfirm_state(struct bgp_conn *conn)
{
  /* Really, most of the work is done in bgp_rx_open(). */
  bgp_conn_set_state(conn, BS_OPENCONFIRM);
}
  366: 
/**
 * bgp_conn_enter_established_state - move a connection to Established
 * @conn: connection that completed session setup
 *
 * Makes @conn the active connection of its BGP instance, resets the stored
 * error and the feed/load states, initializes the bucket and prefix tables,
 * resolves the graceful restart bookkeeping (both our own recovery and a
 * pending restart of the neighbor) and finally announces %PS_UP to the core.
 */
void
bgp_conn_enter_established_state(struct bgp_conn *conn)
{
  struct bgp_proto *p = conn->bgp;

  BGP_TRACE(D_EVENTS, "BGP session established");
  DBG("BGP: UP!!!\n");

  /* For multi-hop BGP sessions */
  if (ipa_zero(p->source_addr))
    p->source_addr = conn->sk->saddr;

  conn->sk->fast_rx = 0;

  p->conn = conn;
  p->last_error_class = 0;
  p->last_error_code = 0;
  p->feed_state = BFS_NONE;
  p->load_state = BFS_NONE;
  bgp_init_bucket_table(p);
  bgp_init_prefix_table(p, 8);

  /* Peer advertised the GR capability and is not restarting itself */
  int peer_gr_ready = conn->peer_gr_aware && !(conn->peer_gr_flags & BGP_GRF_RESTART);

  /* Local recovery cannot wait for a peer that will not send End-of-RIB */
  if (p->p.gr_recovery && !peer_gr_ready)
    proto_graceful_restart_unlock(&p->p);

  if (p->p.gr_recovery && (p->cf->gr_mode == BGP_GR_ABLE) && peer_gr_ready)
    p->p.gr_wait = 1;

  /* The session is back, so the neighbor restart timer is no longer needed */
  if (p->gr_active)
    tm_stop(p->gr_timer);

  /* Neighbor did not preserve forwarding state (or lost GR ability) - finish the restart now */
  if (p->gr_active && (!conn->peer_gr_able || !(conn->peer_gr_aflags & BGP_GRF_FORWARDING)))
    bgp_graceful_restart_done(p);

  /* GR capability implies that neighbor will send End-of-RIB */
  if (conn->peer_gr_aware)
    p->load_state = BFS_LOADING;

  /* proto_notify_state() will likely call bgp_feed_begin(), setting p->feed_state */

  bgp_conn_set_state(conn, BS_ESTABLISHED);
  proto_notify_state(&p->p, PS_UP);
}
  412: 
/*
 * Common cleanup when the established connection of @p goes away: forget
 * the active connection, free the prefix and bucket tables and, if the
 * protocol is still up from the core point of view, stop it.
 */
static void
bgp_conn_leave_established_state(struct bgp_proto *p)
{
  BGP_TRACE(D_EVENTS, "BGP session closed");
  p->conn = NULL;

  bgp_free_prefix_table(p);
  bgp_free_bucket_table(p);

  /* Not taken when the state was already changed, e.g. to PS_START by bgp_handle_graceful_restart() */
  if (p->p.proto_state == PS_UP)
    bgp_stop(p, 0);
}
  425: 
/**
 * bgp_conn_enter_close_state - move a connection to the Close state
 * @conn: connection
 *
 * Stops the keepalive timer, disables further packet reception on the socket
 * and arms the hold timer as a timeout for the Close state. If the connection
 * was established, the established-state cleanup is run as well.
 */
void
bgp_conn_enter_close_state(struct bgp_conn *conn)
{
  struct bgp_proto *p = conn->bgp;
  int os = conn->state;		/* State before the change, checked below */

  bgp_conn_set_state(conn, BS_CLOSE);
  tm_stop(conn->keepalive_timer);
  conn->sk->rx_hook = NULL;

  /* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */
  bgp_start_timer(conn->hold_timer, 10);

  if (os == BS_ESTABLISHED)
    bgp_conn_leave_established_state(p);
}
  442: 
/**
 * bgp_conn_enter_idle_state - move a connection to the Idle state
 * @conn: connection
 *
 * Frees the resources of the connection, marks it idle and schedules the
 * instance event. If the connection was established, the established-state
 * cleanup is run as well.
 */
void
bgp_conn_enter_idle_state(struct bgp_conn *conn)
{
  struct bgp_proto *p = conn->bgp;
  int os = conn->state;		/* State before the change, checked below */

  bgp_close_conn(conn);
  bgp_conn_set_state(conn, BS_IDLE);
  ev_schedule(p->event);

  if (os == BS_ESTABLISHED)
    bgp_conn_leave_established_state(p);
}
  456: 
/**
 * bgp_handle_graceful_restart - handle detected BGP graceful restart
 * @p: BGP instance
 *
 * This function is called when a BGP graceful restart of the neighbor is
 * detected (when the TCP connection fails or when a new TCP connection
 * appears). The function activates processing of the restart - starts routing
 * table refresh cycle and activates BGP restart timer. The protocol state goes
 * back to %PS_START, but changing BGP state back to %BS_IDLE is left for the
 * caller.
 *
 * The caller must ensure that there is an established connection and that
 * @gr_ready is set; this is asserted.
 */
void
bgp_handle_graceful_restart(struct bgp_proto *p)
{
  ASSERT(p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready);

  BGP_TRACE(D_EVENTS, "Neighbor graceful restart detected%s",
	    p->gr_active ? " - already pending" : "");
  proto_notify_state(&p->p, PS_START);

  /* A restart was already pending - close its refresh cycle before starting a new one */
  if (p->gr_active)
    rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);

  p->gr_active = 1;
  /* The restart time announced by the peer limits how long we wait for the new session */
  bgp_start_timer(p->gr_timer, p->conn->peer_gr_time);
  rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
}
  484: 
/**
 * bgp_graceful_restart_done - finish active BGP graceful restart
 * @p: BGP instance
 *
 * This function is called when the active BGP graceful restart of the neighbor
 * should be finished - either successfully (the neighbor sends all paths and
 * reports end-of-RIB on the new session) or unsuccessfully (the neighbor does
 * not support BGP graceful restart on the new session). The function clears
 * @gr_active, stops the BGP restart timer and ends the routing table refresh
 * cycle.
 */
void
bgp_graceful_restart_done(struct bgp_proto *p)
{
  BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
  p->gr_active = 0;
  tm_stop(p->gr_timer);
  rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
}
  503: 
/**
 * bgp_graceful_restart_timeout - timeout of graceful restart 'restart timer'
 * @t: timer
 *
 * This function is a timeout hook for @gr_timer, implementing BGP restart time
 * limit for reestablishment of the BGP session after the graceful restart. When
 * fired, we just proceed with the usual protocol restart by calling bgp_stop().
 */

static void
bgp_graceful_restart_timeout(timer *t)
{
  struct bgp_proto *p = t->data;

  BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
  bgp_stop(p, 0);
}
  521: 
  522: 
  523: /**
  524:  * bgp_refresh_begin - start incoming enhanced route refresh sequence
  525:  * @p: BGP instance
  526:  *
  527:  * This function is called when an incoming enhanced route refresh sequence is
  528:  * started by the neighbor, demarcated by the BoRR packet. The function updates
  529:  * the load state and starts the routing table refresh cycle. Note that graceful
  530:  * restart also uses routing table refresh cycle, but RFC 7313 and load states
  531:  * ensure that these two sequences do not overlap.
  532:  */
  533: void
  534: bgp_refresh_begin(struct bgp_proto *p)
  535: {
  536:   if (p->load_state == BFS_LOADING)
  537:     { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }
  538: 
  539:   p->load_state = BFS_REFRESHING;
  540:   rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
  541: }
  542: 
  543: /**
  544:  * bgp_refresh_end - finish incoming enhanced route refresh sequence
  545:  * @p: BGP instance
  546:  *
  547:  * This function is called when an incoming enhanced route refresh sequence is
  548:  * finished by the neighbor, demarcated by the EoRR packet. The function updates
  549:  * the load state and ends the routing table refresh cycle. Routes not received
  550:  * during the sequence are removed by the nest.
  551:  */
  552: void
  553: bgp_refresh_end(struct bgp_proto *p)
  554: {
  555:   if (p->load_state != BFS_REFRESHING)
  556:     { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }
  557: 
  558:   p->load_state = BFS_NONE;
  559:   rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
  560: }
  561: 
  562: 
/*
 * Start the BGP exchange on a connected socket: reset everything learned
 * about the peer to defaults, install the BGP RX/TX hooks, stop the connect
 * retry timer, schedule an Open packet, enter the OpenSent state and start
 * the hold timer with the configured initial hold time.
 */
static void
bgp_send_open(struct bgp_conn *conn)
{
  /* Remember the start state of the instance at the time the Open is sent */
  conn->start_state = conn->bgp->start_state;

  // Default values, possibly changed by receiving capabilities.
  conn->advertised_as = 0;
  conn->peer_refresh_support = 0;
  conn->peer_as4_support = 0;
  conn->peer_add_path = 0;
  conn->peer_enhanced_refresh_support = 0;
  conn->peer_gr_aware = 0;
  conn->peer_gr_able = 0;
  conn->peer_gr_time = 0;
  conn->peer_gr_flags = 0;
  conn->peer_gr_aflags = 0;
  conn->peer_ext_messages_support = 0;

  DBG("BGP: Sending open\n");
  conn->sk->rx_hook = bgp_rx;
  conn->sk->tx_hook = bgp_tx;
  tm_stop(conn->connect_retry_timer);
  bgp_schedule_packet(conn, PKT_OPEN);
  bgp_conn_set_state(conn, BS_OPENSENT);
  bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
}
  589: 
/*
 * Socket TX hook installed by bgp_connect(); once it fires, the Open
 * message is sent on the outgoing connection.
 */
static void
bgp_connected(sock *sk)
{
  struct bgp_conn *conn = sk->data;
  struct bgp_proto *p = conn->bgp;	/* Needed by BGP_TRACE() */

  BGP_TRACE(D_EVENTS, "Connected");
  bgp_send_open(conn);
}
  599: 
  600: static void
  601: bgp_connect_timeout(timer *t)
  602: {
  603:   struct bgp_conn *conn = t->data;
  604:   struct bgp_proto *p = conn->bgp;
  605: 
  606:   DBG("BGP: connect_timeout\n");
  607:   if (p->p.proto_state == PS_START)
  608:     {
  609:       bgp_close_conn(conn);
  610:       bgp_connect(p);
  611:     }
  612:   else
  613:     bgp_conn_enter_idle_state(conn);
  614: }
  615: 
/*
 * Socket error hook for BGP connections. Stores the socket error, logs the
 * loss of the connection, starts graceful restart handling when an
 * established session with @gr_ready set is lost and finally moves the
 * connection to the idle state. An @err of 0 is logged as a plain close.
 */
static void
bgp_sock_err(sock *sk, int err)
{
  struct bgp_conn *conn = sk->data;
  struct bgp_proto *p = conn->bgp;

  /*
   * This error hook may be called either asynchronously from main
   * loop, or synchronously from sk_send().  But sk_send() is called
   * only from bgp_tx() and bgp_kick_tx(), which are both called
   * asynchronously from main loop. Moreover, they end if err hook is
   * called. Therefore, we could suppose that it is always called
   * asynchronously.
   */

  bgp_store_error(p, conn, BE_SOCKET, err);

  if (err)
    BGP_TRACE(D_EVENTS, "Connection lost (%M)", err);
  else
    BGP_TRACE(D_EVENTS, "Connection closed");

  /* Must precede entering idle: bgp_handle_graceful_restart() asserts an established connection */
  if ((conn->state == BS_ESTABLISHED) && p->gr_ready)
    bgp_handle_graceful_restart(p);

  bgp_conn_enter_idle_state(conn);
}
  643: 
  644: static void
  645: bgp_hold_timeout(timer *t)
  646: {
  647:   struct bgp_conn *conn = t->data;
  648:   struct bgp_proto *p = conn->bgp;
  649: 
  650:   DBG("BGP: Hold timeout\n");
  651: 
  652:   /* We are already closing the connection - just do hangup */
  653:   if (conn->state == BS_CLOSE)
  654:   {
  655:     BGP_TRACE(D_EVENTS, "Connection stalled");
  656:     bgp_conn_enter_idle_state(conn);
  657:     return;
  658:   }
  659: 
  660:   /* If there is something in input queue, we are probably congested
  661:      and perhaps just not processed BGP packets in time. */
  662: 
  663:   if (sk_rx_ready(conn->sk) > 0)
  664:     bgp_start_timer(conn->hold_timer, 10);
  665:   else
  666:     bgp_error(conn, 4, 0, NULL, 0);
  667: }
  668: 
/*
 * Keepalive timer hook: schedule a Keepalive packet and, if the TX event
 * is already pending, run it right away instead of waiting for it.
 */
static void
bgp_keepalive_timeout(timer *t)
{
  struct bgp_conn *conn = t->data;

  DBG("BGP: Keepalive timer\n");
  bgp_schedule_packet(conn, PKT_KEEPALIVE);

  /* Kick TX a bit faster */
  if (ev_active(conn->tx_ev))
    ev_run(conn->tx_ev);
}
  681: 
  682: static void
  683: bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
  684: {
  685:   timer *t;
  686: 
  687:   conn->sk = NULL;
  688:   conn->bgp = p;
  689:   conn->packets_to_send = 0;
  690: 
  691:   t = conn->connect_retry_timer = tm_new(p->p.pool);
  692:   t->hook = bgp_connect_timeout;
  693:   t->data = conn;
  694:   t = conn->hold_timer = tm_new(p->p.pool);
  695:   t->hook = bgp_hold_timeout;
  696:   t->data = conn;
  697:   t = conn->keepalive_timer = tm_new(p->p.pool);
  698:   t->hook = bgp_keepalive_timeout;
  699:   t->data = conn;
  700:   conn->tx_ev = ev_new(p->p.pool);
  701:   conn->tx_ev->hook = bgp_kick_tx;
  702:   conn->tx_ev->data = conn;
  703: }
  704: 
/*
 * Bind socket @s and connection @conn to each other and install the BGP
 * socket error hook. The fast_rx flag is set here and cleared again in
 * bgp_conn_enter_established_state().
 */
static void
bgp_setup_sk(struct bgp_conn *conn, sock *s)
{
  s->data = conn;
  s->err_hook = bgp_sock_err;
  s->fast_rx = 1;
  conn->sk = s;
}
  713: 
/*
 * Enter the Active state on the outgoing connection: set the connection up
 * and start the connect retry timer with the configured connect delay (at
 * least one second). The actual connect is then done from the timer hook,
 * bgp_connect_timeout().
 */
static void
bgp_active(struct bgp_proto *p)
{
  int delay = MAX(1, p->cf->connect_delay_time);
  struct bgp_conn *conn = &p->outgoing_conn;

  BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
  bgp_setup_conn(p, conn);
  bgp_conn_set_state(conn, BS_ACTIVE);
  bgp_start_timer(conn->connect_retry_timer, delay);
}
  725: 
/**
 * bgp_connect - initiate an outgoing connection
 * @p: BGP instance
 *
 * The bgp_connect() function sets up the outgoing &bgp_conn of @p, creates
 * an active TCP socket for it and initiates a TCP connection to the peer.
 * The rest of connection setup is governed by the BGP state machine as
 * described in the standard. When the socket cannot be opened or configured,
 * the error is logged and handled by bgp_sock_err().
 */
static void
bgp_connect(struct bgp_proto *p)	/* Enter Connect state and start establishing connection */
{
  sock *s;
  struct bgp_conn *conn = &p->outgoing_conn;
  int hops = p->cf->multihop ? : 1;	/* A multihop value of zero means one hop */

  DBG("BGP: Connecting\n");
  s = sk_new(p->p.pool);
  s->type = SK_TCP_ACTIVE;
  s->saddr = p->source_addr;
  s->daddr = p->cf->remote_ip;
  s->dport = p->cf->remote_port;
  s->iface = p->neigh ? p->neigh->iface : NULL;
  /* With TTL security packets are sent with TTL 255, otherwise with the hop count */
  s->ttl = p->cf->ttl_security ? 255 : hops;
  s->rbsize = p->cf->enable_extended_messages ? BGP_RX_BUFFER_EXT_SIZE : BGP_RX_BUFFER_SIZE;
  s->tbsize = p->cf->enable_extended_messages ? BGP_TX_BUFFER_EXT_SIZE : BGP_TX_BUFFER_SIZE;
  s->tos = IP_PREC_INTERNET_CONTROL;
  s->password = p->cf->password;
  /* bgp_connected() sends the Open message once this hook fires */
  s->tx_hook = bgp_connected;
  BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J", s->daddr, p->cf->iface,
	    s->saddr, ipa_is_link_local(s->saddr) ? s->iface : NULL);
  bgp_setup_conn(p, conn);
  bgp_setup_sk(conn, s);
  bgp_conn_set_state(conn, BS_CONNECT);

  if (sk_open(s) < 0)
    goto err;

  /* Set minimal receive TTL if needed */
  if (p->cf->ttl_security)
    if (sk_set_min_ttl(s, 256 - hops) < 0)
      goto err;

  DBG("BGP: Waiting for connect success\n");
  bgp_start_timer(conn->connect_retry_timer, p->cf->connect_retry_time);
  return;

 err:
  sk_log_error(s, p->p.name);
  /* The socket is already attached to conn, so the regular error hook cleans it up */
  bgp_sock_err(s, 0);
  return;
}
  777: 
  778: /**
  779:  * bgp_find_proto - find existing proto for incoming connection
  780:  * @sk: TCP socket
  781:  *
  782:  */
  783: static struct bgp_proto *
  784: bgp_find_proto(sock *sk)
  785: {
  786:   struct proto_config *pc;
  787: 
  788:   WALK_LIST(pc, config->protos)
  789:     if ((pc->protocol == &proto_bgp) && pc->proto)
  790:       {
  791: 	struct bgp_proto *p = (struct bgp_proto *) pc->proto;
  792: 	if (ipa_equal(p->cf->remote_ip, sk->daddr) &&
  793: 	    (!ipa_is_link_local(sk->daddr) || (p->cf->iface == sk->iface)))
  794: 	  return p;
  795:       }
  796: 
  797:   return NULL;
  798: }
  799: 
/**
 * bgp_incoming_connection - handle an incoming connection
 * @sk: TCP socket
 * @dummy: unused
 *
 * This function serves as a socket hook for accepting of new BGP
 * connections. It searches a BGP instance corresponding to the peer
 * which has connected and if such an instance exists and is able to
 * accept the connection, it sets up the incoming &bgp_conn structure of
 * the instance, attaches the socket to it and sends an Open message.
 * Otherwise (unknown peer, instance not ready, an incoming connection
 * already present, or a failure to configure the socket) the new socket
 * is simply freed. The function always returns 0.
 */
static int
bgp_incoming_connection(sock *sk, uint dummy UNUSED)
{
  struct bgp_proto *p;
  int acc, hops;

  DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport);
  p = bgp_find_proto(sk);
  if (!p)
    {
      log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)",
	  sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport);
      rfree(sk);
      return 0;
    }

  /*
   * BIRD should keep multiple incoming connections in OpenSent state (for
   * details RFC 4271 8.2.1 par 3), but it keeps just one. Duplicate incoming
   * connections are rejected instead. The exception is the case where an
   * incoming connection triggers a graceful restart.
   */

  /* Accept only when started or up, past the startup delay, and with no incoming socket yet */
  acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) &&
    (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);

  /* A new connection while a GR-ready session is established is treated as a neighbor restart */
  if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
    {
      bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART);
      bgp_handle_graceful_restart(p);
      bgp_conn_enter_idle_state(p->conn);
      acc = 1;

      /* There might be separate incoming connection in OpenSent state */
      if (p->incoming_conn.state > BS_ACTIVE)
	bgp_close_conn(&p->incoming_conn);
    }

  BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s",
	    sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL,
	    sk->dport, acc ? "accepted" : "rejected");

  if (!acc)
    {
      rfree(sk);
      return 0;
    }

  hops = p->cf->multihop ? : 1;

  /* Same TTL policy as for outgoing connections in bgp_connect() */
  if (sk_set_ttl(sk, p->cf->ttl_security ? 255 : hops) < 0)
    goto err;

  if (p->cf->ttl_security)
    if (sk_set_min_ttl(sk, 256 - hops) < 0)
      goto err;

  /* The listening socket uses the regular buffer sizes, so enlarge them if needed */
  if (p->cf->enable_extended_messages)
    {
      sk->rbsize = BGP_RX_BUFFER_EXT_SIZE;
      sk->tbsize = BGP_TX_BUFFER_EXT_SIZE;
      sk_reallocate(sk);
    }

  bgp_setup_conn(p, &p->incoming_conn);
  bgp_setup_sk(&p->incoming_conn, sk);
  bgp_send_open(&p->incoming_conn);
  return 0;

err:
  sk_log_error(sk, p->p.name);
  log(L_ERR "%s: Incoming connection aborted", p->p.name);
  rfree(sk);
  return 0;
}
  887: 
  888: static void
  889: bgp_listen_sock_err(sock *sk UNUSED, int err)
  890: {
  891:   if (err == ECONNABORTED)
  892:     log(L_WARN "BGP: Incoming connection aborted");
  893:   else
  894:     log(L_ERR "BGP: Error on listening socket: %M", err);
  895: }
  896: 
  897: static sock *
  898: bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags)
  899: {
  900:   sock *s = sk_new(&root_pool);
  901:   DBG("BGP: Creating listening socket\n");
  902:   s->type = SK_TCP_PASSIVE;
  903:   s->ttl = 255;
  904:   s->saddr = addr;
  905:   s->sport = port ? port : BGP_PORT;
  906:   s->flags = flags ? 0 : SKF_V6ONLY;
  907:   s->tos = IP_PREC_INTERNET_CONTROL;
  908:   s->rbsize = BGP_RX_BUFFER_SIZE;
  909:   s->tbsize = BGP_TX_BUFFER_SIZE;
  910:   s->rx_hook = bgp_incoming_connection;
  911:   s->err_hook = bgp_listen_sock_err;
  912: 
  913:   if (sk_open(s) < 0)
  914:     goto err;
  915: 
  916:   return s;
  917: 
  918:  err:
  919:   sk_log_error(s, "BGP");
  920:   log(L_ERR "BGP: Cannot open listening socket");
  921:   rfree(s);
  922:   return NULL;
  923: }
  924: 
  925: static void
  926: bgp_start_neighbor(struct bgp_proto *p)
  927: {
  928:   /* Called only for single-hop BGP sessions */
  929: 
  930:   if (ipa_zero(p->source_addr))
  931:     p->source_addr = p->neigh->ifa->ip;
  932: 
  933: #ifdef IPV6
  934:   {
  935:     struct ifa *a;
  936:     p->local_link = IPA_NONE;
  937:     WALK_LIST(a, p->neigh->iface->addrs)
  938:       if (a->scope == SCOPE_LINK)
  939:         {
  940: 	  p->local_link = a->ip;
  941: 	  break;
  942: 	}
  943: 
  944:     if (! ipa_nonzero(p->local_link))
  945:       log(L_WARN "%s: Missing link local address on interface %s", p->p.name,  p->neigh->iface->name);
  946: 
  947:     DBG("BGP: Selected link-level address %I\n", p->local_link);
  948:   }
  949: #endif
  950: 
  951:   bgp_initiate(p);
  952: }
  953: 
  954: static void
  955: bgp_neigh_notify(neighbor *n)
  956: {
  957:   struct bgp_proto *p = (struct bgp_proto *) n->proto;
  958:   int ps = p->p.proto_state;
  959: 
  960:   if (n != p->neigh)
  961:     return;
  962: 
  963:   if ((ps == PS_DOWN) || (ps == PS_STOP))
  964:     return;
  965: 
  966:   int prepare = (ps == PS_START) && (p->start_state == BSS_PREPARE);
  967: 
  968:   if (n->scope <= 0)
  969:     {
  970:       if (!prepare)
  971:         {
  972: 	  BGP_TRACE(D_EVENTS, "Neighbor lost");
  973: 	  bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST);
  974: 	  /* Perhaps also run bgp_update_startup_delay(p)? */
  975: 	  bgp_stop(p, 0);
  976: 	}
  977:     }
  978:   else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
  979:     {
  980:       if (!prepare)
  981:         {
  982: 	  BGP_TRACE(D_EVENTS, "Link down");
  983: 	  bgp_store_error(p, NULL, BE_MISC, BEM_LINK_DOWN);
  984: 	  if (ps == PS_UP)
  985: 	    bgp_update_startup_delay(p);
  986: 	  bgp_stop(p, 0);
  987: 	}
  988:     }
  989:   else
  990:     {
  991:       if (prepare)
  992: 	{
  993: 	  BGP_TRACE(D_EVENTS, "Neighbor ready");
  994: 	  bgp_start_neighbor(p);
  995: 	}
  996:     }
  997: }
  998: 
  999: static void
 1000: bgp_bfd_notify(struct bfd_request *req)
 1001: {
 1002:   struct bgp_proto *p = req->data;
 1003:   int ps = p->p.proto_state;
 1004: 
 1005:   if (req->down && ((ps == PS_START) || (ps == PS_UP)))
 1006:     {
 1007:       BGP_TRACE(D_EVENTS, "BFD session down");
 1008:       bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN);
 1009:       if (ps == PS_UP)
 1010: 	bgp_update_startup_delay(p);
 1011:       bgp_stop(p, 0);
 1012:     }
 1013: }
 1014: 
 1015: static void
 1016: bgp_update_bfd(struct bgp_proto *p, int use_bfd)
 1017: {
 1018:   if (use_bfd && !p->bfd_req)
 1019:     p->bfd_req = bfd_request_session(p->p.pool, p->cf->remote_ip, p->source_addr,
 1020: 				     p->cf->multihop ? NULL : p->neigh->iface,
 1021: 				     bgp_bfd_notify, p);
 1022: 
 1023:   if (!use_bfd && p->bfd_req)
 1024:     {
 1025:       rfree(p->bfd_req);
 1026:       p->bfd_req = NULL;
 1027:     }
 1028: }
 1029: 
 1030: static int
 1031: bgp_reload_routes(struct proto *P)
 1032: {
 1033:   struct bgp_proto *p = (struct bgp_proto *) P;
 1034:   if (!p->conn || !p->conn->peer_refresh_support)
 1035:     return 0;
 1036: 
 1037:   bgp_schedule_packet(p->conn, PKT_ROUTE_REFRESH);
 1038:   return 1;
 1039: }
 1040: 
 1041: static void
 1042: bgp_feed_begin(struct proto *P, int initial)
 1043: {
 1044:   struct bgp_proto *p = (struct bgp_proto *) P;
 1045: 
 1046:   /* This should not happen */
 1047:   if (!p->conn)
 1048:     return;
 1049: 
 1050:   if (initial && p->cf->gr_mode)
 1051:     p->feed_state = BFS_LOADING;
 1052: 
 1053:   /* It is refeed and both sides support enhanced route refresh */
 1054:   if (!initial && p->cf->enable_refresh &&
 1055:       p->conn->peer_enhanced_refresh_support)
 1056:     {
 1057:       /* BoRR must not be sent before End-of-RIB */
 1058:       if (p->feed_state == BFS_LOADING || p->feed_state == BFS_LOADED)
 1059: 	return;
 1060: 
 1061:       p->feed_state = BFS_REFRESHING;
 1062:       bgp_schedule_packet(p->conn, PKT_BEGIN_REFRESH);
 1063:     }
 1064: }
 1065: 
 1066: static void
 1067: bgp_feed_end(struct proto *P)
 1068: {
 1069:   struct bgp_proto *p = (struct bgp_proto *) P;
 1070: 
 1071:   /* This should not happen */
 1072:   if (!p->conn)
 1073:     return;
 1074: 
 1075:   /* Non-demarcated feed ended, nothing to do */
 1076:   if (p->feed_state == BFS_NONE)
 1077:     return;
 1078: 
 1079:   /* Schedule End-of-RIB packet */
 1080:   if (p->feed_state == BFS_LOADING)
 1081:     p->feed_state = BFS_LOADED;
 1082: 
 1083:   /* Schedule EoRR packet */
 1084:   if (p->feed_state == BFS_REFRESHING)
 1085:     p->feed_state = BFS_REFRESHED;
 1086: 
 1087:   /* Kick TX hook */
 1088:   bgp_schedule_packet(p->conn, PKT_UPDATE);
 1089: }
 1090: 
 1091: 
 1092: static void
 1093: bgp_start_locked(struct object_lock *lock)
 1094: {
 1095:   struct bgp_proto *p = lock->data;
 1096:   struct bgp_config *cf = p->cf;
 1097: 
 1098:   if (p->p.proto_state != PS_START)
 1099:     {
 1100:       DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
 1101:       return;
 1102:     }
 1103: 
 1104:   DBG("BGP: Got lock\n");
 1105: 
 1106:   if (cf->multihop)
 1107:     {
 1108:       /* Multi-hop sessions do not use neighbor entries */
 1109:       bgp_initiate(p);
 1110:       return;
 1111:     }
 1112: 
 1113:   neighbor *n = neigh_find2(&p->p, &cf->remote_ip, cf->iface, NEF_STICKY);
 1114:   if (!n)
 1115:     {
 1116:       log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface);
 1117:       /* As we do not start yet, we can just disable protocol */
 1118:       p->p.disabled = 1;
 1119:       bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
 1120:       proto_notify_state(&p->p, PS_DOWN);
 1121:       return;
 1122:     }
 1123: 
 1124:   p->neigh = n;
 1125: 
 1126:   if (n->scope <= 0)
 1127:     BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", cf->remote_ip, cf->iface);
 1128:   else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
 1129:     BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name);
 1130:   else
 1131:     bgp_start_neighbor(p);
 1132: }
 1133: 
 1134: static int
 1135: bgp_start(struct proto *P)
 1136: {
 1137:   struct bgp_proto *p = (struct bgp_proto *) P;
 1138:   struct object_lock *lock;
 1139: 
 1140:   DBG("BGP: Startup.\n");
 1141:   p->start_state = BSS_PREPARE;
 1142:   p->outgoing_conn.state = BS_IDLE;
 1143:   p->incoming_conn.state = BS_IDLE;
 1144:   p->neigh = NULL;
 1145:   p->bfd_req = NULL;
 1146:   p->gr_ready = 0;
 1147:   p->gr_active = 0;
 1148: 
 1149:   rt_lock_table(p->igp_table);
 1150: 
 1151:   p->event = ev_new(p->p.pool);
 1152:   p->event->hook = bgp_decision;
 1153:   p->event->data = p;
 1154: 
 1155:   p->startup_timer = tm_new(p->p.pool);
 1156:   p->startup_timer->hook = bgp_startup_timeout;
 1157:   p->startup_timer->data = p;
 1158: 
 1159:   p->gr_timer = tm_new(p->p.pool);
 1160:   p->gr_timer->hook = bgp_graceful_restart_timeout;
 1161:   p->gr_timer->data = p;
 1162: 
 1163:   p->local_id = proto_get_router_id(P->cf);
 1164:   if (p->rr_client)
 1165:     p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
 1166: 
 1167:   p->remote_id = 0;
 1168:   p->source_addr = p->cf->source_addr;
 1169: 
 1170:   if (p->p.gr_recovery && p->cf->gr_mode)
 1171:     proto_graceful_restart_lock(P);
 1172: 
 1173:   /*
 1174:    *  Before attempting to create the connection, we need to lock the
 1175:    *  port, so that are sure we're the only instance attempting to talk
 1176:    *  with that neighbor.
 1177:    */
 1178: 
 1179:   lock = p->lock = olock_new(P->pool);
 1180:   lock->addr = p->cf->remote_ip;
 1181:   lock->port = p->cf->remote_port;
 1182:   lock->iface = p->cf->iface;
 1183:   lock->type = OBJLOCK_TCP;
 1184:   lock->hook = bgp_start_locked;
 1185:   lock->data = p;
 1186:   olock_acquire(lock);
 1187: 
 1188:   return PS_START;
 1189: }
 1190: 
 1191: extern int proto_restart;
 1192: 
 1193: static int
 1194: bgp_shutdown(struct proto *P)
 1195: {
 1196:   struct bgp_proto *p = (struct bgp_proto *) P;
 1197:   unsigned subcode = 0;
 1198: 
 1199:   BGP_TRACE(D_EVENTS, "Shutdown requested");
 1200: 
 1201:   switch (P->down_code)
 1202:     {
 1203:     case PDC_CF_REMOVE:
 1204:     case PDC_CF_DISABLE:
 1205:       subcode = 3; // Errcode 6, 3 - peer de-configured
 1206:       break;
 1207: 
 1208:     case PDC_CF_RESTART:
 1209:       subcode = 6; // Errcode 6, 6 - other configuration change
 1210:       break;
 1211: 
 1212:     case PDC_CMD_DISABLE:
 1213:     case PDC_CMD_SHUTDOWN:
 1214:       subcode = 2; // Errcode 6, 2 - administrative shutdown
 1215:       break;
 1216: 
 1217:     case PDC_CMD_RESTART:
 1218:       subcode = 4; // Errcode 6, 4 - administrative reset
 1219:       break;
 1220: 
 1221:     case PDC_RX_LIMIT_HIT:
 1222:     case PDC_IN_LIMIT_HIT:
 1223:       subcode = 1; // Errcode 6, 1 - max number of prefixes reached
 1224:       /* log message for compatibility */
 1225:       log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name);
 1226:       goto limit;
 1227: 
 1228:     case PDC_OUT_LIMIT_HIT:
 1229:       subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown
 1230: 
 1231:     limit:
 1232:       bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED);
 1233:       if (proto_restart)
 1234: 	bgp_update_startup_delay(p);
 1235:       else
 1236: 	p->startup_delay = 0;
 1237:       goto done;
 1238:     }
 1239: 
 1240:   bgp_store_error(p, NULL, BE_MAN_DOWN, 0);
 1241:   p->startup_delay = 0;
 1242: 
 1243:  done:
 1244:   bgp_stop(p, subcode);
 1245:   return p->p.proto_state;
 1246: }
 1247: 
 1248: static void
 1249: bgp_cleanup(struct proto *P)
 1250: {
 1251:   struct bgp_proto *p = (struct bgp_proto *) P;
 1252:   rt_unlock_table(p->igp_table);
 1253: }
 1254: 
 1255: static rtable *
 1256: get_igp_table(struct bgp_config *cf)
 1257: {
 1258:   return cf->igp_table ? cf->igp_table->table : cf->c.table->table;
 1259: }
 1260: 
 1261: static struct proto *
 1262: bgp_init(struct proto_config *C)
 1263: {
 1264:   struct proto *P = proto_new(C, sizeof(struct bgp_proto));
 1265:   struct bgp_config *c = (struct bgp_config *) C;
 1266:   struct bgp_proto *p = (struct bgp_proto *) P;
 1267: 
 1268:   P->accept_ra_types = c->secondary ? RA_ACCEPTED : RA_OPTIMAL;
 1269:   P->rt_notify = bgp_rt_notify;
 1270:   P->import_control = bgp_import_control;
 1271:   P->neigh_notify = bgp_neigh_notify;
 1272:   P->reload_routes = bgp_reload_routes;
 1273:   P->feed_begin = bgp_feed_begin;
 1274:   P->feed_end = bgp_feed_end;
 1275:   P->rte_better = bgp_rte_better;
 1276:   P->rte_mergable = bgp_rte_mergable;
 1277:   P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL;
 1278: 
 1279:   p->cf = c;
 1280:   p->local_as = c->local_as;
 1281:   p->remote_as = c->remote_as;
 1282:   p->is_internal = (c->local_as == c->remote_as);
 1283:   p->rs_client = c->rs_client;
 1284:   p->rr_client = c->rr_client;
 1285:   p->igp_table = get_igp_table(c);
 1286: 
 1287:   return P;
 1288: }
 1289: 
 1290: 
 1291: void
 1292: bgp_check_config(struct bgp_config *c)
 1293: {
 1294:   int internal = (c->local_as == c->remote_as);
 1295: 
 1296:   /* Do not check templates at all */
 1297:   if (c->c.class == SYM_TEMPLATE)
 1298:     return;
 1299: 
 1300: 
 1301:   /* EBGP direct by default, IBGP multihop by default */
 1302:   if (c->multihop < 0)
 1303:     c->multihop = internal ? 64 : 0;
 1304: 
 1305:   /* Different default for gw_mode */
 1306:   if (!c->gw_mode)
 1307:     c->gw_mode = c->multihop ? GW_RECURSIVE : GW_DIRECT;
 1308: 
 1309:   /* Different default based on rs_client */
 1310:   if (!c->missing_lladdr)
 1311:     c->missing_lladdr = c->rs_client ? MLL_IGNORE : MLL_SELF;
 1312: 
 1313:   /* Disable after error incompatible with restart limit action */
 1314:   if (c->c.in_limit && (c->c.in_limit->action == PLA_RESTART) && c->disable_after_error)
 1315:     c->c.in_limit->action = PLA_DISABLE;
 1316: 
 1317: 
 1318:   if (!c->local_as)
 1319:     cf_error("Local AS number must be set");
 1320: 
 1321:   if (ipa_zero(c->remote_ip))
 1322:     cf_error("Neighbor must be configured");
 1323: 
 1324:   if (!c->remote_as)
 1325:     cf_error("Remote AS number must be set");
 1326: 
 1327:   // if (ipa_is_link_local(c->remote_ip) && !c->iface)
 1328:   //   cf_error("Link-local neighbor address requires specified interface");
 1329: 
 1330:   if (!ipa_is_link_local(c->remote_ip) != !c->iface)
 1331:     cf_error("Link-local address and interface scope must be used together");
 1332: 
 1333:   if (!(c->capabilities && c->enable_as4) && (c->remote_as > 0xFFFF))
 1334:     cf_error("Neighbor AS number out of range (AS4 not available)");
 1335: 
 1336:   if (!internal && c->rr_client)
 1337:     cf_error("Only internal neighbor can be RR client");
 1338: 
 1339:   if (internal && c->rs_client)
 1340:     cf_error("Only external neighbor can be RS client");
 1341: 
 1342:   if (c->multihop && (c->gw_mode == GW_DIRECT))
 1343:     cf_error("Multihop BGP cannot use direct gateway mode");
 1344: 
 1345:   if (c->multihop && (ipa_is_link_local(c->remote_ip) ||
 1346: 		      ipa_is_link_local(c->source_addr)))
 1347:     cf_error("Multihop BGP cannot be used with link-local addresses");
 1348: 
 1349:   if (c->multihop && c->check_link)
 1350:     cf_error("Multihop BGP cannot depend on link state");
 1351: 
 1352:   if (c->multihop && c->bfd && ipa_zero(c->source_addr))
 1353:     cf_error("Multihop BGP with BFD requires specified source address");
 1354: 
 1355:   if ((c->gw_mode == GW_RECURSIVE) && c->c.table->sorted)
 1356:     cf_error("BGP in recursive mode prohibits sorted table");
 1357: 
 1358:   if (c->deterministic_med && c->c.table->sorted)
 1359:     cf_error("BGP with deterministic MED prohibits sorted table");
 1360: 
 1361:   if (c->secondary && !c->c.table->sorted)
 1362:     cf_error("BGP with secondary option requires sorted table");
 1363: }
 1364: 
 1365: static int
 1366: bgp_reconfigure(struct proto *P, struct proto_config *C)
 1367: {
 1368:   struct bgp_config *new = (struct bgp_config *) C;
 1369:   struct bgp_proto *p = (struct bgp_proto *) P;
 1370:   struct bgp_config *old = p->cf;
 1371: 
 1372:   if (proto_get_router_id(C) != p->local_id)
 1373:     return 0;
 1374: 
 1375:   int same = !memcmp(((byte *) old) + sizeof(struct proto_config),
 1376: 		     ((byte *) new) + sizeof(struct proto_config),
 1377: 		     // password item is last and must be checked separately
 1378: 		     OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
 1379:     && ((!old->password && !new->password)
 1380: 	|| (old->password && new->password && !strcmp(old->password, new->password)))
 1381:     && (get_igp_table(old) == get_igp_table(new));
 1382: 
 1383:   if (same && (p->start_state > BSS_PREPARE))
 1384:     bgp_update_bfd(p, new->bfd);
 1385: 
 1386:   /* We should update our copy of configuration ptr as old configuration will be freed */
 1387:   if (same)
 1388:     p->cf = new;
 1389: 
 1390:   return same;
 1391: }
 1392: 
/*
 * copy_config hook: copy configuration @src into @dest by delegating to
 * proto_copy_rest() with the size of struct bgp_config. Pointer members
 * are copied as pointers (presumably proto_copy_rest() skips the generic
 * struct proto_config header -- confirm against its definition).
 */
static void
bgp_copy_config(struct proto_config *dest, struct proto_config *src)
{
  /* Just a shallow copy */
  proto_copy_rest(dest, src, sizeof(struct bgp_config));
}
 1399: 
 1400: 
 1401: /**
 1402:  * bgp_error - report a protocol error
 1403:  * @c: connection
 1404:  * @code: error code (according to the RFC)
 1405:  * @subcode: error sub-code
 1406:  * @data: data to be passed in the Notification message
 1407:  * @len: length of the data
 1408:  *
 1409:  * bgp_error() sends a notification packet to tell the other side that a protocol
 1410:  * error has occurred (including the data considered erroneous if possible) and
 1411:  * closes the connection.
 1412:  */
 1413: void
 1414: bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len)
 1415: {
 1416:   struct bgp_proto *p = c->bgp;
 1417: 
 1418:   if (c->state == BS_CLOSE)
 1419:     return;
 1420: 
 1421:   bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, (len > 0) ? len : -len);
 1422:   bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode);
 1423:   bgp_conn_enter_close_state(c);
 1424: 
 1425:   c->notify_code = code;
 1426:   c->notify_subcode = subcode;
 1427:   c->notify_data = data;
 1428:   c->notify_size = (len > 0) ? len : 0;
 1429:   bgp_schedule_packet(c, PKT_NOTIFICATION);
 1430: 
 1431:   if (code != 6)
 1432:     {
 1433:       bgp_update_startup_delay(p);
 1434:       bgp_stop(p, 0);
 1435:     }
 1436: }
 1437: 
 1438: /**
 1439:  * bgp_store_error - store last error for status report
 1440:  * @p: BGP instance
 1441:  * @c: connection
 1442:  * @class: error class (BE_xxx constants)
 1443:  * @code: error code (class specific)
 1444:  *
 1445:  * bgp_store_error() decides whether given error is interesting enough
 1446:  * and store that error to last_error variables of @p
 1447:  */
 1448: void
 1449: bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code)
 1450: {
 1451:   /* During PS_UP, we ignore errors on secondary connection */
 1452:   if ((p->p.proto_state == PS_UP) && c && (c != p->conn))
 1453:     return;
 1454: 
 1455:   /* During PS_STOP, we ignore any errors, as we want to report
 1456:    * the error that caused transition to PS_STOP
 1457:    */
 1458:   if (p->p.proto_state == PS_STOP)
 1459:     return;
 1460: 
 1461:   p->last_error_class = class;
 1462:   p->last_error_code = code;
 1463: }
 1464: 
 1465: static char *bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established", "Close" };
 1466: static char *bgp_err_classes[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""};
 1467: static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket", "Link down", "BFD session down", "Graceful restart"};
 1468: static char *bgp_auto_errors[] = { "", "Route limit exceeded"};
 1469: 
 1470: static const char *
 1471: bgp_last_errmsg(struct bgp_proto *p)
 1472: {
 1473:   switch (p->last_error_class)
 1474:     {
 1475:     case BE_MISC:
 1476:       return bgp_misc_errors[p->last_error_code];
 1477:     case BE_SOCKET:
 1478:       return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code);
 1479:     case BE_BGP_RX:
 1480:     case BE_BGP_TX:
 1481:       return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF);
 1482:     case BE_AUTO_DOWN:
 1483:       return bgp_auto_errors[p->last_error_code];
 1484:     default:
 1485:       return "";
 1486:     }
 1487: }
 1488: 
 1489: static const char *
 1490: bgp_state_dsc(struct bgp_proto *p)
 1491: {
 1492:   if (p->p.proto_state == PS_DOWN)
 1493:     return "Down";
 1494: 
 1495:   int state = MAX(p->incoming_conn.state, p->outgoing_conn.state);
 1496:   if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->cf->passive)
 1497:     return "Passive";
 1498: 
 1499:   return bgp_state_names[state];
 1500: }
 1501: 
 1502: static void
 1503: bgp_get_status(struct proto *P, byte *buf)
 1504: {
 1505:   struct bgp_proto *p = (struct bgp_proto *) P;
 1506: 
 1507:   const char *err1 = bgp_err_classes[p->last_error_class];
 1508:   const char *err2 = bgp_last_errmsg(p);
 1509: 
 1510:   if (P->proto_state == PS_DOWN)
 1511:     bsprintf(buf, "%s%s", err1, err2);
 1512:   else
 1513:     bsprintf(buf, "%-14s%s%s", bgp_state_dsc(p), err1, err2);
 1514: }
 1515: 
 1516: static void
 1517: bgp_show_proto_info(struct proto *P)
 1518: {
 1519:   struct bgp_proto *p = (struct bgp_proto *) P;
 1520:   struct bgp_conn *c = p->conn;
 1521: 
 1522:   proto_show_basic_info(P);
 1523: 
 1524:   cli_msg(-1006, "  BGP state:          %s", bgp_state_dsc(p));
 1525:   cli_msg(-1006, "    Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface);
 1526:   cli_msg(-1006, "    Neighbor AS:      %u", p->remote_as);
 1527: 
 1528:   if (p->gr_active)
 1529:     cli_msg(-1006, "    Neighbor graceful restart active");
 1530: 
 1531:   if (P->proto_state == PS_START)
 1532:     {
 1533:       struct bgp_conn *oc = &p->outgoing_conn;
 1534: 
 1535:       if ((p->start_state < BSS_CONNECT) &&
 1536: 	  (p->startup_timer->expires))
 1537: 	cli_msg(-1006, "    Error wait:       %d/%d",
 1538: 		p->startup_timer->expires - now, p->startup_delay);
 1539: 
 1540:       if ((oc->state == BS_ACTIVE) &&
 1541: 	  (oc->connect_retry_timer->expires))
 1542: 	cli_msg(-1006, "    Connect delay:    %d/%d",
 1543: 		oc->connect_retry_timer->expires - now, p->cf->connect_delay_time);
 1544: 
 1545:       if (p->gr_active && p->gr_timer->expires)
 1546: 	cli_msg(-1006, "    Restart timer:    %d/-", p->gr_timer->expires - now);
 1547:     }
 1548:   else if (P->proto_state == PS_UP)
 1549:     {
 1550:       cli_msg(-1006, "    Neighbor ID:      %R", p->remote_id);
 1551:       cli_msg(-1006, "    Neighbor caps:   %s%s%s%s%s%s%s",
 1552: 	      c->peer_refresh_support ? " refresh" : "",
 1553: 	      c->peer_enhanced_refresh_support ? " enhanced-refresh" : "",
 1554: 	      c->peer_gr_able ? " restart-able" : (c->peer_gr_aware ? " restart-aware" : ""),
 1555: 	      c->peer_as4_support ? " AS4" : "",
 1556: 	      (c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "",
 1557: 	      (c->peer_add_path & ADD_PATH_TX) ? " add-path-tx" : "",
 1558: 	      c->peer_ext_messages_support ? " ext-messages" : "");
 1559:       cli_msg(-1006, "    Session:          %s%s%s%s%s%s%s%s",
 1560: 	      p->is_internal ? "internal" : "external",
 1561: 	      p->cf->multihop ? " multihop" : "",
 1562: 	      p->rr_client ? " route-reflector" : "",
 1563: 	      p->rs_client ? " route-server" : "",
 1564: 	      p->as4_session ? " AS4" : "",
 1565: 	      p->add_path_rx ? " add-path-rx" : "",
 1566: 	      p->add_path_tx ? " add-path-tx" : "",
 1567: 	      p->ext_messages ? " ext-messages" : "");
 1568:       cli_msg(-1006, "    Source address:   %I", p->source_addr);
 1569:       if (P->cf->in_limit)
 1570: 	cli_msg(-1006, "    Route limit:      %d/%d",
 1571: 		p->p.stats.imp_routes + p->p.stats.filt_routes, P->cf->in_limit->limit);
 1572:       cli_msg(-1006, "    Hold timer:       %d/%d",
 1573: 	      tm_remains(c->hold_timer), c->hold_time);
 1574:       cli_msg(-1006, "    Keepalive timer:  %d/%d",
 1575: 	      tm_remains(c->keepalive_timer), c->keepalive_time);
 1576:     }
 1577: 
 1578:   if ((p->last_error_class != BE_NONE) &&
 1579:       (p->last_error_class != BE_MAN_DOWN))
 1580:     {
 1581:       const char *err1 = bgp_err_classes[p->last_error_class];
 1582:       const char *err2 = bgp_last_errmsg(p);
 1583:       cli_msg(-1006, "    Last error:       %s%s", err1, err2);
 1584:     }
 1585: }
 1586: 
/*
 * Protocol descriptor of BGP: protocol name, name template, attribute
 * class, default preference, size of the configuration structure and the
 * protocol hooks. bgp_get_attr and bgp_get_route_info are not defined in
 * this part of the file.
 */
struct protocol proto_bgp = {
  .name = 		"BGP",
  .template = 		"bgp%d",
  .attr_class = 	EAP_BGP,
  .preference = 	DEF_PREF_BGP,
  .config_size =	sizeof(struct bgp_config),
  .init = 		bgp_init,
  .start = 		bgp_start,
  .shutdown = 		bgp_shutdown,
  .cleanup = 		bgp_cleanup,
  .reconfigure = 	bgp_reconfigure,
  .copy_config = 	bgp_copy_config,
  .get_status = 	bgp_get_status,
  .get_attr = 		bgp_get_attr,
  .get_route_info = 	bgp_get_route_info,
  .show_proto_info = 	bgp_show_proto_info
};

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>