Annotation of embedaddon/bird/proto/bgp/bgp.c, revision 1.1.1.1

1.1       misho       1: /*
                      2:  *     BIRD -- The Border Gateway Protocol
                      3:  *
                      4:  *     (c) 2000 Martin Mares <mj@ucw.cz>
                      5:  *
                      6:  *     Can be freely distributed and used under the terms of the GNU GPL.
                      7:  */
                      8: 
                      9: /**
                     10:  * DOC: Border Gateway Protocol
                     11:  *
                     12:  * The BGP protocol is implemented in three parts: |bgp.c| which takes care of the
                     13:  * connection and most of the interface with BIRD core, |packets.c| handling
                     14:  * both incoming and outgoing BGP packets and |attrs.c| containing functions for
                     15:  * manipulation with BGP attribute lists.
                     16:  *
                     17:  * As opposed to the other existing routing daemons, BIRD has a sophisticated core
                     18:  * architecture which is able to keep all the information needed by BGP in the
                     19:  * primary routing table, therefore no complex data structures like a central
                     20:  * BGP table are needed. This increases memory footprint of a BGP router with
                     21:  * many connections, but not too much and, which is more important, it makes
                     22:  * BGP much easier to implement.
                     23:  *
                     24:  * Each instance of BGP (corresponding to a single BGP peer) is described by a &bgp_proto
                     25:  * structure to which are attached individual connections represented by &bgp_conn
                     26:  * (usually, there exists only one connection, but during BGP session setup, there
                     27:  * can be more of them). The connections are handled according to the BGP state machine
                     28:  * defined in the RFC with all the timers and all the parameters configurable.
                     29:  *
                     30:  * In incoming direction, we listen on the connection's socket and each time we receive
                     31:  * some input, we pass it to bgp_rx(). It decodes packet headers and the markers and
                     32:  * passes complete packets to bgp_rx_packet() which distributes the packet according
                     33:  * to its type.
                     34:  *
                     35:  * In outgoing direction, we gather all the routing updates and sort them to buckets
                     36:  * (&bgp_bucket) according to their attributes (we keep a hash table for fast comparison
                     37:  * of &rta's and a &fib which helps us to find if we already have another route for
                     38:  * the same destination queued for sending, so that we can replace it with the new one
                     39:  * immediately instead of sending both updates). There also exists a special bucket holding
                     40:  * all the route withdrawals which cannot be queued anywhere else as they don't have any
                     41:  * attributes. If we have any packet to send (due to either new routes or the connection
                     42:  * tracking code wanting to send an Open, Keepalive or Notification message), we call
                     43:  * bgp_schedule_packet() which sets the corresponding bit in the @packets_to_send
                     44:  * bit field in &bgp_conn and as soon as the transmit socket buffer becomes empty,
                     45:  * we call bgp_fire_tx(). It inspects state of all the packet type bits and calls
                     46:  * the corresponding bgp_create_xx() functions, eventually rescheduling the same packet
                     47:  * type if we have more data of the same type to send.
                     48:  *
                     49:  * The processing of attributes consists of two functions: bgp_decode_attrs() for checking
                     50:  * of the attribute blocks and translating them to the language of BIRD's extended attributes
                     51:  * and bgp_encode_attrs() which does the converse. Both functions are built around a
                     52:  * @bgp_attr_table array describing all important characteristics of all known attributes.
                     53:  * Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
                     54:  *
                     55:  * BGP protocol implements graceful restart in both restarting (local restart)
                     56:  * and receiving (neighbor restart) roles. The first is handled mostly by the
                     57:  * graceful restart code in the nest, BGP protocol just handles capabilities,
                     58:  * sets @gr_wait and locks graceful restart until end-of-RIB mark is received.
                     59:  * The second is implemented by internal restart of the BGP state to %BS_IDLE
                     60:  * and protocol state to %PS_START, but keeping the protocol up from the core
                     61:  * point of view and therefore maintaining received routes. Routing table
                     62:  * refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing
                     63:  * stale routes after reestablishment of BGP session during graceful restart.
                     64:  */
                     65: 
                     66: #undef LOCAL_DEBUG
                     67: 
                     68: #include "nest/bird.h"
                     69: #include "nest/iface.h"
                     70: #include "nest/protocol.h"
                     71: #include "nest/route.h"
                     72: #include "nest/cli.h"
                     73: #include "nest/locks.h"
                     74: #include "conf/conf.h"
                     75: #include "lib/socket.h"
                     76: #include "lib/resource.h"
                     77: #include "lib/string.h"
                     78: 
                     79: #include "bgp.h"
                     80: 
                     81: 
                     82: struct linpool *bgp_linpool;           /* Global temporary pool */
                     83: static sock *bgp_listen_sk;            /* Global listening socket */
                     84: static int bgp_counter;                        /* Number of protocol instances using the listening socket */
                     85: 
                     86: static void bgp_close(struct bgp_proto *p, int apply_md5);
                     87: static void bgp_connect(struct bgp_proto *p);
                     88: static void bgp_active(struct bgp_proto *p);
                     89: static sock *bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags);
                     90: static void bgp_update_bfd(struct bgp_proto *p, int use_bfd);
                     91: 
                     92: 
                     93: /**
                     94:  * bgp_open - open a BGP instance
                     95:  * @p: BGP instance
                     96:  *
                     97:  * This function allocates and configures shared BGP resources.
                     98:  * Should be called as the last step during initialization
                     99:  * (when lock is acquired and neighbor is ready).
                    100:  * When error, state changed to PS_DOWN, -1 is returned and caller
                    101:  * should return immediately.
                    102:  */
                    103: static int
                    104: bgp_open(struct bgp_proto *p)
                    105: {
                    106:   struct config *cfg = p->cf->c.global;
                    107:   int errcode;
                    108: 
                    109:   if (!bgp_listen_sk)
                    110:     bgp_listen_sk = bgp_setup_listen_sk(cfg->listen_bgp_addr, cfg->listen_bgp_port, cfg->listen_bgp_flags);
                    111: 
                    112:   if (!bgp_listen_sk)
                    113:     {
                    114:       errcode = BEM_NO_SOCKET;
                    115:       goto err;
                    116:     }
                    117: 
                    118:   if (!bgp_linpool)
                    119:     bgp_linpool = lp_new(&root_pool, 4080);
                    120: 
                    121:   bgp_counter++;
                    122: 
                    123:   if (p->cf->password)
                    124:     if (sk_set_md5_auth(bgp_listen_sk, p->cf->source_addr, p->cf->remote_ip,
                    125:                        p->cf->iface, p->cf->password, p->cf->setkey) < 0)
                    126:       {
                    127:        sk_log_error(bgp_listen_sk, p->p.name);
                    128:        bgp_close(p, 0);
                    129:        errcode = BEM_INVALID_MD5;
                    130:        goto err;
                    131:       }
                    132: 
                    133:   return 0;
                    134: 
                    135: err:
                    136:   p->p.disabled = 1;
                    137:   bgp_store_error(p, NULL, BE_MISC, errcode);
                    138:   proto_notify_state(&p->p, PS_DOWN);
                    139:   return -1;
                    140: }
                    141: 
                    142: static void
                    143: bgp_startup(struct bgp_proto *p)
                    144: {
                    145:   BGP_TRACE(D_EVENTS, "Started");
                    146:   p->start_state = p->cf->capabilities ? BSS_CONNECT : BSS_CONNECT_NOCAP;
                    147: 
                    148:   if (!p->cf->passive)
                    149:     bgp_active(p);
                    150: }
                    151: 
                    152: static void
                    153: bgp_startup_timeout(timer *t)
                    154: {
                    155:   bgp_startup(t->data);
                    156: }
                    157: 
                    158: 
                    159: static void
                    160: bgp_initiate(struct bgp_proto *p)
                    161: {
                    162:   int rv = bgp_open(p);
                    163:   if (rv < 0)
                    164:     return;
                    165: 
                    166:   if (p->cf->bfd)
                    167:     bgp_update_bfd(p, p->cf->bfd);
                    168: 
                    169:   if (p->startup_delay)
                    170:     {
                    171:       p->start_state = BSS_DELAY;
                    172:       BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay);
                    173:       bgp_start_timer(p->startup_timer, p->startup_delay);
                    174:     }
                    175:   else
                    176:     bgp_startup(p);
                    177: }
                    178: 
                    179: /**
                    180:  * bgp_close - close a BGP instance
                    181:  * @p: BGP instance
                    182:  * @apply_md5: 0 to disable unsetting MD5 auth
                    183:  *
                    184:  * This function frees and deconfigures shared BGP resources.
                    185:  * @apply_md5 is set to 0 when bgp_close is called as a cleanup
                    186:  * from failed bgp_open().
                    187:  */
                    188: static void
                    189: bgp_close(struct bgp_proto *p, int apply_md5)
                    190: {
                    191:   ASSERT(bgp_counter);
                    192:   bgp_counter--;
                    193: 
                    194:   if (p->cf->password && apply_md5)
                    195:     if (sk_set_md5_auth(bgp_listen_sk, p->cf->source_addr, p->cf->remote_ip,
                    196:                        p->cf->iface, NULL, p->cf->setkey) < 0)
                    197:       sk_log_error(bgp_listen_sk, p->p.name);
                    198: 
                    199:   if (!bgp_counter)
                    200:     {
                    201:       rfree(bgp_listen_sk);
                    202:       bgp_listen_sk = NULL;
                    203:       rfree(bgp_linpool);
                    204:       bgp_linpool = NULL;
                    205:     }
                    206: }
                    207: 
                    208: /**
                    209:  * bgp_start_timer - start a BGP timer
                    210:  * @t: timer
                    211:  * @value: time to fire (0 to disable the timer)
                    212:  *
                    213:  * This functions calls tm_start() on @t with time @value and the
                    214:  * amount of randomization suggested by the BGP standard. Please use
                    215:  * it for all BGP timers.
                    216:  */
                    217: void
                    218: bgp_start_timer(timer *t, int value)
                    219: {
                    220:   if (value)
                    221:     {
                    222:       /* The randomization procedure is specified in RFC 1771: 9.2.3.3 */
                    223:       t->randomize = value / 4;
                    224:       tm_start(t, value - t->randomize);
                    225:     }
                    226:   else
                    227:     tm_stop(t);
                    228: }
                    229: 
                    230: /**
                    231:  * bgp_close_conn - close a BGP connection
                    232:  * @conn: connection to close
                    233:  *
                    234:  * This function takes a connection described by the &bgp_conn structure,
                    235:  * closes its socket and frees all resources associated with it.
                    236:  */
                    237: void
                    238: bgp_close_conn(struct bgp_conn *conn)
                    239: {
                    240:   // struct bgp_proto *p = conn->bgp;
                    241: 
                    242:   DBG("BGP: Closing connection\n");
                    243:   conn->packets_to_send = 0;
                    244:   rfree(conn->connect_retry_timer);
                    245:   conn->connect_retry_timer = NULL;
                    246:   rfree(conn->keepalive_timer);
                    247:   conn->keepalive_timer = NULL;
                    248:   rfree(conn->hold_timer);
                    249:   conn->hold_timer = NULL;
                    250:   rfree(conn->sk);
                    251:   conn->sk = NULL;
                    252:   rfree(conn->tx_ev);
                    253:   conn->tx_ev = NULL;
                    254: }
                    255: 
                    256: 
                    257: /**
                    258:  * bgp_update_startup_delay - update a startup delay
                    259:  * @p: BGP instance
                    260:  *
                    261:  * This function updates a startup delay that is used to postpone next BGP connect.
                    262:  * It also handles disable_after_error and might stop BGP instance when error
                    263:  * happened and disable_after_error is on.
                    264:  *
                    265:  * It should be called when BGP protocol error happened.
                    266:  */
                    267: void
                    268: bgp_update_startup_delay(struct bgp_proto *p)
                    269: {
                    270:   struct bgp_config *cf = p->cf;
                    271: 
                    272:   DBG("BGP: Updating startup delay\n");
                    273: 
                    274:   if (p->last_proto_error && ((now - p->last_proto_error) >= (int) cf->error_amnesia_time))
                    275:     p->startup_delay = 0;
                    276: 
                    277:   p->last_proto_error = now;
                    278: 
                    279:   if (cf->disable_after_error)
                    280:     {
                    281:       p->startup_delay = 0;
                    282:       p->p.disabled = 1;
                    283:       return;
                    284:     }
                    285: 
                    286:   if (!p->startup_delay)
                    287:     p->startup_delay = cf->error_delay_time_min;
                    288:   else
                    289:     p->startup_delay = MIN(2 * p->startup_delay, cf->error_delay_time_max);
                    290: }
                    291: 
                    292: static void
                    293: bgp_graceful_close_conn(struct bgp_conn *conn, unsigned subcode)
                    294: {
                    295:   switch (conn->state)
                    296:     {
                    297:     case BS_IDLE:
                    298:     case BS_CLOSE:
                    299:       return;
                    300:     case BS_CONNECT:
                    301:     case BS_ACTIVE:
                    302:       bgp_conn_enter_idle_state(conn);
                    303:       return;
                    304:     case BS_OPENSENT:
                    305:     case BS_OPENCONFIRM:
                    306:     case BS_ESTABLISHED:
                    307:       bgp_error(conn, 6, subcode, NULL, 0);
                    308:       return;
                    309:     default:
                    310:       bug("bgp_graceful_close_conn: Unknown state %d", conn->state);
                    311:     }
                    312: }
                    313: 
                    314: static void
                    315: bgp_down(struct bgp_proto *p)
                    316: {
                    317:   if (p->start_state > BSS_PREPARE)
                    318:     bgp_close(p, 1);
                    319: 
                    320:   BGP_TRACE(D_EVENTS, "Down");
                    321:   proto_notify_state(&p->p, PS_DOWN);
                    322: }
                    323: 
                    324: static void
                    325: bgp_decision(void *vp)
                    326: {
                    327:   struct bgp_proto *p = vp;
                    328: 
                    329:   DBG("BGP: Decision start\n");
                    330:   if ((p->p.proto_state == PS_START)
                    331:       && (p->outgoing_conn.state == BS_IDLE)
                    332:       && (p->incoming_conn.state != BS_OPENCONFIRM)
                    333:       && (!p->cf->passive))
                    334:     bgp_active(p);
                    335: 
                    336:   if ((p->p.proto_state == PS_STOP)
                    337:       && (p->outgoing_conn.state == BS_IDLE)
                    338:       && (p->incoming_conn.state == BS_IDLE))
                    339:     bgp_down(p);
                    340: }
                    341: 
                    342: void
                    343: bgp_stop(struct bgp_proto *p, unsigned subcode)
                    344: {
                    345:   proto_notify_state(&p->p, PS_STOP);
                    346:   bgp_graceful_close_conn(&p->outgoing_conn, subcode);
                    347:   bgp_graceful_close_conn(&p->incoming_conn, subcode);
                    348:   ev_schedule(p->event);
                    349: }
                    350: 
                    351: static inline void
                    352: bgp_conn_set_state(struct bgp_conn *conn, unsigned new_state)
                    353: {
                    354:   if (conn->bgp->p.mrtdump & MD_STATES)
                    355:     mrt_dump_bgp_state_change(conn, conn->state, new_state);
                    356: 
                    357:   conn->state = new_state;
                    358: }
                    359: 
                    360: void
                    361: bgp_conn_enter_openconfirm_state(struct bgp_conn *conn)
                    362: {
                    363:   /* Really, most of the work is done in bgp_rx_open(). */
                    364:   bgp_conn_set_state(conn, BS_OPENCONFIRM);
                    365: }
                    366: 
                    367: void
                    368: bgp_conn_enter_established_state(struct bgp_conn *conn)
                    369: {
                    370:   struct bgp_proto *p = conn->bgp;
                    371: 
                    372:   BGP_TRACE(D_EVENTS, "BGP session established");
                    373:   DBG("BGP: UP!!!\n");
                    374: 
                    375:   /* For multi-hop BGP sessions */
                    376:   if (ipa_zero(p->source_addr))
                    377:     p->source_addr = conn->sk->saddr;
                    378: 
                    379:   conn->sk->fast_rx = 0;
                    380: 
                    381:   p->conn = conn;
                    382:   p->last_error_class = 0;
                    383:   p->last_error_code = 0;
                    384:   p->feed_state = BFS_NONE;
                    385:   p->load_state = BFS_NONE;
                    386:   bgp_init_bucket_table(p);
                    387:   bgp_init_prefix_table(p, 8);
                    388: 
                    389:   int peer_gr_ready = conn->peer_gr_aware && !(conn->peer_gr_flags & BGP_GRF_RESTART);
                    390: 
                    391:   if (p->p.gr_recovery && !peer_gr_ready)
                    392:     proto_graceful_restart_unlock(&p->p);
                    393: 
                    394:   if (p->p.gr_recovery && (p->cf->gr_mode == BGP_GR_ABLE) && peer_gr_ready)
                    395:     p->p.gr_wait = 1;
                    396: 
                    397:   if (p->gr_active)
                    398:     tm_stop(p->gr_timer);
                    399: 
                    400:   if (p->gr_active && (!conn->peer_gr_able || !(conn->peer_gr_aflags & BGP_GRF_FORWARDING)))
                    401:     bgp_graceful_restart_done(p);
                    402: 
                    403:   /* GR capability implies that neighbor will send End-of-RIB */
                    404:   if (conn->peer_gr_aware)
                    405:     p->load_state = BFS_LOADING;
                    406: 
                    407:   /* proto_notify_state() will likely call bgp_feed_begin(), setting p->feed_state */
                    408: 
                    409:   bgp_conn_set_state(conn, BS_ESTABLISHED);
                    410:   proto_notify_state(&p->p, PS_UP);
                    411: }
                    412: 
                    413: static void
                    414: bgp_conn_leave_established_state(struct bgp_proto *p)
                    415: {
                    416:   BGP_TRACE(D_EVENTS, "BGP session closed");
                    417:   p->conn = NULL;
                    418: 
                    419:   bgp_free_prefix_table(p);
                    420:   bgp_free_bucket_table(p);
                    421: 
                    422:   if (p->p.proto_state == PS_UP)
                    423:     bgp_stop(p, 0);
                    424: }
                    425: 
                    426: void
                    427: bgp_conn_enter_close_state(struct bgp_conn *conn)
                    428: {
                    429:   struct bgp_proto *p = conn->bgp;
                    430:   int os = conn->state;
                    431: 
                    432:   bgp_conn_set_state(conn, BS_CLOSE);
                    433:   tm_stop(conn->keepalive_timer);
                    434:   conn->sk->rx_hook = NULL;
                    435: 
                    436:   /* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */
                    437:   bgp_start_timer(conn->hold_timer, 10);
                    438: 
                    439:   if (os == BS_ESTABLISHED)
                    440:     bgp_conn_leave_established_state(p);
                    441: }
                    442: 
                    443: void
                    444: bgp_conn_enter_idle_state(struct bgp_conn *conn)
                    445: {
                    446:   struct bgp_proto *p = conn->bgp;
                    447:   int os = conn->state;
                    448: 
                    449:   bgp_close_conn(conn);
                    450:   bgp_conn_set_state(conn, BS_IDLE);
                    451:   ev_schedule(p->event);
                    452: 
                    453:   if (os == BS_ESTABLISHED)
                    454:     bgp_conn_leave_established_state(p);
                    455: }
                    456: 
                    457: /**
                    458:  * bgp_handle_graceful_restart - handle detected BGP graceful restart
                    459:  * @p: BGP instance
                    460:  *
                    461:  * This function is called when a BGP graceful restart of the neighbor is
                    462:  * detected (when the TCP connection fails or when a new TCP connection
                    463:  * appears). The function activates processing of the restart - starts routing
                    464:  * table refresh cycle and activates BGP restart timer. The protocol state goes
                    465:  * back to %PS_START, but changing BGP state back to %BS_IDLE is left for the
                    466:  * caller.
                    467:  */
                    468: void
                    469: bgp_handle_graceful_restart(struct bgp_proto *p)
                    470: {
                    471:   ASSERT(p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready);
                    472: 
                    473:   BGP_TRACE(D_EVENTS, "Neighbor graceful restart detected%s",
                    474:            p->gr_active ? " - already pending" : "");
                    475:   proto_notify_state(&p->p, PS_START);
                    476: 
                    477:   if (p->gr_active)
                    478:     rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
                    479: 
                    480:   p->gr_active = 1;
                    481:   bgp_start_timer(p->gr_timer, p->conn->peer_gr_time);
                    482:   rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
                    483: }
                    484: 
                    485: /**
                    486:  * bgp_graceful_restart_done - finish active BGP graceful restart
                    487:  * @p: BGP instance
                    488:  *
                    489:  * This function is called when the active BGP graceful restart of the neighbor
                    490:  * should be finished - either successfully (the neighbor sends all paths and
                    491:  * reports end-of-RIB on the new session) or unsuccessfully (the neighbor does
                    492:  * not support BGP graceful restart on the new session). The function ends
                    493:  * routing table refresh cycle and stops BGP restart timer.
                    494:  */
                    495: void
                    496: bgp_graceful_restart_done(struct bgp_proto *p)
                    497: {
                    498:   BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
                    499:   p->gr_active = 0;
                    500:   tm_stop(p->gr_timer);
                    501:   rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
                    502: }
                    503: 
                    504: /**
                    505:  * bgp_graceful_restart_timeout - timeout of graceful restart 'restart timer'
                    506:  * @t: timer
                    507:  *
                    508:  * This function is a timeout hook for @gr_timer, implementing BGP restart time
                    509:  * limit for reestablisment of the BGP session after the graceful restart. When
                    510:  * fired, we just proceed with the usual protocol restart.
                    511:  */
                    512: 
                    513: static void
                    514: bgp_graceful_restart_timeout(timer *t)
                    515: {
                    516:   struct bgp_proto *p = t->data;
                    517: 
                    518:   BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
                    519:   bgp_stop(p, 0);
                    520: }
                    521: 
                    522: 
                    523: /**
                    524:  * bgp_refresh_begin - start incoming enhanced route refresh sequence
                    525:  * @p: BGP instance
                    526:  *
                    527:  * This function is called when an incoming enhanced route refresh sequence is
                    528:  * started by the neighbor, demarcated by the BoRR packet. The function updates
                    529:  * the load state and starts the routing table refresh cycle. Note that graceful
                    530:  * restart also uses routing table refresh cycle, but RFC 7313 and load states
                    531:  * ensure that these two sequences do not overlap.
                    532:  */
                    533: void
                    534: bgp_refresh_begin(struct bgp_proto *p)
                    535: {
                    536:   if (p->load_state == BFS_LOADING)
                    537:     { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }
                    538: 
                    539:   p->load_state = BFS_REFRESHING;
                    540:   rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
                    541: }
                    542: 
                    543: /**
                    544:  * bgp_refresh_end - finish incoming enhanced route refresh sequence
                    545:  * @p: BGP instance
                    546:  *
                    547:  * This function is called when an incoming enhanced route refresh sequence is
                    548:  * finished by the neighbor, demarcated by the EoRR packet. The function updates
                    549:  * the load state and ends the routing table refresh cycle. Routes not received
                    550:  * during the sequence are removed by the nest.
                    551:  */
                    552: void
                    553: bgp_refresh_end(struct bgp_proto *p)
                    554: {
                    555:   if (p->load_state != BFS_REFRESHING)
                    556:     { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }
                    557: 
                    558:   p->load_state = BFS_NONE;
                    559:   rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
                    560: }
                    561: 
                    562: 
                    563: static void
                    564: bgp_send_open(struct bgp_conn *conn)
                    565: {
                    566:   conn->start_state = conn->bgp->start_state;
                    567: 
                    568:   // Default values, possibly changed by receiving capabilities.
                    569:   conn->advertised_as = 0;
                    570:   conn->peer_refresh_support = 0;
                    571:   conn->peer_as4_support = 0;
                    572:   conn->peer_add_path = 0;
                    573:   conn->peer_enhanced_refresh_support = 0;
                    574:   conn->peer_gr_aware = 0;
                    575:   conn->peer_gr_able = 0;
                    576:   conn->peer_gr_time = 0;
                    577:   conn->peer_gr_flags = 0;
                    578:   conn->peer_gr_aflags = 0;
                    579:   conn->peer_ext_messages_support = 0;
                    580: 
                    581:   DBG("BGP: Sending open\n");
                    582:   conn->sk->rx_hook = bgp_rx;
                    583:   conn->sk->tx_hook = bgp_tx;
                    584:   tm_stop(conn->connect_retry_timer);
                    585:   bgp_schedule_packet(conn, PKT_OPEN);
                    586:   bgp_conn_set_state(conn, BS_OPENSENT);
                    587:   bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
                    588: }
                    589: 
                    590: static void
                    591: bgp_connected(sock *sk)
                    592: {
                    593:   struct bgp_conn *conn = sk->data;
                    594:   struct bgp_proto *p = conn->bgp;
                    595: 
                    596:   BGP_TRACE(D_EVENTS, "Connected");
                    597:   bgp_send_open(conn);
                    598: }
                    599: 
                    600: static void
                    601: bgp_connect_timeout(timer *t)
                    602: {
                    603:   struct bgp_conn *conn = t->data;
                    604:   struct bgp_proto *p = conn->bgp;
                    605: 
                    606:   DBG("BGP: connect_timeout\n");
                    607:   if (p->p.proto_state == PS_START)
                    608:     {
                    609:       bgp_close_conn(conn);
                    610:       bgp_connect(p);
                    611:     }
                    612:   else
                    613:     bgp_conn_enter_idle_state(conn);
                    614: }
                    615: 
                    616: static void
                    617: bgp_sock_err(sock *sk, int err)
                    618: {
                    619:   struct bgp_conn *conn = sk->data;
                    620:   struct bgp_proto *p = conn->bgp;
                    621: 
                    622:   /*
                    623:    * This error hook may be called either asynchronously from main
                    624:    * loop, or synchronously from sk_send().  But sk_send() is called
                    625:    * only from bgp_tx() and bgp_kick_tx(), which are both called
                    626:    * asynchronously from main loop. Moreover, they end if err hook is
                    627:    * called. Therefore, we could suppose that it is always called
                    628:    * asynchronously.
                    629:    */
                    630: 
                    631:   bgp_store_error(p, conn, BE_SOCKET, err);
                    632: 
                    633:   if (err)
                    634:     BGP_TRACE(D_EVENTS, "Connection lost (%M)", err);
                    635:   else
                    636:     BGP_TRACE(D_EVENTS, "Connection closed");
                    637: 
                    638:   if ((conn->state == BS_ESTABLISHED) && p->gr_ready)
                    639:     bgp_handle_graceful_restart(p);
                    640: 
                    641:   bgp_conn_enter_idle_state(conn);
                    642: }
                    643: 
                    644: static void
                    645: bgp_hold_timeout(timer *t)
                    646: {
                    647:   struct bgp_conn *conn = t->data;
                    648:   struct bgp_proto *p = conn->bgp;
                    649: 
                    650:   DBG("BGP: Hold timeout\n");
                    651: 
                    652:   /* We are already closing the connection - just do hangup */
                    653:   if (conn->state == BS_CLOSE)
                    654:   {
                    655:     BGP_TRACE(D_EVENTS, "Connection stalled");
                    656:     bgp_conn_enter_idle_state(conn);
                    657:     return;
                    658:   }
                    659: 
                    660:   /* If there is something in input queue, we are probably congested
                    661:      and perhaps just not processed BGP packets in time. */
                    662: 
                    663:   if (sk_rx_ready(conn->sk) > 0)
                    664:     bgp_start_timer(conn->hold_timer, 10);
                    665:   else
                    666:     bgp_error(conn, 4, 0, NULL, 0);
                    667: }
                    668: 
                    669: static void
                    670: bgp_keepalive_timeout(timer *t)
                    671: {
                    672:   struct bgp_conn *conn = t->data;
                    673: 
                    674:   DBG("BGP: Keepalive timer\n");
                    675:   bgp_schedule_packet(conn, PKT_KEEPALIVE);
                    676: 
                    677:   /* Kick TX a bit faster */
                    678:   if (ev_active(conn->tx_ev))
                    679:     ev_run(conn->tx_ev);
                    680: }
                    681: 
                    682: static void
                    683: bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
                    684: {
                    685:   timer *t;
                    686: 
                    687:   conn->sk = NULL;
                    688:   conn->bgp = p;
                    689:   conn->packets_to_send = 0;
                    690: 
                    691:   t = conn->connect_retry_timer = tm_new(p->p.pool);
                    692:   t->hook = bgp_connect_timeout;
                    693:   t->data = conn;
                    694:   t = conn->hold_timer = tm_new(p->p.pool);
                    695:   t->hook = bgp_hold_timeout;
                    696:   t->data = conn;
                    697:   t = conn->keepalive_timer = tm_new(p->p.pool);
                    698:   t->hook = bgp_keepalive_timeout;
                    699:   t->data = conn;
                    700:   conn->tx_ev = ev_new(p->p.pool);
                    701:   conn->tx_ev->hook = bgp_kick_tx;
                    702:   conn->tx_ev->data = conn;
                    703: }
                    704: 
                    705: static void
                    706: bgp_setup_sk(struct bgp_conn *conn, sock *s)
                    707: {
                    708:   s->data = conn;
                    709:   s->err_hook = bgp_sock_err;
                    710:   s->fast_rx = 1;
                    711:   conn->sk = s;
                    712: }
                    713: 
                    714: static void
                    715: bgp_active(struct bgp_proto *p)
                    716: {
                    717:   int delay = MAX(1, p->cf->connect_delay_time);
                    718:   struct bgp_conn *conn = &p->outgoing_conn;
                    719: 
                    720:   BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
                    721:   bgp_setup_conn(p, conn);
                    722:   bgp_conn_set_state(conn, BS_ACTIVE);
                    723:   bgp_start_timer(conn->connect_retry_timer, delay);
                    724: }
                    725: 
                    726: /**
                    727:  * bgp_connect - initiate an outgoing connection
                    728:  * @p: BGP instance
                    729:  *
                    730:  * The bgp_connect() function creates a new &bgp_conn and initiates
                    731:  * a TCP connection to the peer. The rest of connection setup is governed
                    732:  * by the BGP state machine as described in the standard.
                    733:  */
                    734: static void
                    735: bgp_connect(struct bgp_proto *p)       /* Enter Connect state and start establishing connection */
                    736: {
                    737:   sock *s;
                    738:   struct bgp_conn *conn = &p->outgoing_conn;
                    739:   int hops = p->cf->multihop ? : 1;
                    740: 
                    741:   DBG("BGP: Connecting\n");
                    742:   s = sk_new(p->p.pool);
                    743:   s->type = SK_TCP_ACTIVE;
                    744:   s->saddr = p->source_addr;
                    745:   s->daddr = p->cf->remote_ip;
                    746:   s->dport = p->cf->remote_port;
                    747:   s->iface = p->neigh ? p->neigh->iface : NULL;
                    748:   s->ttl = p->cf->ttl_security ? 255 : hops;
                    749:   s->rbsize = p->cf->enable_extended_messages ? BGP_RX_BUFFER_EXT_SIZE : BGP_RX_BUFFER_SIZE;
                    750:   s->tbsize = p->cf->enable_extended_messages ? BGP_TX_BUFFER_EXT_SIZE : BGP_TX_BUFFER_SIZE;
                    751:   s->tos = IP_PREC_INTERNET_CONTROL;
                    752:   s->password = p->cf->password;
                    753:   s->tx_hook = bgp_connected;
                    754:   BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J", s->daddr, p->cf->iface,
                    755:            s->saddr, ipa_is_link_local(s->saddr) ? s->iface : NULL);
                    756:   bgp_setup_conn(p, conn);
                    757:   bgp_setup_sk(conn, s);
                    758:   bgp_conn_set_state(conn, BS_CONNECT);
                    759: 
                    760:   if (sk_open(s) < 0)
                    761:     goto err;
                    762: 
                    763:   /* Set minimal receive TTL if needed */
                    764:   if (p->cf->ttl_security)
                    765:     if (sk_set_min_ttl(s, 256 - hops) < 0)
                    766:       goto err;
                    767: 
                    768:   DBG("BGP: Waiting for connect success\n");
                    769:   bgp_start_timer(conn->connect_retry_timer, p->cf->connect_retry_time);
                    770:   return;
                    771: 
                    772:  err:
                    773:   sk_log_error(s, p->p.name);
                    774:   bgp_sock_err(s, 0);
                    775:   return;
                    776: }
                    777: 
                    778: /**
                    779:  * bgp_find_proto - find existing proto for incoming connection
                    780:  * @sk: TCP socket
                    781:  *
                    782:  */
                    783: static struct bgp_proto *
                    784: bgp_find_proto(sock *sk)
                    785: {
                    786:   struct proto_config *pc;
                    787: 
                    788:   WALK_LIST(pc, config->protos)
                    789:     if ((pc->protocol == &proto_bgp) && pc->proto)
                    790:       {
                    791:        struct bgp_proto *p = (struct bgp_proto *) pc->proto;
                    792:        if (ipa_equal(p->cf->remote_ip, sk->daddr) &&
                    793:            (!ipa_is_link_local(sk->daddr) || (p->cf->iface == sk->iface)))
                    794:          return p;
                    795:       }
                    796: 
                    797:   return NULL;
                    798: }
                    799: 
                    800: /**
                    801:  * bgp_incoming_connection - handle an incoming connection
                    802:  * @sk: TCP socket
                    803:  * @dummy: unused
                    804:  *
                    805:  * This function serves as a socket hook for accepting of new BGP
                    806:  * connections. It searches a BGP instance corresponding to the peer
                    807:  * which has connected and if such an instance exists, it creates a
                    808:  * &bgp_conn structure, attaches it to the instance and either sends
                    809:  * an Open message or (if there already is an active connection) it
                    810:  * closes the new connection by sending a Notification message.
                    811:  */
                    812: static int
                    813: bgp_incoming_connection(sock *sk, uint dummy UNUSED)
                    814: {
                    815:   struct bgp_proto *p;
                    816:   int acc, hops;
                    817: 
                    818:   DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport);
                    819:   p = bgp_find_proto(sk);
                    820:   if (!p)
                    821:     {
                    822:       log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)",
                    823:          sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport);
                    824:       rfree(sk);
                    825:       return 0;
                    826:     }
                    827: 
                    828:   /*
                    829:    * BIRD should keep multiple incoming connections in OpenSent state (for
                    830:    * details RFC 4271 8.2.1 par 3), but it keeps just one. Duplicate incoming
                    831:    * connections are rejected istead. The exception is the case where an
                    832:    * incoming connection triggers a graceful restart.
                    833:    */
                    834: 
                    835:   acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) &&
                    836:     (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);
                    837: 
                    838:   if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
                    839:     {
                    840:       bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART);
                    841:       bgp_handle_graceful_restart(p);
                    842:       bgp_conn_enter_idle_state(p->conn);
                    843:       acc = 1;
                    844: 
                    845:       /* There might be separate incoming connection in OpenSent state */
                    846:       if (p->incoming_conn.state > BS_ACTIVE)
                    847:        bgp_close_conn(&p->incoming_conn);
                    848:     }
                    849: 
                    850:   BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s",
                    851:            sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL,
                    852:            sk->dport, acc ? "accepted" : "rejected");
                    853: 
                    854:   if (!acc)
                    855:     {
                    856:       rfree(sk);
                    857:       return 0;
                    858:     }
                    859: 
                    860:   hops = p->cf->multihop ? : 1;
                    861: 
                    862:   if (sk_set_ttl(sk, p->cf->ttl_security ? 255 : hops) < 0)
                    863:     goto err;
                    864: 
                    865:   if (p->cf->ttl_security)
                    866:     if (sk_set_min_ttl(sk, 256 - hops) < 0)
                    867:       goto err;
                    868: 
                    869:   if (p->cf->enable_extended_messages)
                    870:     {
                    871:       sk->rbsize = BGP_RX_BUFFER_EXT_SIZE;
                    872:       sk->tbsize = BGP_TX_BUFFER_EXT_SIZE;
                    873:       sk_reallocate(sk);
                    874:     }
                    875: 
                    876:   bgp_setup_conn(p, &p->incoming_conn);
                    877:   bgp_setup_sk(&p->incoming_conn, sk);
                    878:   bgp_send_open(&p->incoming_conn);
                    879:   return 0;
                    880: 
                    881: err:
                    882:   sk_log_error(sk, p->p.name);
                    883:   log(L_ERR "%s: Incoming connection aborted", p->p.name);
                    884:   rfree(sk);
                    885:   return 0;
                    886: }
                    887: 
                    888: static void
                    889: bgp_listen_sock_err(sock *sk UNUSED, int err)
                    890: {
                    891:   if (err == ECONNABORTED)
                    892:     log(L_WARN "BGP: Incoming connection aborted");
                    893:   else
                    894:     log(L_ERR "BGP: Error on listening socket: %M", err);
                    895: }
                    896: 
                    897: static sock *
                    898: bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags)
                    899: {
                    900:   sock *s = sk_new(&root_pool);
                    901:   DBG("BGP: Creating listening socket\n");
                    902:   s->type = SK_TCP_PASSIVE;
                    903:   s->ttl = 255;
                    904:   s->saddr = addr;
                    905:   s->sport = port ? port : BGP_PORT;
                    906:   s->flags = flags ? 0 : SKF_V6ONLY;
                    907:   s->tos = IP_PREC_INTERNET_CONTROL;
                    908:   s->rbsize = BGP_RX_BUFFER_SIZE;
                    909:   s->tbsize = BGP_TX_BUFFER_SIZE;
                    910:   s->rx_hook = bgp_incoming_connection;
                    911:   s->err_hook = bgp_listen_sock_err;
                    912: 
                    913:   if (sk_open(s) < 0)
                    914:     goto err;
                    915: 
                    916:   return s;
                    917: 
                    918:  err:
                    919:   sk_log_error(s, "BGP");
                    920:   log(L_ERR "BGP: Cannot open listening socket");
                    921:   rfree(s);
                    922:   return NULL;
                    923: }
                    924: 
                    925: static void
                    926: bgp_start_neighbor(struct bgp_proto *p)
                    927: {
                    928:   /* Called only for single-hop BGP sessions */
                    929: 
                    930:   if (ipa_zero(p->source_addr))
                    931:     p->source_addr = p->neigh->ifa->ip;
                    932: 
                    933: #ifdef IPV6
                    934:   {
                    935:     struct ifa *a;
                    936:     p->local_link = IPA_NONE;
                    937:     WALK_LIST(a, p->neigh->iface->addrs)
                    938:       if (a->scope == SCOPE_LINK)
                    939:         {
                    940:          p->local_link = a->ip;
                    941:          break;
                    942:        }
                    943: 
                    944:     if (! ipa_nonzero(p->local_link))
                    945:       log(L_WARN "%s: Missing link local address on interface %s", p->p.name,  p->neigh->iface->name);
                    946: 
                    947:     DBG("BGP: Selected link-level address %I\n", p->local_link);
                    948:   }
                    949: #endif
                    950: 
                    951:   bgp_initiate(p);
                    952: }
                    953: 
                    954: static void
                    955: bgp_neigh_notify(neighbor *n)
                    956: {
                    957:   struct bgp_proto *p = (struct bgp_proto *) n->proto;
                    958:   int ps = p->p.proto_state;
                    959: 
                    960:   if (n != p->neigh)
                    961:     return;
                    962: 
                    963:   if ((ps == PS_DOWN) || (ps == PS_STOP))
                    964:     return;
                    965: 
                    966:   int prepare = (ps == PS_START) && (p->start_state == BSS_PREPARE);
                    967: 
                    968:   if (n->scope <= 0)
                    969:     {
                    970:       if (!prepare)
                    971:         {
                    972:          BGP_TRACE(D_EVENTS, "Neighbor lost");
                    973:          bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST);
                    974:          /* Perhaps also run bgp_update_startup_delay(p)? */
                    975:          bgp_stop(p, 0);
                    976:        }
                    977:     }
                    978:   else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
                    979:     {
                    980:       if (!prepare)
                    981:         {
                    982:          BGP_TRACE(D_EVENTS, "Link down");
                    983:          bgp_store_error(p, NULL, BE_MISC, BEM_LINK_DOWN);
                    984:          if (ps == PS_UP)
                    985:            bgp_update_startup_delay(p);
                    986:          bgp_stop(p, 0);
                    987:        }
                    988:     }
                    989:   else
                    990:     {
                    991:       if (prepare)
                    992:        {
                    993:          BGP_TRACE(D_EVENTS, "Neighbor ready");
                    994:          bgp_start_neighbor(p);
                    995:        }
                    996:     }
                    997: }
                    998: 
                    999: static void
                   1000: bgp_bfd_notify(struct bfd_request *req)
                   1001: {
                   1002:   struct bgp_proto *p = req->data;
                   1003:   int ps = p->p.proto_state;
                   1004: 
                   1005:   if (req->down && ((ps == PS_START) || (ps == PS_UP)))
                   1006:     {
                   1007:       BGP_TRACE(D_EVENTS, "BFD session down");
                   1008:       bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN);
                   1009:       if (ps == PS_UP)
                   1010:        bgp_update_startup_delay(p);
                   1011:       bgp_stop(p, 0);
                   1012:     }
                   1013: }
                   1014: 
                   1015: static void
                   1016: bgp_update_bfd(struct bgp_proto *p, int use_bfd)
                   1017: {
                   1018:   if (use_bfd && !p->bfd_req)
                   1019:     p->bfd_req = bfd_request_session(p->p.pool, p->cf->remote_ip, p->source_addr,
                   1020:                                     p->cf->multihop ? NULL : p->neigh->iface,
                   1021:                                     bgp_bfd_notify, p);
                   1022: 
                   1023:   if (!use_bfd && p->bfd_req)
                   1024:     {
                   1025:       rfree(p->bfd_req);
                   1026:       p->bfd_req = NULL;
                   1027:     }
                   1028: }
                   1029: 
                   1030: static int
                   1031: bgp_reload_routes(struct proto *P)
                   1032: {
                   1033:   struct bgp_proto *p = (struct bgp_proto *) P;
                   1034:   if (!p->conn || !p->conn->peer_refresh_support)
                   1035:     return 0;
                   1036: 
                   1037:   bgp_schedule_packet(p->conn, PKT_ROUTE_REFRESH);
                   1038:   return 1;
                   1039: }
                   1040: 
                   1041: static void
                   1042: bgp_feed_begin(struct proto *P, int initial)
                   1043: {
                   1044:   struct bgp_proto *p = (struct bgp_proto *) P;
                   1045: 
                   1046:   /* This should not happen */
                   1047:   if (!p->conn)
                   1048:     return;
                   1049: 
                   1050:   if (initial && p->cf->gr_mode)
                   1051:     p->feed_state = BFS_LOADING;
                   1052: 
                   1053:   /* It is refeed and both sides support enhanced route refresh */
                   1054:   if (!initial && p->cf->enable_refresh &&
                   1055:       p->conn->peer_enhanced_refresh_support)
                   1056:     {
                   1057:       /* BoRR must not be sent before End-of-RIB */
                   1058:       if (p->feed_state == BFS_LOADING || p->feed_state == BFS_LOADED)
                   1059:        return;
                   1060: 
                   1061:       p->feed_state = BFS_REFRESHING;
                   1062:       bgp_schedule_packet(p->conn, PKT_BEGIN_REFRESH);
                   1063:     }
                   1064: }
                   1065: 
                   1066: static void
                   1067: bgp_feed_end(struct proto *P)
                   1068: {
                   1069:   struct bgp_proto *p = (struct bgp_proto *) P;
                   1070: 
                   1071:   /* This should not happen */
                   1072:   if (!p->conn)
                   1073:     return;
                   1074: 
                   1075:   /* Non-demarcated feed ended, nothing to do */
                   1076:   if (p->feed_state == BFS_NONE)
                   1077:     return;
                   1078: 
                   1079:   /* Schedule End-of-RIB packet */
                   1080:   if (p->feed_state == BFS_LOADING)
                   1081:     p->feed_state = BFS_LOADED;
                   1082: 
                   1083:   /* Schedule EoRR packet */
                   1084:   if (p->feed_state == BFS_REFRESHING)
                   1085:     p->feed_state = BFS_REFRESHED;
                   1086: 
                   1087:   /* Kick TX hook */
                   1088:   bgp_schedule_packet(p->conn, PKT_UPDATE);
                   1089: }
                   1090: 
                   1091: 
                   1092: static void
                   1093: bgp_start_locked(struct object_lock *lock)
                   1094: {
                   1095:   struct bgp_proto *p = lock->data;
                   1096:   struct bgp_config *cf = p->cf;
                   1097: 
                   1098:   if (p->p.proto_state != PS_START)
                   1099:     {
                   1100:       DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
                   1101:       return;
                   1102:     }
                   1103: 
                   1104:   DBG("BGP: Got lock\n");
                   1105: 
                   1106:   if (cf->multihop)
                   1107:     {
                   1108:       /* Multi-hop sessions do not use neighbor entries */
                   1109:       bgp_initiate(p);
                   1110:       return;
                   1111:     }
                   1112: 
                   1113:   neighbor *n = neigh_find2(&p->p, &cf->remote_ip, cf->iface, NEF_STICKY);
                   1114:   if (!n)
                   1115:     {
                   1116:       log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface);
                   1117:       /* As we do not start yet, we can just disable protocol */
                   1118:       p->p.disabled = 1;
                   1119:       bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
                   1120:       proto_notify_state(&p->p, PS_DOWN);
                   1121:       return;
                   1122:     }
                   1123: 
                   1124:   p->neigh = n;
                   1125: 
                   1126:   if (n->scope <= 0)
                   1127:     BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", cf->remote_ip, cf->iface);
                   1128:   else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
                   1129:     BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name);
                   1130:   else
                   1131:     bgp_start_neighbor(p);
                   1132: }
                   1133: 
                   1134: static int
                   1135: bgp_start(struct proto *P)
                   1136: {
                   1137:   struct bgp_proto *p = (struct bgp_proto *) P;
                   1138:   struct object_lock *lock;
                   1139: 
                   1140:   DBG("BGP: Startup.\n");
                   1141:   p->start_state = BSS_PREPARE;
                   1142:   p->outgoing_conn.state = BS_IDLE;
                   1143:   p->incoming_conn.state = BS_IDLE;
                   1144:   p->neigh = NULL;
                   1145:   p->bfd_req = NULL;
                   1146:   p->gr_ready = 0;
                   1147:   p->gr_active = 0;
                   1148: 
                   1149:   rt_lock_table(p->igp_table);
                   1150: 
                   1151:   p->event = ev_new(p->p.pool);
                   1152:   p->event->hook = bgp_decision;
                   1153:   p->event->data = p;
                   1154: 
                   1155:   p->startup_timer = tm_new(p->p.pool);
                   1156:   p->startup_timer->hook = bgp_startup_timeout;
                   1157:   p->startup_timer->data = p;
                   1158: 
                   1159:   p->gr_timer = tm_new(p->p.pool);
                   1160:   p->gr_timer->hook = bgp_graceful_restart_timeout;
                   1161:   p->gr_timer->data = p;
                   1162: 
                   1163:   p->local_id = proto_get_router_id(P->cf);
                   1164:   if (p->rr_client)
                   1165:     p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
                   1166: 
                   1167:   p->remote_id = 0;
                   1168:   p->source_addr = p->cf->source_addr;
                   1169: 
                   1170:   if (p->p.gr_recovery && p->cf->gr_mode)
                   1171:     proto_graceful_restart_lock(P);
                   1172: 
                   1173:   /*
                   1174:    *  Before attempting to create the connection, we need to lock the
                   1175:    *  port, so that are sure we're the only instance attempting to talk
                   1176:    *  with that neighbor.
                   1177:    */
                   1178: 
                   1179:   lock = p->lock = olock_new(P->pool);
                   1180:   lock->addr = p->cf->remote_ip;
                   1181:   lock->port = p->cf->remote_port;
                   1182:   lock->iface = p->cf->iface;
                   1183:   lock->type = OBJLOCK_TCP;
                   1184:   lock->hook = bgp_start_locked;
                   1185:   lock->data = p;
                   1186:   olock_acquire(lock);
                   1187: 
                   1188:   return PS_START;
                   1189: }
                   1190: 
                   1191: extern int proto_restart;
                   1192: 
                         /*
                          * bgp_shutdown - protocol shutdown hook (wired as .shutdown in proto_bgp)
                          * @P: generic protocol instance; it is cast to &bgp_proto
                          *
                          * Maps P->down_code to a notification subcode, records the reason with
                          * bgp_store_error(), adjusts p->startup_delay and calls bgp_stop() with
                          * the chosen subcode. Returns p->p.proto_state as it is after bgp_stop().
                          *
                          * A down_code that is not listed in the switch leaves subcode at 0 and
                          * is recorded as BE_MAN_DOWN, like the configuration/command cases.
                          */
                   1193: static int
                   1194: bgp_shutdown(struct proto *P)
                   1195: {
                   1196:   struct bgp_proto *p = (struct bgp_proto *) P;
                   1197:   unsigned subcode = 0;
                   1198: 
                   1199:   BGP_TRACE(D_EVENTS, "Shutdown requested");
                   1200: 
                   1201:   switch (P->down_code)
                   1202:     {
                   1203:     case PDC_CF_REMOVE:
                   1204:     case PDC_CF_DISABLE:
                   1205:       subcode = 3; // Errcode 6, 3 - peer de-configured
                   1206:       break;
                   1207: 
                   1208:     case PDC_CF_RESTART:
                   1209:       subcode = 6; // Errcode 6, 6 - other configuration change
                   1210:       break;
                   1211: 
                   1212:     case PDC_CMD_DISABLE:
                   1213:     case PDC_CMD_SHUTDOWN:
                   1214:       subcode = 2; // Errcode 6, 2 - administrative shutdown
                   1215:       break;
                   1216: 
                   1217:     case PDC_CMD_RESTART:
                   1218:       subcode = 4; // Errcode 6, 4 - administrative reset
                   1219:       break;
                   1220: 
                   1221:     case PDC_RX_LIMIT_HIT:
                   1222:     case PDC_IN_LIMIT_HIT:
                   1223:       subcode = 1; // Errcode 6, 1 - max number of prefixes reached
                   1224:       /* log message for compatibility */
                   1225:       log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name);
                         /* Jump over the PDC_OUT_LIMIT_HIT assignment so subcode stays 1 */
                   1226:       goto limit;
                   1227: 
                   1228:     case PDC_OUT_LIMIT_HIT:
                   1229:       subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown
                   1230: 
                         /* Shared tail of all three limit cases (reached by goto or by
                          * falling through from PDC_OUT_LIMIT_HIT): record an automatic
                          * shutdown instead of a manual one. */
                   1231:     limit:
                   1232:       bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED);
                         /* proto_restart set: update the startup delay via the helper;
                          * otherwise clear it. */
                   1233:       if (proto_restart)
                   1234:        bgp_update_startup_delay(p);
                   1235:       else
                   1236:        p->startup_delay = 0;
                         /* Skip the BE_MAN_DOWN bookkeeping below */
                   1237:       goto done;
                   1238:     }
                   1239: 
                         /* Every non-limit path: manual/config shutdown, no startup delay */
                   1240:   bgp_store_error(p, NULL, BE_MAN_DOWN, 0);
                   1241:   p->startup_delay = 0;
                   1242: 
                   1243:  done:
                   1244:   bgp_stop(p, subcode);
                   1245:   return p->p.proto_state;
                   1246: }
                   1247: 
                         /*
                          * bgp_cleanup - cleanup hook (wired as .cleanup in proto_bgp)
                          * @P: generic protocol instance; it is cast to &bgp_proto
                          *
                          * Calls rt_unlock_table() on p->igp_table.
                          * NOTE(review): presumably balances a table lock taken when the
                          * protocol starts; the matching lock is not visible here - confirm.
                          */
                   1248: static void
                   1249: bgp_cleanup(struct proto *P)
                   1250: {
                   1251:   struct bgp_proto *p = (struct bgp_proto *) P;
                   1252:   rt_unlock_table(p->igp_table);
                   1253: }
                   1254: 
                         /*
                          * get_igp_table - pick the routing table stored as the instance's igp_table
                          * @cf: BGP configuration
                          *
                          * Returns the table behind cf->igp_table when that option is set,
                          * otherwise the table behind cf->c.table.
                          */
                   1255: static rtable *
                   1256: get_igp_table(struct bgp_config *cf)
                   1257: {
                   1258:   return cf->igp_table ? cf->igp_table->table : cf->c.table->table;
                   1259: }
                   1260: 
                         /*
                          * bgp_init - init hook (wired as .init in proto_bgp)
                          * @C: protocol configuration; it is also viewed as &bgp_config
                          *
                          * Creates the instance with proto_new() sized for struct bgp_proto,
                          * installs the BGP callbacks on it and copies the per-session settings
                          * from the configuration. Returns the new instance.
                          */
                   1261: static struct proto *
                   1262: bgp_init(struct proto_config *C)
                   1263: {
                   1264:   struct proto *P = proto_new(C, sizeof(struct bgp_proto));
                   1265:   struct bgp_config *c = (struct bgp_config *) C;
                   1266:   struct bgp_proto *p = (struct bgp_proto *) P;
                   1267: 
                         /* RA_ACCEPTED with the 'secondary' option, RA_OPTIMAL otherwise */
                   1268:   P->accept_ra_types = c->secondary ? RA_ACCEPTED : RA_OPTIMAL;
                   1269:   P->rt_notify = bgp_rt_notify;
                   1270:   P->import_control = bgp_import_control;
                   1271:   P->neigh_notify = bgp_neigh_notify;
                   1272:   P->reload_routes = bgp_reload_routes;
                   1273:   P->feed_begin = bgp_feed_begin;
                   1274:   P->feed_end = bgp_feed_end;
                   1275:   P->rte_better = bgp_rte_better;
                   1276:   P->rte_mergable = bgp_rte_mergable;
                         /* The recalculate hook is installed only for deterministic MED */
                   1277:   P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL;
                   1278: 
                   1279:   p->cf = c;
                   1280:   p->local_as = c->local_as;
                   1281:   p->remote_as = c->remote_as;
                         /* Same AS number on both sides marks the session as internal */
                   1282:   p->is_internal = (c->local_as == c->remote_as);
                   1283:   p->rs_client = c->rs_client;
                   1284:   p->rr_client = c->rr_client;
                   1285:   p->igp_table = get_igp_table(c);
                   1286: 
                   1287:   return P;
                   1288: }
                   1289: 
                   1290: 
                         /*
                          * bgp_check_config - fill in defaults and validate a BGP configuration
                          * @c: BGP configuration to check; it is modified in place
                          *
                          * Templates (c->c.class == SYM_TEMPLATE) are returned untouched. For
                          * other configurations the function first fills in values that depend
                          * on other options (multihop, gw_mode, missing_lladdr, the import-limit
                          * action) and then reports every inconsistent combination through
                          * cf_error(). The defaults must be set first because the later checks
                          * read c->multihop and c->gw_mode.
                          * NOTE(review): the checks read as if cf_error() does not return;
                          * that is not visible here - confirm.
                          */
                   1291: void
                   1292: bgp_check_config(struct bgp_config *c)
                   1293: {
                   1294:   int internal = (c->local_as == c->remote_as);
                   1295: 
                   1296:   /* Do not check templates at all */
                   1297:   if (c->c.class == SYM_TEMPLATE)
                   1298:     return;
                   1299: 
                   1300: 
                         /* A negative multihop value means the option was not set explicitly */
                   1301:   /* EBGP direct by default, IBGP multihop by default */
                   1302:   if (c->multihop < 0)
                   1303:     c->multihop = internal ? 64 : 0;
                   1304: 
                         /* A zero gw_mode is treated as "not set" */
                   1305:   /* Different default for gw_mode */
                   1306:   if (!c->gw_mode)
                   1307:     c->gw_mode = c->multihop ? GW_RECURSIVE : GW_DIRECT;
                   1308: 
                         /* A zero missing_lladdr is treated as "not set" */
                   1309:   /* Different default based on rs_client */
                   1310:   if (!c->missing_lladdr)
                   1311:     c->missing_lladdr = c->rs_client ? MLL_IGNORE : MLL_SELF;
                   1312: 
                         /* The import limit action is silently downgraded from restart to disable */
                   1313:   /* Disable after error incompatible with restart limit action */
                   1314:   if (c->c.in_limit && (c->c.in_limit->action == PLA_RESTART) && c->disable_after_error)
                   1315:     c->c.in_limit->action = PLA_DISABLE;
                   1316: 
                   1317: 
                   1318:   if (!c->local_as)
                   1319:     cf_error("Local AS number must be set");
                   1320: 
                   1321:   if (ipa_zero(c->remote_ip))
                   1322:     cf_error("Neighbor must be configured");
                   1323: 
                   1324:   if (!c->remote_as)
                   1325:     cf_error("Remote AS number must be set");
                   1326: 
                   1327:   // if (ipa_is_link_local(c->remote_ip) && !c->iface)
                   1328:   //   cf_error("Link-local neighbor address requires specified interface");
                   1329: 
                         /* !a != !b is a logical XOR: error when exactly one of
                          * "neighbor is link-local" and "interface given" holds */
                   1330:   if (!ipa_is_link_local(c->remote_ip) != !c->iface)
                   1331:     cf_error("Link-local address and interface scope must be used together");
                   1332: 
                         /* Remote AS above 0xFFFF needs both capabilities and enable_as4 */
                   1333:   if (!(c->capabilities && c->enable_as4) && (c->remote_as > 0xFFFF))
                   1334:     cf_error("Neighbor AS number out of range (AS4 not available)");
                   1335: 
                   1336:   if (!internal && c->rr_client)
                   1337:     cf_error("Only internal neighbor can be RR client");
                   1338: 
                   1339:   if (internal && c->rs_client)
                   1340:     cf_error("Only external neighbor can be RS client");
                   1341: 
                   1342:   if (c->multihop && (c->gw_mode == GW_DIRECT))
                   1343:     cf_error("Multihop BGP cannot use direct gateway mode");
                   1344: 
                   1345:   if (c->multihop && (ipa_is_link_local(c->remote_ip) ||
                   1346:                      ipa_is_link_local(c->source_addr)))
                   1347:     cf_error("Multihop BGP cannot be used with link-local addresses");
                   1348: 
                   1349:   if (c->multihop && c->check_link)
                   1350:     cf_error("Multihop BGP cannot depend on link state");
                   1351: 
                   1352:   if (c->multihop && c->bfd && ipa_zero(c->source_addr))
                   1353:     cf_error("Multihop BGP with BFD requires specified source address");
                   1354: 
                   1355:   if ((c->gw_mode == GW_RECURSIVE) && c->c.table->sorted)
                   1356:     cf_error("BGP in recursive mode prohibits sorted table");
                   1357: 
                   1358:   if (c->deterministic_med && c->c.table->sorted)
                   1359:     cf_error("BGP with deterministic MED prohibits sorted table");
                   1360: 
                   1361:   if (c->secondary && !c->c.table->sorted)
                   1362:     cf_error("BGP with secondary option requires sorted table");
                   1363: }
                   1364: 
                         /*
                          * bgp_reconfigure - reconfigure hook (wired as .reconfigure in proto_bgp)
                          * @P: running protocol instance
                          * @C: new configuration
                          *
                          * Returns nonzero when the new configuration is equivalent to the one
                          * the instance runs with; in that case p->cf is switched to the new
                          * configuration. Returns 0 when the router ID differs or when any
                          * BGP-specific setting changed.
                          * NOTE(review): a zero return presumably makes the caller restart the
                          * protocol; that is not visible here - confirm.
                          */
                   1365: static int
                   1366: bgp_reconfigure(struct proto *P, struct proto_config *C)
                   1367: {
                   1368:   struct bgp_config *new = (struct bgp_config *) C;
                   1369:   struct bgp_proto *p = (struct bgp_proto *) P;
                   1370:   struct bgp_config *old = p->cf;
                   1371: 
                   1372:   if (proto_get_router_id(C) != p->local_id)
                   1373:     return 0;
                   1374: 
                         /* Three-part comparison:
                          *  1. raw bytes of bgp_config between the end of the embedded
                          *     proto_config and the password field,
                          *  2. the passwords (both absent, or both present and equal strings),
                          *  3. the tables chosen by get_igp_table().
                          * NOTE(review): the memcmp() also compares padding bytes and compares
                          * pointer members by address; this presumably relies on how configs
                          * are allocated and filled - confirm. */
                   1375:   int same = !memcmp(((byte *) old) + sizeof(struct proto_config),
                   1376:                     ((byte *) new) + sizeof(struct proto_config),
                   1377:                     // password item is last and must be checked separately
                   1378:                     OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
                   1379:     && ((!old->password && !new->password)
                   1380:        || (old->password && new->password && !strcmp(old->password, new->password)))
                   1381:     && (get_igp_table(old) == get_igp_table(new));
                   1382: 
                         /* BFD is refreshed only once start_state has passed BSS_PREPARE */
                   1383:   if (same && (p->start_state > BSS_PREPARE))
                   1384:     bgp_update_bfd(p, new->bfd);
                   1385: 
                   1386:   /* We should update our copy of configuration ptr as old configuration will be freed */
                   1387:   if (same)
                   1388:     p->cf = new;
                   1389: 
                   1390:   return same;
                   1391: }
                   1392: 
                         /*
                          * bgp_copy_config - copy_config hook (wired as .copy_config in proto_bgp)
                          * @dest: configuration to copy into
                          * @src: configuration to copy from
                          *
                          * Delegates to proto_copy_rest() with the size of struct bgp_config.
                          * Pointer members such as the password are therefore shared between
                          * @src and @dest, not duplicated.
                          */
                   1393: static void
                   1394: bgp_copy_config(struct proto_config *dest, struct proto_config *src)
                   1395: {
                   1396:   /* Just a shallow copy */
                   1397:   proto_copy_rest(dest, src, sizeof(struct bgp_config));
                   1398: }
                   1399: 
                   1400: 
                   1401: /**
                   1402:  * bgp_error - report a protocol error
                   1403:  * @c: connection
                   1404:  * @code: error code (according to the RFC)
                   1405:  * @subcode: error sub-code
                   1406:  * @data: data to be passed in the Notification message
                   1407:  * @len: length of the data
                   1408:  *
                   1409:  * bgp_error() sends a notification packet to tell the other side that a protocol
                   1410:  * error has occurred (including the data considered erroneous if possible) and
                   1411:  * closes the connection.
                          *
                          * Nothing is done when the connection is already in the BS_CLOSE state.
                          *
                          * A negative @len means that @data is used for logging only: the log
                          * call gets the absolute value of @len, but the Notification is
                          * scheduled with a data size of zero. The @data pointer is stored in
                          * the connection as is, no copy is made here.
                          *
                          * For every @code other than 6 the startup delay is updated and the
                          * protocol instance is stopped via bgp_stop().
                   1412:  */
                   1413: void
                   1414: bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len)
                   1415: {
                   1416:   struct bgp_proto *p = c->bgp;
                   1417: 
                   1418:   if (c->state == BS_CLOSE)
                   1419:     return;
                   1420: 
                   1421:   bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, (len > 0) ? len : -len);
                         /* Code and subcode are packed into one word: code in the bits above
                          * 16, subcode below; bgp_last_errmsg() unpacks them the same way */
                   1422:   bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode);
                   1423:   bgp_conn_enter_close_state(c);
                   1424: 
                   1425:   c->notify_code = code;
                   1426:   c->notify_subcode = subcode;
                   1427:   c->notify_data = data;
                   1428:   c->notify_size = (len > 0) ? len : 0;
                   1429:   bgp_schedule_packet(c, PKT_NOTIFICATION);
                   1430: 
                         /* NOTE(review): 6 is presumably the Cease error code (the subcode
                          * comments in bgp_shutdown() call it "Errcode 6") - confirm */
                   1431:   if (code != 6)
                   1432:     {
                   1433:       bgp_update_startup_delay(p);
                   1434:       bgp_stop(p, 0);
                   1435:     }
                   1436: }
                   1437: 
                   1438: /**
                   1439:  * bgp_store_error - store last error for status report
                   1440:  * @p: BGP instance
                   1441:  * @c: connection
                   1442:  * @class: error class (BE_xxx constants)
                   1443:  * @code: error code (class specific)
                   1444:  *
                   1445:  * bgp_store_error() decides whether the given error is interesting enough
                   1446:  * and stores that error in the last_error variables of @p.
                          *
                          * @c may be NULL (bgp_shutdown() calls it that way); the check for a
                          * secondary connection is then skipped.
                   1447:  */
                   1448: void
                   1449: bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code)
                   1450: {
                   1451:   /* During PS_UP, we ignore errors on secondary connection */
                   1452:   if ((p->p.proto_state == PS_UP) && c && (c != p->conn))
                   1453:     return;
                   1454: 
                   1455:   /* During PS_STOP, we ignore any errors, as we want to report
                   1456:    * the error that caused transition to PS_STOP
                   1457:    */
                   1458:   if (p->p.proto_state == PS_STOP)
                   1459:     return;
                   1460: 
                         /* Overwrites whatever error was stored before */
                   1461:   p->last_error_class = class;
                   1462:   p->last_error_code = code;
                   1463: }
                   1464: 
                         /*
                          * String tables for status output. Each table is indexed directly by a
                          * numeric value, so the order of the strings must match the numeric
                          * values of the corresponding constants (defined outside this file
                          * section - verify when adding entries).
                          *
                          *  bgp_state_names - indexed by connection state (see bgp_state_dsc())
                          *  bgp_err_classes - prefix, indexed by p->last_error_class
                          *  bgp_misc_errors - indexed by p->last_error_code for class BE_MISC
                          *  bgp_auto_errors - indexed by p->last_error_code for class BE_AUTO_DOWN
                          */
                   1465: static char *bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established", "Close" };
                   1466: static char *bgp_err_classes[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""};
                   1467: static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket", "Link down", "BFD session down", "Graceful restart"};
                   1468: static char *bgp_auto_errors[] = { "", "Route limit exceeded"};
                   1469: 
                         /*
                          * bgp_last_errmsg - describe the last stored error of a BGP instance
                          * @p: BGP instance
                          *
                          * Returns a string for p->last_error_code, interpreted according to
                          * p->last_error_class. Classes without a case here yield "".
                          * NOTE(review): the table lookups are not bounds-checked; they rely on
                          * only valid codes ever being stored - confirm against the callers of
                          * bgp_store_error().
                          */
                   1470: static const char *
                   1471: bgp_last_errmsg(struct bgp_proto *p)
                   1472: {
                   1473:   switch (p->last_error_class)
                   1474:     {
                   1475:     case BE_MISC:
                   1476:       return bgp_misc_errors[p->last_error_code];
                   1477:     case BE_SOCKET:
                         /* A nonzero code is handed to strerror(), i.e. treated as an errno value */
                   1478:       return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code);
                   1479:     case BE_BGP_RX:
                   1480:     case BE_BGP_TX:
                         /* Unpack (code << 16) | subcode as stored by bgp_error() */
                   1481:       return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF);
                   1482:     case BE_AUTO_DOWN:
                   1483:       return bgp_auto_errors[p->last_error_code];
                   1484:     default:
                   1485:       return "";
                   1486:     }
                   1487: }
                   1488: 
                         /*
                          * bgp_state_dsc - name of the current BGP state of an instance
                          * @p: BGP instance
                          *
                          * Returns "Down" when the protocol is in PS_DOWN. Otherwise the larger
                          * of the incoming and outgoing connection states is looked up in
                          * bgp_state_names, except that "Passive" is returned when that state is
                          * BS_IDLE, start_state has reached BSS_CONNECT and the passive option
                          * is configured.
                          */
                   1489: static const char *
                   1490: bgp_state_dsc(struct bgp_proto *p)
                   1491: {
                   1492:   if (p->p.proto_state == PS_DOWN)
                   1493:     return "Down";
                   1494: 
                         /* Taking the maximum presumably picks the connection that got
                          * further in the state machine - assumes the BS_* values are ordered
                          * that way; confirm */
                   1495:   int state = MAX(p->incoming_conn.state, p->outgoing_conn.state);
                   1496:   if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->cf->passive)
                   1497:     return "Passive";
                   1498: 
                   1499:   return bgp_state_names[state];
                   1500: }
                   1501: 
                         /*
                          * bgp_get_status - get_status hook (wired as .get_status in proto_bgp)
                          * @P: protocol instance
                          * @buf: output buffer for the status text
                          *
                          * Writes the error class prefix and the last error message into @buf.
                          * Unless the protocol is in PS_DOWN, the text is preceded by the BGP
                          * state name, left-aligned in a 14-character field.
                          * NOTE(review): no buffer size is passed to bsprintf(); @buf is
                          * presumably sized by the caller for the longest message - confirm.
                          */
                   1502: static void
                   1503: bgp_get_status(struct proto *P, byte *buf)
                   1504: {
                   1505:   struct bgp_proto *p = (struct bgp_proto *) P;
                   1506: 
                   1507:   const char *err1 = bgp_err_classes[p->last_error_class];
                   1508:   const char *err2 = bgp_last_errmsg(p);
                   1509: 
                   1510:   if (P->proto_state == PS_DOWN)
                   1511:     bsprintf(buf, "%s%s", err1, err2);
                   1512:   else
                   1513:     bsprintf(buf, "%-14s%s%s", bgp_state_dsc(p), err1, err2);
                   1514: }
                   1515: 
                         /*
                          * bgp_show_proto_info - show_proto_info hook (wired in proto_bgp)
                          * @P: protocol instance
                          *
                          * Prints the generic protocol information followed by BGP details via
                          * cli_msg(): state, neighbor address and AS always; pending timers
                          * while in PS_START; negotiated session parameters while in PS_UP;
                          * and the last error unless its class is BE_NONE or BE_MAN_DOWN.
                          */
                   1516: static void
                   1517: bgp_show_proto_info(struct proto *P)
                   1518: {
                   1519:   struct bgp_proto *p = (struct bgp_proto *) P;
                         /* c is dereferenced only in the PS_UP branch below.
                          * NOTE(review): assumes p->conn is non-NULL whenever the protocol
                          * is in PS_UP - confirm */
                   1520:   struct bgp_conn *c = p->conn;
                   1521: 
                   1522:   proto_show_basic_info(P);
                   1523: 
                   1524:   cli_msg(-1006, "  BGP state:          %s", bgp_state_dsc(p));
                   1525:   cli_msg(-1006, "    Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface);
                   1526:   cli_msg(-1006, "    Neighbor AS:      %u", p->remote_as);
                   1527: 
                   1528:   if (p->gr_active)
                   1529:     cli_msg(-1006, "    Neighbor graceful restart active");
                   1530: 
                   1531:   if (P->proto_state == PS_START)
                   1532:     {
                   1533:       struct bgp_conn *oc = &p->outgoing_conn;
                   1534: 
                         /* Each timer line is printed only when its 'expires' field is
                          * nonzero; the value shown is expires - now */
                   1535:       if ((p->start_state < BSS_CONNECT) &&
                   1536:          (p->startup_timer->expires))
                   1537:        cli_msg(-1006, "    Error wait:       %d/%d",
                   1538:                p->startup_timer->expires - now, p->startup_delay);
                   1539: 
                   1540:       if ((oc->state == BS_ACTIVE) &&
                   1541:          (oc->connect_retry_timer->expires))
                   1542:        cli_msg(-1006, "    Connect delay:    %d/%d",
                   1543:                oc->connect_retry_timer->expires - now, p->cf->connect_delay_time);
                   1544: 
                   1545:       if (p->gr_active && p->gr_timer->expires)
                   1546:        cli_msg(-1006, "    Restart timer:    %d/-", p->gr_timer->expires - now);
                   1547:     }
                   1548:   else if (P->proto_state == PS_UP)
                   1549:     {
                   1550:       cli_msg(-1006, "    Neighbor ID:      %R", p->remote_id);
                         /* "Neighbor caps" comes from the peer_* fields of the connection;
                          * "Session" below comes from the fields of the instance itself */
                   1551:       cli_msg(-1006, "    Neighbor caps:   %s%s%s%s%s%s%s",
                   1552:              c->peer_refresh_support ? " refresh" : "",
                   1553:              c->peer_enhanced_refresh_support ? " enhanced-refresh" : "",
                   1554:              c->peer_gr_able ? " restart-able" : (c->peer_gr_aware ? " restart-aware" : ""),
                   1555:              c->peer_as4_support ? " AS4" : "",
                   1556:              (c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "",
                   1557:              (c->peer_add_path & ADD_PATH_TX) ? " add-path-tx" : "",
                   1558:              c->peer_ext_messages_support ? " ext-messages" : "");
                   1559:       cli_msg(-1006, "    Session:          %s%s%s%s%s%s%s%s",
                   1560:              p->is_internal ? "internal" : "external",
                   1561:              p->cf->multihop ? " multihop" : "",
                   1562:              p->rr_client ? " route-reflector" : "",
                   1563:              p->rs_client ? " route-server" : "",
                   1564:              p->as4_session ? " AS4" : "",
                   1565:              p->add_path_rx ? " add-path-rx" : "",
                   1566:              p->add_path_tx ? " add-path-tx" : "",
                   1567:              p->ext_messages ? " ext-messages" : "");
                   1568:       cli_msg(-1006, "    Source address:   %I", p->source_addr);
                         /* Usage shown against the import limit is imported plus filtered routes */
                   1569:       if (P->cf->in_limit)
                   1570:        cli_msg(-1006, "    Route limit:      %d/%d",
                   1571:                p->p.stats.imp_routes + p->p.stats.filt_routes, P->cf->in_limit->limit);
                   1572:       cli_msg(-1006, "    Hold timer:       %d/%d",
                   1573:              tm_remains(c->hold_timer), c->hold_time);
                   1574:       cli_msg(-1006, "    Keepalive timer:  %d/%d",
                   1575:              tm_remains(c->keepalive_timer), c->keepalive_time);
                   1576:     }
                   1577: 
                         /* A manual shutdown and "no error" are not reported as a last error */
                   1578:   if ((p->last_error_class != BE_NONE) &&
                   1579:       (p->last_error_class != BE_MAN_DOWN))
                   1580:     {
                   1581:       const char *err1 = bgp_err_classes[p->last_error_class];
                   1582:       const char *err2 = bgp_last_errmsg(p);
                   1583:       cli_msg(-1006, "    Last error:       %s%s", err1, err2);
                   1584:     }
                   1585: }
                   1586: 
                         /*
                          * Protocol descriptor for BGP: name, instance name template, attribute
                          * class, default preference, configuration size and the hook functions.
                          * bgp_start, bgp_get_attr and bgp_get_route_info are defined outside
                          * this section of the file.
                          */
                   1587: struct protocol proto_bgp = {
                   1588:   .name =              "BGP",
                   1589:   .template =          "bgp%d",
                   1590:   .attr_class =        EAP_BGP,
                   1591:   .preference =        DEF_PREF_BGP,
                   1592:   .config_size =       sizeof(struct bgp_config),
                   1593:   .init =              bgp_init,
                   1594:   .start =             bgp_start,
                   1595:   .shutdown =          bgp_shutdown,
                   1596:   .cleanup =           bgp_cleanup,
                   1597:   .reconfigure =       bgp_reconfigure,
                   1598:   .copy_config =       bgp_copy_config,
                   1599:   .get_status =        bgp_get_status,
                   1600:   .get_attr =          bgp_get_attr,
                   1601:   .get_route_info =    bgp_get_route_info,
                   1602:   .show_proto_info =   bgp_show_proto_info
                   1603: };

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>