Annotation of embedaddon/bird2/proto/bgp/bgp.c, revision 1.1.1.1

1.1       misho       1: /*
                      2:  *     BIRD -- The Border Gateway Protocol
                      3:  *
                      4:  *     (c) 2000 Martin Mares <mj@ucw.cz>
                      5:  *     (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
                      6:  *     (c) 2008--2016 CZ.NIC z.s.p.o.
                      7:  *
                      8:  *     Can be freely distributed and used under the terms of the GNU GPL.
                      9:  */
                     10: 
                     11: /**
                     12:  * DOC: Border Gateway Protocol
                     13:  *
                     14:  * The BGP protocol is implemented in three parts: |bgp.c| which takes care of
                     15:  * the connection and most of the interface with BIRD core, |packets.c| handling
                     16:  * both incoming and outgoing BGP packets and |attrs.c| containing functions for
                     17:  * manipulation with BGP attribute lists.
                     18:  *
                     19:  * As opposed to the other existing routing daemons, BIRD has a sophisticated
                     20:  * core architecture which is able to keep all the information needed by BGP in
                     21:  * the primary routing table, therefore no complex data structures like a
                     22:  * central BGP table are needed. This increases memory footprint of a BGP router
                     23:  * with many connections, but not too much and, which is more important, it
                     24:  * makes BGP much easier to implement.
                     25:  *
                     26:  * Each instance of BGP (corresponding to a single BGP peer) is described by a
                     27:  * &bgp_proto structure to which are attached individual connections represented
                     28:  * by &bgp_connection (usually, there exists only one connection, but during BGP
                     29:  * session setup, there can be more of them). The connections are handled
                     30:  * according to the BGP state machine defined in the RFC with all the timers and
                     31:  * all the parameters configurable.
                     32:  *
                     33:  * In incoming direction, we listen on the connection's socket and each time we
                     34:  * receive some input, we pass it to bgp_rx(). It decodes packet headers and the
                     35:  * markers and passes complete packets to bgp_rx_packet() which distributes the
                     36:  * packet according to its type.
                     37:  *
                     38:  * In outgoing direction, we gather all the routing updates and sort them to
                     39:  * buckets (&bgp_bucket) according to their attributes (we keep a hash table for
                     40:  * fast comparison of &rta's and a &fib which helps us to find if we already
                     41:  * have another route for the same destination queued for sending, so that we
                     42:  * can replace it with the new one immediately instead of sending both
                     43:  * updates). There also exists a special bucket holding all the route
                     44:  * withdrawals which cannot be queued anywhere else as they don't have any
                     45:  * attributes. If we have any packet to send (due to either new routes or the
                     46:  * connection tracking code wanting to send an Open, Keepalive or Notification
                     47:  * message), we call bgp_schedule_packet() which sets the corresponding bit in a
                     48:  * @packets_to_send bit field in &bgp_conn and as soon as the transmit socket
                     49:  * buffer becomes empty, we call bgp_fire_tx(). It inspects state of all the
                     50:  * packet type bits and calls the corresponding bgp_create_xx() functions,
                     51:  * eventually rescheduling the same packet type if we have more data of the same
                     52:  * type to send.
                     53:  *
                     54:  * The processing of attributes consists of two functions: bgp_decode_attrs()
                     55:  * for checking of the attribute blocks and translating them to the language of
                     56:  * BIRD's extended attributes and bgp_encode_attrs() which does the
                     57:  * converse. Both functions are built around a @bgp_attr_table array describing
                     58:  * all important characteristics of all known attributes.  Unknown transitive
                     59:  * attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
                     60:  *
                     61:  * BGP protocol implements graceful restart in both restarting (local restart)
                     62:  * and receiving (neighbor restart) roles. The first is handled mostly by the
                     63:  * graceful restart code in the nest, BGP protocol just handles capabilities,
                     64:  * sets @gr_wait and locks graceful restart until end-of-RIB mark is received.
                     65:  * The second is implemented by internal restart of the BGP state to %BS_IDLE
                     66:  * and protocol state to %PS_START, but keeping the protocol up from the core
                     67:  * point of view and therefore maintaining received routes. Routing table
                     68:  * refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing
                     69:  * stale routes after reestablishment of BGP session during graceful restart.
                     70:  *
                     71:  * Supported standards:
                     72:  * RFC 4271 - Border Gateway Protocol 4 (BGP)
                     73:  * RFC 1997 - BGP Communities Attribute
                     74:  * RFC 2385 - Protection of BGP Sessions via TCP MD5 Signature
                     75:  * RFC 2545 - Use of BGP Multiprotocol Extensions for IPv6
                     76:  * RFC 2918 - Route Refresh Capability
                     77:  * RFC 3107 - Carrying Label Information in BGP
                     78:  * RFC 4360 - BGP Extended Communities Attribute
                     79:  * RFC 4364 - BGP/MPLS IPv4 Virtual Private Networks
                     80:  * RFC 4456 - BGP Route Reflection
                     81:  * RFC 4486 - Subcodes for BGP Cease Notification Message
                     82:  * RFC 4659 - BGP/MPLS IPv6 Virtual Private Networks
                     83:  * RFC 4724 - Graceful Restart Mechanism for BGP
                     84:  * RFC 4760 - Multiprotocol extensions for BGP
                     85:  * RFC 4798 - Connecting IPv6 Islands over IPv4 MPLS
                     86:  * RFC 5065 - AS confederations for BGP
                     87:  * RFC 5082 - Generalized TTL Security Mechanism
                     88:  * RFC 5492 - Capabilities Advertisement with BGP
                     89:  * RFC 5549 - Advertising IPv4 NLRI with an IPv6 Next Hop
                     90:  * RFC 5575 - Dissemination of Flow Specification Rules
                     91:  * RFC 5668 - 4-Octet AS Specific BGP Extended Community
                     92:  * RFC 6286 - AS-Wide Unique BGP Identifier
                     93:  * RFC 6608 - Subcodes for BGP Finite State Machine Error
                     94:  * RFC 6793 - BGP Support for 4-Octet AS Numbers
                     95:  * RFC 7311 - Accumulated IGP Metric Attribute for BGP
                     96:  * RFC 7313 - Enhanced Route Refresh Capability for BGP
                     97:  * RFC 7606 - Revised Error Handling for BGP UPDATE Messages
                     98:  * RFC 7911 - Advertisement of Multiple Paths in BGP
                     99:  * RFC 7947 - Internet Exchange BGP Route Server
                    100:  * RFC 8092 - BGP Large Communities Attribute
                    101:  * RFC 8203 - BGP Administrative Shutdown Communication
                    102:  * RFC 8212 - Default EBGP Route Propagation Behavior without Policies
                    103:  * draft-ietf-idr-bgp-extended-messages-27
                    104:  * draft-ietf-idr-ext-opt-param-07
                    105:  * draft-uttaro-idr-bgp-persistence-04
                    106:  */
                    107: 
                    108: #undef LOCAL_DEBUG
                    109: 
                    110: #include <stdlib.h>
                    111: 
                    112: #include "nest/bird.h"
                    113: #include "nest/iface.h"
                    114: #include "nest/protocol.h"
                    115: #include "nest/route.h"
                    116: #include "nest/cli.h"
                    117: #include "nest/locks.h"
                    118: #include "conf/conf.h"
                    119: #include "filter/filter.h"
                    120: #include "lib/socket.h"
                    121: #include "lib/resource.h"
                    122: #include "lib/string.h"
                    123: 
                    124: #include "bgp.h"
                    125: 
                    126: 
                    127: struct linpool *bgp_linpool;           /* Global temporary pool; created in bgp_open(), freed in bgp_close() with the last listening socket */
                    128: struct linpool *bgp_linpool2;          /* Global temporary pool for bgp_rt_notify(); created and freed together with bgp_linpool */
                    129: static list bgp_sockets;               /* Global list of listening sockets (struct bgp_socket nodes), each possibly shared by several BGP instances */
                    130: 
                    131: 
                    132: static void bgp_connect(struct bgp_proto *p);
                    133: static void bgp_active(struct bgp_proto *p);
                    134: static void bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn);
                    135: static void bgp_setup_sk(struct bgp_conn *conn, sock *s);
                    136: static void bgp_send_open(struct bgp_conn *conn);
                    137: static void bgp_update_bfd(struct bgp_proto *p, int use_bfd);
                    138: 
                    139: static int bgp_incoming_connection(sock *sk, uint dummy UNUSED);
                    140: static void bgp_listen_sock_err(sock *sk UNUSED, int err);
                    141: 
                    142: /**
                    143:  * bgp_open - open a BGP instance
                    144:  * @p: BGP instance
                    145:  *
                    146:  * This function allocates and configures shared BGP resources, mainly listening
                    147:  * sockets. Should be called as the last step during initialization (when lock
                    148:  * is acquired and neighbor is ready). When error, caller should change state to
                    149:  * PS_DOWN and return immediately.
                    150:  */
                    151: static int
                    152: bgp_open(struct bgp_proto *p)
                    153: {
                    154:   struct bgp_socket *bs = NULL;
                    155:   struct iface *ifa = p->cf->strict_bind ? p->cf->iface : NULL;
                    156:   ip_addr addr = p->cf->strict_bind ? p->cf->local_ip :
                    157:     (p->ipv4 ? IPA_NONE4 : IPA_NONE6);
                    158:   uint port = p->cf->local_port;
                    159: 
                    160:   /* FIXME: Add some global init? */
                    161:   if (!bgp_linpool)
                    162:     init_list(&bgp_sockets);
                    163: 
                    164:   /* We assume that cf->iface is defined iff cf->local_ip is link-local */
                    165: 
                    166:   WALK_LIST(bs, bgp_sockets)
                    167:     if (ipa_equal(bs->sk->saddr, addr) && (bs->sk->sport == port) &&
                    168:        (bs->sk->iface == ifa) && (bs->sk->vrf == p->p.vrf))
                    169:     {
                    170:       bs->uc++;
                    171:       p->sock = bs;
                    172:       return 0;
                    173:     }
                    174: 
                    175:   sock *sk = sk_new(proto_pool);
                    176:   sk->type = SK_TCP_PASSIVE;
                    177:   sk->ttl = 255;
                    178:   sk->saddr = addr;
                    179:   sk->sport = port;
                    180:   sk->iface = ifa;
                    181:   sk->vrf = p->p.vrf;
                    182:   sk->flags = 0;
                    183:   sk->tos = IP_PREC_INTERNET_CONTROL;
                    184:   sk->rbsize = BGP_RX_BUFFER_SIZE;
                    185:   sk->tbsize = BGP_TX_BUFFER_SIZE;
                    186:   sk->rx_hook = bgp_incoming_connection;
                    187:   sk->err_hook = bgp_listen_sock_err;
                    188: 
                    189:   if (sk_open(sk) < 0)
                    190:     goto err;
                    191: 
                    192:   bs = mb_allocz(proto_pool, sizeof(struct bgp_socket));
                    193:   bs->sk = sk;
                    194:   bs->uc = 1;
                    195:   p->sock = bs;
                    196:   sk->data = bs;
                    197: 
                    198:   add_tail(&bgp_sockets, &bs->n);
                    199: 
                    200:   if (!bgp_linpool)
                    201:   {
                    202:     bgp_linpool  = lp_new_default(proto_pool);
                    203:     bgp_linpool2 = lp_new_default(proto_pool);
                    204:   }
                    205: 
                    206:   return 0;
                    207: 
                    208: err:
                    209:   sk_log_error(sk, p->p.name);
                    210:   log(L_ERR "%s: Cannot open listening socket", p->p.name);
                    211:   rfree(sk);
                    212:   return -1;
                    213: }
                    214: 
                    215: /**
                    216:  * bgp_close - close a BGP instance
                    217:  * @p: BGP instance
                    218:  *
                    219:  * This function frees and deconfigures shared BGP resources.
                    220:  */
                    221: static void
                    222: bgp_close(struct bgp_proto *p)
                    223: {
                    224:   struct bgp_socket *bs = p->sock;
                    225: 
                    226:   ASSERT(bs && bs->uc);
                    227: 
                    228:   if (--bs->uc)
                    229:     return;
                    230: 
                    231:   rfree(bs->sk);
                    232:   rem_node(&bs->n);
                    233:   mb_free(bs);
                    234: 
                    235:   if (!EMPTY_LIST(bgp_sockets))
                    236:     return;
                    237: 
                    238:   rfree(bgp_linpool);
                    239:   bgp_linpool = NULL;
                    240: 
                    241:   rfree(bgp_linpool2);
                    242:   bgp_linpool2 = NULL;
                    243: }
                    244: 
                    245: static inline int
                    246: bgp_setup_auth(struct bgp_proto *p, int enable)
                    247: {
                    248:   if (p->cf->password)
                    249:   {
                    250:     int rv = sk_set_md5_auth(p->sock->sk,
                    251:                             p->cf->local_ip, p->cf->remote_ip, p->cf->iface,
                    252:                             enable ? p->cf->password : NULL, p->cf->setkey);
                    253: 
                    254:     if (rv < 0)
                    255:       sk_log_error(p->sock->sk, p->p.name);
                    256: 
                    257:     return rv;
                    258:   }
                    259:   else
                    260:     return 0;
                    261: }
                    262: 
                    263: static inline struct bgp_channel *
                    264: bgp_find_channel(struct bgp_proto *p, u32 afi)
                    265: {
                    266:   struct bgp_channel *c;
                    267:   WALK_LIST(c, p->p.channels)
                    268:     if (c->afi == afi)
                    269:       return c;
                    270: 
                    271:   return NULL;
                    272: }
                    273: 
                    274: static void
                    275: bgp_startup(struct bgp_proto *p)
                    276: {
                    277:   BGP_TRACE(D_EVENTS, "Started");
                    278:   p->start_state = BSS_CONNECT;
                    279: 
                    280:   if (!p->passive)
                    281:     bgp_active(p);
                    282: 
                    283:   if (p->postponed_sk)
                    284:   {
                    285:     /* Apply postponed incoming connection */
                    286:     bgp_setup_conn(p, &p->incoming_conn);
                    287:     bgp_setup_sk(&p->incoming_conn, p->postponed_sk);
                    288:     bgp_send_open(&p->incoming_conn);
                    289:     p->postponed_sk = NULL;
                    290:   }
                    291: }
                    292: 
                    293: static void
                    294: bgp_startup_timeout(timer *t)
                    295: {
                    296:   bgp_startup(t->data);
                    297: }
                    298: 
                    299: 
                    300: static void
                    301: bgp_initiate(struct bgp_proto *p)
                    302: {
                    303:   int err_val;
                    304: 
                    305:   if (bgp_open(p) < 0)
                    306:   { err_val = BEM_NO_SOCKET; goto err1; }
                    307: 
                    308:   if (bgp_setup_auth(p, 1) < 0)
                    309:   { err_val = BEM_INVALID_MD5; goto err2; }
                    310: 
                    311:   if (p->cf->bfd)
                    312:     bgp_update_bfd(p, p->cf->bfd);
                    313: 
                    314:   if (p->startup_delay)
                    315:   {
                    316:     p->start_state = BSS_DELAY;
                    317:     BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay);
                    318:     bgp_start_timer(p->startup_timer, p->startup_delay);
                    319:   }
                    320:   else
                    321:     bgp_startup(p);
                    322: 
                    323:   return;
                    324: 
                    325: err2:
                    326:   bgp_close(p);
                    327: err1:
                    328:   p->p.disabled = 1;
                    329:   bgp_store_error(p, NULL, BE_MISC, err_val);
                    330:   proto_notify_state(&p->p, PS_DOWN);
                    331: 
                    332:   return;
                    333: }
                    334: 
                    335: /**
                    336:  * bgp_start_timer - start a BGP timer
                    337:  * @t: timer
                    338:  * @value: time (in seconds) to fire (0 to disable the timer)
                    339:  *
                    340:  * This functions calls tm_start() on @t with time @value and the amount of
                    341:  * randomization suggested by the BGP standard. Please use it for all BGP
                    342:  * timers.
                    343:  */
                    344: void
                    345: bgp_start_timer(timer *t, uint value)
                    346: {
                    347:   if (value)
                    348:   {
                    349:     /* The randomization procedure is specified in RFC 4271 section 10 */
                    350:     btime time = value S;
                    351:     btime randomize = random() % ((time / 4) + 1);
                    352:     tm_start(t, time - randomize);
                    353:   }
                    354:   else
                    355:     tm_stop(t);
                    356: }
                    357: 
                    358: /**
                    359:  * bgp_close_conn - close a BGP connection
                    360:  * @conn: connection to close
                    361:  *
                    362:  * This function takes a connection described by the &bgp_conn structure, closes
                    363:  * its socket and frees all resources associated with it.
                    364:  */
                    365: void
                    366: bgp_close_conn(struct bgp_conn *conn)
                    367: {
                    368:   // struct bgp_proto *p = conn->bgp;
                    369: 
                    370:   DBG("BGP: Closing connection\n");
                    371:   conn->packets_to_send = 0;
                    372:   conn->channels_to_send = 0;
                    373:   rfree(conn->connect_timer);
                    374:   conn->connect_timer = NULL;
                    375:   rfree(conn->keepalive_timer);
                    376:   conn->keepalive_timer = NULL;
                    377:   rfree(conn->hold_timer);
                    378:   conn->hold_timer = NULL;
                    379:   rfree(conn->tx_ev);
                    380:   conn->tx_ev = NULL;
                    381:   rfree(conn->sk);
                    382:   conn->sk = NULL;
                    383: 
                    384:   mb_free(conn->local_caps);
                    385:   conn->local_caps = NULL;
                    386:   mb_free(conn->remote_caps);
                    387:   conn->remote_caps = NULL;
                    388: }
                    389: 
                    390: 
                    391: /**
                    392:  * bgp_update_startup_delay - update a startup delay
                    393:  * @p: BGP instance
                    394:  *
                    395:  * This function updates a startup delay that is used to postpone next BGP
                    396:  * connect. It also handles disable_after_error and might stop BGP instance
                    397:  * when error happened and disable_after_error is on.
                    398:  *
                    399:  * It should be called when BGP protocol error happened.
                    400:  */
                    401: void
                    402: bgp_update_startup_delay(struct bgp_proto *p)
                    403: {
                    404:   const struct bgp_config *cf = p->cf;
                    405: 
                    406:   DBG("BGP: Updating startup delay\n");
                    407: 
                    408:   if (p->last_proto_error && ((current_time() - p->last_proto_error) >= cf->error_amnesia_time S))
                    409:     p->startup_delay = 0;
                    410: 
                    411:   p->last_proto_error = current_time();
                    412: 
                    413:   if (cf->disable_after_error)
                    414:   {
                    415:     p->startup_delay = 0;
                    416:     p->p.disabled = 1;
                    417:     return;
                    418:   }
                    419: 
                    420:   if (!p->startup_delay)
                    421:     p->startup_delay = cf->error_delay_time_min;
                    422:   else
                    423:     p->startup_delay = MIN(2 * p->startup_delay, cf->error_delay_time_max);
                    424: }
                    425: 
                    426: static void
                    427: bgp_graceful_close_conn(struct bgp_conn *conn, int subcode, byte *data, uint len)
                    428: {
                    429:   switch (conn->state)
                    430:   {
                    431:   case BS_IDLE:
                    432:   case BS_CLOSE:
                    433:     return;
                    434: 
                    435:   case BS_CONNECT:
                    436:   case BS_ACTIVE:
                    437:     bgp_conn_enter_idle_state(conn);
                    438:     return;
                    439: 
                    440:   case BS_OPENSENT:
                    441:   case BS_OPENCONFIRM:
                    442:   case BS_ESTABLISHED:
                    443:     if (subcode < 0)
                    444:     {
                    445:       bgp_conn_enter_close_state(conn);
                    446:       bgp_schedule_packet(conn, NULL, PKT_SCHEDULE_CLOSE);
                    447:     }
                    448:     else
                    449:       bgp_error(conn, 6, subcode, data, len);
                    450:     return;
                    451: 
                    452:   default:
                    453:     bug("bgp_graceful_close_conn: Unknown state %d", conn->state);
                    454:   }
                    455: }
                    456: 
                    457: static void
                    458: bgp_down(struct bgp_proto *p)
                    459: {
                    460:   if (p->start_state > BSS_PREPARE)
                    461:   {
                    462:     bgp_setup_auth(p, 0);
                    463:     bgp_close(p);
                    464:   }
                    465: 
                    466:   BGP_TRACE(D_EVENTS, "Down");
                    467:   proto_notify_state(&p->p, PS_DOWN);
                    468: }
                    469: 
                    470: static void
                    471: bgp_decision(void *vp)
                    472: {
                    473:   struct bgp_proto *p = vp;
                    474: 
                    475:   DBG("BGP: Decision start\n");
                    476:   if ((p->p.proto_state == PS_START) &&
                    477:       (p->outgoing_conn.state == BS_IDLE) &&
                    478:       (p->incoming_conn.state != BS_OPENCONFIRM) &&
                    479:       !p->passive)
                    480:     bgp_active(p);
                    481: 
                    482:   if ((p->p.proto_state == PS_STOP) &&
                    483:       (p->outgoing_conn.state == BS_IDLE) &&
                    484:       (p->incoming_conn.state == BS_IDLE))
                    485:     bgp_down(p);
                    486: }
                    487: 
                    488: static struct bgp_proto *
                    489: bgp_spawn(struct bgp_proto *pp, ip_addr remote_ip)
                    490: {
                    491:   struct symbol *sym;
                    492:   char fmt[SYM_MAX_LEN];
                    493: 
                    494:   bsprintf(fmt, "%s%%0%dd", pp->cf->dynamic_name, pp->cf->dynamic_name_digits);
                    495: 
                    496:   /* This is hack, we would like to share config, but we need to copy it now */
                    497:   new_config = config;
                    498:   cfg_mem = config->mem;
                    499:   conf_this_scope = config->root_scope;
                    500:   sym = cf_default_name(fmt, &(pp->dynamic_name_counter));
                    501:   proto_clone_config(sym, pp->p.cf);
                    502:   new_config = NULL;
                    503:   cfg_mem = NULL;
                    504: 
                    505:   /* Just pass remote_ip to bgp_init() */
                    506:   ((struct bgp_config *) sym->proto)->remote_ip = remote_ip;
                    507: 
                    508:   return (void *) proto_spawn(sym->proto, 0);
                    509: }
                    510: 
                    511: void
                    512: bgp_stop(struct bgp_proto *p, int subcode, byte *data, uint len)
                    513: {
                    514:   proto_notify_state(&p->p, PS_STOP);
                    515:   bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len);
                    516:   bgp_graceful_close_conn(&p->incoming_conn, subcode, data, len);
                    517:   ev_schedule(p->event);
                    518: }
                    519: 
                    520: static inline void
                    521: bgp_conn_set_state(struct bgp_conn *conn, uint new_state)
                    522: {
                    523:   if (conn->bgp->p.mrtdump & MD_STATES)
                    524:     bgp_dump_state_change(conn, conn->state, new_state);
                    525: 
                    526:   conn->state = new_state;
                    527: }
                    528: 
                    529: void
                    530: bgp_conn_enter_openconfirm_state(struct bgp_conn *conn)
                    531: {
                    532:   /* Really, most of the work is done in bgp_rx_open(). */
                    533:   bgp_conn_set_state(conn, BS_OPENCONFIRM);
                    534: }
                    535: 
                    536: static const struct bgp_af_caps dummy_af_caps = { };
                    537: static const struct bgp_af_caps basic_af_caps = { .ready = 1 };
                    538: 
                    539: void
                    540: bgp_conn_enter_established_state(struct bgp_conn *conn)
                    541: {
                    542:   struct bgp_proto *p = conn->bgp;
                    543:   struct bgp_caps *local = conn->local_caps;
                    544:   struct bgp_caps *peer = conn->remote_caps;
                    545:   struct bgp_channel *c;
                    546: 
                    547:   BGP_TRACE(D_EVENTS, "BGP session established");
                    548: 
                    549:   /* For multi-hop BGP sessions */
                    550:   if (ipa_zero(p->local_ip))
                    551:     p->local_ip = conn->sk->saddr;
                    552: 
                    553:   /* For promiscuous sessions */
                    554:   if (!p->remote_as)
                    555:     p->remote_as = conn->received_as;
                    556: 
                    557:   /* In case of LLv6 is not valid during BGP start */
                    558:   if (ipa_zero(p->link_addr) && p->neigh && p->neigh->iface && p->neigh->iface->llv6)
                    559:     p->link_addr = p->neigh->iface->llv6->ip;
                    560: 
                    561:   conn->sk->fast_rx = 0;
                    562: 
                    563:   p->conn = conn;
                    564:   p->last_error_class = 0;
                    565:   p->last_error_code = 0;
                    566: 
                    567:   p->as4_session = conn->as4_session;
                    568: 
                    569:   p->route_refresh = peer->route_refresh;
                    570:   p->enhanced_refresh = local->enhanced_refresh && peer->enhanced_refresh;
                    571: 
                    572:   /* Whether we may handle possible GR/LLGR of peer (it has some AF GR-able) */
                    573:   p->gr_ready = p->llgr_ready = 0;     /* Updated later */
                    574: 
                    575:   /* Whether peer is ready to handle our GR recovery */
                    576:   int peer_gr_ready = peer->gr_aware && !(peer->gr_flags & BGP_GRF_RESTART);
                    577: 
                    578:   if (p->gr_active_num)
                    579:     tm_stop(p->gr_timer);
                    580: 
                    581:   /* Number of active channels */
                    582:   int num = 0;
                    583: 
                    584:   /* Summary state of ADD_PATH RX for active channels */
                    585:   uint summary_add_path_rx = 0;
                    586: 
                    587:   WALK_LIST(c, p->p.channels)
                    588:   {
                    589:     const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi);
                    590:     const struct bgp_af_caps *rem = bgp_find_af_caps(peer,  c->afi);
                    591: 
                    592:     /* Use default if capabilities were not announced */
                    593:     if (!local->length && (c->afi == BGP_AF_IPV4))
                    594:       loc = &basic_af_caps;
                    595: 
                    596:     if (!peer->length && (c->afi == BGP_AF_IPV4))
                    597:       rem = &basic_af_caps;
                    598: 
                    599:     /* Ignore AFIs that were not announced in multiprotocol capability */
                    600:     if (!loc || !loc->ready)
                    601:       loc = &dummy_af_caps;
                    602: 
                    603:     if (!rem || !rem->ready)
                    604:       rem = &dummy_af_caps;
                    605: 
                    606:     int active = loc->ready && rem->ready;
                    607:     c->c.disabled = !active;
                    608:     c->c.reloadable = p->route_refresh || c->cf->import_table;
                    609: 
                    610:     c->index = active ? num++ : 0;
                    611: 
                    612:     c->feed_state = BFS_NONE;
                    613:     c->load_state = BFS_NONE;
                    614: 
                    615:     /* Channels where peer may do GR */
                    616:     uint gr_ready = active && local->gr_aware && rem->gr_able;
                    617:     uint llgr_ready = active && local->llgr_aware && rem->llgr_able;
                    618: 
                    619:     c->gr_ready = gr_ready || llgr_ready;
                    620:     p->gr_ready = p->gr_ready || c->gr_ready;
                    621:     p->llgr_ready = p->llgr_ready || llgr_ready;
                    622: 
                    623:     /* Remember last LLGR stale time */
                    624:     c->stale_time = local->llgr_aware ? rem->llgr_time : 0;
                    625: 
                    626:     /* Channels not able to recover gracefully */
                    627:     if (p->p.gr_recovery && (!active || !peer_gr_ready))
                    628:       channel_graceful_restart_unlock(&c->c);
                    629: 
                    630:     /* Channels waiting for local convergence */
                    631:     if (p->p.gr_recovery && loc->gr_able && peer_gr_ready)
                    632:       c->c.gr_wait = 1;
                    633: 
                    634:     /* Channels where regular graceful restart failed */
                    635:     if ((c->gr_active == BGP_GRS_ACTIVE) &&
                    636:        !(active && rem->gr_able && (rem->gr_af_flags & BGP_GRF_FORWARDING)))
                    637:       bgp_graceful_restart_done(c);
                    638: 
                    639:     /* Channels where regular long-lived restart failed */
                    640:     if ((c->gr_active == BGP_GRS_LLGR) &&
                    641:        !(active && rem->llgr_able && (rem->gr_af_flags & BGP_LLGRF_FORWARDING)))
                    642:       bgp_graceful_restart_done(c);
                    643: 
                    644:     /* GR capability implies that neighbor will send End-of-RIB */
                    645:     if (peer->gr_aware)
                    646:       c->load_state = BFS_LOADING;
                    647: 
                    648:     c->ext_next_hop = c->cf->ext_next_hop && (bgp_channel_is_ipv6(c) || rem->ext_next_hop);
                    649:     c->add_path_rx = (loc->add_path & BGP_ADD_PATH_RX) && (rem->add_path & BGP_ADD_PATH_TX);
                    650:     c->add_path_tx = (loc->add_path & BGP_ADD_PATH_TX) && (rem->add_path & BGP_ADD_PATH_RX);
                    651: 
                    652:     if (active)
                    653:       summary_add_path_rx |= !c->add_path_rx ? 1 : 2;
                    654: 
                    655:     /* Update RA mode */
                    656:     if (c->add_path_tx)
                    657:       c->c.ra_mode = RA_ANY;
                    658:     else if (c->cf->secondary)
                    659:       c->c.ra_mode = RA_ACCEPTED;
                    660:     else
                    661:       c->c.ra_mode = RA_OPTIMAL;
                    662:   }
                    663: 
                    664:   p->afi_map = mb_alloc(p->p.pool, num * sizeof(u32));
                    665:   p->channel_map = mb_alloc(p->p.pool, num * sizeof(void *));
                    666:   p->channel_count = num;
                    667:   p->summary_add_path_rx = summary_add_path_rx;
                    668: 
                    669:   WALK_LIST(c, p->p.channels)
                    670:   {
                    671:     if (c->c.disabled)
                    672:       continue;
                    673: 
                    674:     p->afi_map[c->index] = c->afi;
                    675:     p->channel_map[c->index] = c;
                    676:   }
                    677: 
                    678:   /* proto_notify_state() will likely call bgp_feed_begin(), setting c->feed_state */
                    679: 
                    680:   bgp_conn_set_state(conn, BS_ESTABLISHED);
                    681:   proto_notify_state(&p->p, PS_UP);
                    682: }
                    683: 
                    684: static void
                    685: bgp_conn_leave_established_state(struct bgp_proto *p)
                    686: {
                           /*
                            * Common tail of bgp_conn_enter_close_state() and
                            * bgp_conn_enter_idle_state() for a connection that was in
                            * BS_ESTABLISHED: detach the session from the protocol instance.
                            */
                    687:   BGP_TRACE(D_EVENTS, "BGP session closed");
                    688:   p->conn = NULL;
                    689: 
                           /*
                            * bgp_handle_graceful_restart() moves the protocol to PS_START before
                            * its callers enter idle state, so a graceful restart skips this stop.
                            */
                    690:   if (p->p.proto_state == PS_UP)
                    691:     bgp_stop(p, 0, NULL, 0);
                    692: }
                    693: 
                    694: void
                    695: bgp_conn_enter_close_state(struct bgp_conn *conn)
                    696: {
                           /*
                            * Move @conn to BS_CLOSE: keepalives are stopped, received data is no
                            * longer dispatched (RX hook cleared) and the hold timer is reused as
                            * a close timeout.
                            */
                    697:   struct bgp_proto *p = conn->bgp;
                           /* Previous state, sampled before bgp_conn_set_state() is called */
                    698:   int os = conn->state;
                    699: 
                    700:   bgp_conn_set_state(conn, BS_CLOSE);
                    701:   tm_stop(conn->keepalive_timer);
                    702:   conn->sk->rx_hook = NULL;
                    703: 
                    704:   /* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */
                           /* bgp_hold_timeout() enters idle state when it fires in BS_CLOSE */
                    705:   bgp_start_timer(conn->hold_timer, 10);
                    706: 
                           /* Only a connection that was established has a session to tear down */
                    707:   if (os == BS_ESTABLISHED)
                    708:     bgp_conn_leave_established_state(p);
                    709: }
                    710: 
                    711: void
                    712: bgp_conn_enter_idle_state(struct bgp_conn *conn)
                    713: {
                           /*
                            * Close @conn, put it into BS_IDLE and schedule the protocol event.
                            * If the connection was established, the session is torn down too.
                            */
                    714:   struct bgp_proto *p = conn->bgp;
                           /* Previous state, sampled before bgp_conn_set_state() is called */
                    715:   int os = conn->state;
                    716: 
                    717:   bgp_close_conn(conn);
                    718:   bgp_conn_set_state(conn, BS_IDLE);
                    719:   ev_schedule(p->event);
                    720: 
                    721:   if (os == BS_ESTABLISHED)
                    722:     bgp_conn_leave_established_state(p);
                    723: }
                    724: 
                    725: /**
                    726:  * bgp_handle_graceful_restart - handle detected BGP graceful restart
                    727:  * @p: BGP instance
                    728:  *
                    729:  * This function is called when a BGP graceful restart of the neighbor is
                    730:  * detected (when the TCP connection fails or when a new TCP connection
                    731:  * appears). The function activates processing of the restart - starts routing
                    732:  * table refresh cycle and activates BGP restart timer. The protocol state goes
                    733:  * back to %PS_START, but changing BGP state back to %BS_IDLE is left for the
                    734:  * caller.
                    735:  */
                    736: void
                    737: bgp_handle_graceful_restart(struct bgp_proto *p)
                    738: {
                           /* Callers must check for an established session with p->gr_ready set */
                    739:   ASSERT(p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready);
                    740: 
                    741:   BGP_TRACE(D_EVENTS, "Neighbor graceful restart detected%s",
                    742:            p->gr_active_num ? " - already pending" : "");
                    743: 
                           /* Recounted below: one per enabled channel that is gr_ready */
                    744:   p->gr_active_num = 0;
                    745: 
                    746:   struct bgp_channel *c;
                    747:   WALK_LIST(c, p->p.channels)
                    748:   {
                    749:     /* FIXME: perhaps check for channel state instead of disabled flag? */
                    750:     if (c->c.disabled)
                    751:       continue;
                    752: 
                    753:     if (c->gr_ready)
                    754:     {
                    755:       p->gr_active_num++;
                    756: 
                    757:       switch (c->gr_active)
                    758:       {
                                 /* No restart pending on this channel: open a refresh cycle */
                    759:       case BGP_GRS_NONE:
                    760:        c->gr_active = BGP_GRS_ACTIVE;
                    761:        rt_refresh_begin(c->c.table, &c->c);
                    762:        break;
                    763: 
                                 /* Restart already pending: close the old refresh cycle, open a new one */
                    764:       case BGP_GRS_ACTIVE:
                    765:        rt_refresh_end(c->c.table, &c->c);
                    766:        rt_refresh_begin(c->c.table, &c->c);
                    767:        break;
                    768: 
                                 /*
                                  * Channel is still in LLGR from an earlier restart: open a refresh
                                  * cycle and run rt_modify_stale() again; gr_active stays
                                  * BGP_GRS_LLGR.
                                  */
                    769:       case BGP_GRS_LLGR:
                    770:        rt_refresh_begin(c->c.table, &c->c);
                    771:        rt_modify_stale(c->c.table, &c->c);
                    772:        break;
                    773:       }
                    774:     }
                    775:     else
                    776:     {
                    777:       /* Just flush the routes */
                               /* An immediately closed refresh cycle; gr_active is left untouched */
                    778:       rt_refresh_begin(c->c.table, &c->c);
                    779:       rt_refresh_end(c->c.table, &c->c);
                    780:     }
                    781: 
                    782:     /* Reset bucket and prefix tables */
                    783:     bgp_free_bucket_table(c);
                    784:     bgp_free_prefix_table(c);
                    785:     bgp_init_bucket_table(c);
                    786:     bgp_init_prefix_table(c);
                    787:     c->packets_to_send = 0;
                    788:   }
                    789: 
                    790:   /* p->gr_ready -> at least one active channel is c->gr_ready */
                    791:   ASSERT(p->gr_active_num > 0);
                    792: 
                           /*
                            * Protocol goes back to PS_START. The restart timer runs for the
                            * gr_time found in the connection's remote capabilities; its expiry
                            * is handled by bgp_graceful_restart_timeout().
                            */
                    793:   proto_notify_state(&p->p, PS_START);
                    794:   tm_start(p->gr_timer, p->conn->remote_caps->gr_time S);
                    795: }
                    796: 
                    797: /**
                    798:  * bgp_graceful_restart_done - finish active BGP graceful restart
                    799:  * @c: BGP channel
                    800:  *
                    801:  * This function is called when the active BGP graceful restart of the neighbor
                    802:  * should be finished for channel @c - either successfully (the neighbor sends
                    803:  * all paths and reports end-of-RIB for given AFI/SAFI on the new session) or
                    804:  * unsuccessfully (the neighbor does not support BGP graceful restart on the new
                    805:  * session). The function ends the routing table refresh cycle.
                    806:  */
                    807: void
                    808: bgp_graceful_restart_done(struct bgp_channel *c)
                    809: {
                    810:   struct bgp_proto *p = (void *) c->c.proto;
                    811: 
                           /* Valid only for a channel with GR or LLGR currently in progress */
                    812:   ASSERT(c->gr_active);
                           /* NOTE(review): literal 0 is presumably BGP_GRS_NONE - confirm */
                    813:   c->gr_active = 0;
                    814:   p->gr_active_num--;
                    815: 
                           /* The last channel has finished, so the whole restart is over */
                    816:   if (!p->gr_active_num)
                    817:     BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
                    818: 
                           /*
                            * stale_timer is started when the channel enters LLGR in
                            * bgp_graceful_restart_timeout(); it is stopped here unconditionally.
                            */
                    819:   tm_stop(c->stale_timer);
                    820:   rt_refresh_end(c->c.table, &c->c);
                    821: }
                    822: 
                    823: /**
                    824:  * bgp_graceful_restart_timeout - timeout of graceful restart 'restart timer'
                    825:  * @t: timer
                    826:  *
                    827:  * This function is a timeout hook for @gr_timer, implementing BGP restart time
                    828:  * limit for reestablishment of the BGP session after the graceful restart. When
                    829:  * fired, we just proceed with the usual protocol restart.
                    830:  */
                    831: 
                    832: static void
                    833: bgp_graceful_restart_timeout(timer *t)
                    834: {
                    835:   struct bgp_proto *p = t->data;
                    836: 
                    837:   BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
                    838: 
                           /*
                            * With p->llgr_ready set, the protocol is not stopped: each channel
                            * still in regular GR either switches to LLGR or ends its GR.
                            * Without it, the protocol is stopped via bgp_stop().
                            */
                    839:   if (p->llgr_ready)
                    840:   {
                    841:     struct bgp_channel *c;
                    842:     WALK_LIST(c, p->p.channels)
                    843:     {
                    844:       /* Channel is not in GR and is already flushed */
                    845:       if (!c->gr_active)
                    846:        continue;
                    847: 
                    848:       /* Channel is already in LLGR from past restart */
                    849:       if (c->gr_active == BGP_GRS_LLGR)
                    850:        continue;
                    851: 
                    852:       /* Channel is in GR, but does not support LLGR -> stop GR */
                               /* stale_time is the remembered LLGR stale time; zero means none */
                    853:       if (!c->stale_time)
                    854:       {
                    855:        bgp_graceful_restart_done(c);
                    856:        continue;
                    857:       }
                    858: 
                    859:       /* Channel is in GR, and supports LLGR -> start LLGR */
                    860:       c->gr_active = BGP_GRS_LLGR;
                    861:       tm_start(c->stale_timer, c->stale_time S);
                    862:       rt_modify_stale(c->c.table, &c->c);
                    863:     }
                    864:   }
                    865:   else
                    866:     bgp_stop(p, 0, NULL, 0);
                    867: }
                    868: 
                    869: static void
                    870: bgp_long_lived_stale_timeout(timer *t)
                    871: {
                           /*
                            * Timer hook whose data is a BGP channel; ends the restart on that
                            * channel. NOTE(review): presumably installed on c->stale_timer, which
                            * bgp_graceful_restart_timeout() starts - confirm in channel setup.
                            */
                    872:   struct bgp_channel *c = t->data;
                           /* p is not referenced by name below; presumably BGP_TRACE() uses it */
                    873:   struct bgp_proto *p = (void *) c->c.proto;
                    874: 
                    875:   BGP_TRACE(D_EVENTS, "Long-lived stale timeout");
                    876: 
                    877:   bgp_graceful_restart_done(c);
                    878: }
                    879: 
                    880: 
                    881: /**
                    882:  * bgp_refresh_begin - start incoming enhanced route refresh sequence
                    883:  * @c: BGP channel
                    884:  *
                    885:  * This function is called when an incoming enhanced route refresh sequence is
                    886:  * started by the neighbor, demarcated by the BoRR packet. The function updates
                    887:  * the load state and starts the routing table refresh cycle. Note that graceful
                    888:  * restart also uses routing table refresh cycle, but RFC 7313 and load states
                    889:  * ensure that these two sequences do not overlap.
                    890:  */
                    891: void
                    892: bgp_refresh_begin(struct bgp_channel *c)
                    893: {
                    894:   struct bgp_proto *p = (void *) c->c.proto;
                    895: 
                           /*
                            * BFS_LOADING is set at session establishment for a GR-aware peer,
                            * which is expected to send End-of-RIB; a BoRR arriving before that
                            * is only logged and otherwise ignored.
                            */
                    896:   if (c->load_state == BFS_LOADING)
                    897:   { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }
                    898: 
                    899:   c->load_state = BFS_REFRESHING;
                    900:   rt_refresh_begin(c->c.table, &c->c);
                    901: 
                           /* A channel with an in_table gets a refresh cycle on that table too */
                    902:   if (c->c.in_table)
                    903:     rt_refresh_begin(c->c.in_table, &c->c);
                    904: }
                    905: 
                    906: /**
                    907:  * bgp_refresh_end - finish incoming enhanced route refresh sequence
                    908:  * @c: BGP channel
                    909:  *
                    910:  * This function is called when an incoming enhanced route refresh sequence is
                    911:  * finished by the neighbor, demarcated by the EoRR packet. The function updates
                    912:  * the load state and ends the routing table refresh cycle. Routes not received
                    913:  * during the sequence are removed by the nest.
                    914:  */
                    915: void
                    916: bgp_refresh_end(struct bgp_channel *c)
                    917: {
                    918:   struct bgp_proto *p = (void *) c->c.proto;
                    919: 
                           /* An EoRR is accepted only while a refresh started by BoRR is running */
                    920:   if (c->load_state != BFS_REFRESHING)
                    921:   { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }
                    922: 
                    923:   c->load_state = BFS_NONE;
                    924:   rt_refresh_end(c->c.table, &c->c);
                    925: 
                           /*
                            * NOTE(review): the in_table cycle opened in bgp_refresh_begin() is
                            * finished with rt_prune_sync() instead of rt_refresh_end();
                            * presumably a synchronous prune of entries not refreshed - confirm.
                            */
                    926:   if (c->c.in_table)
                    927:     rt_prune_sync(c->c.in_table, 0);
                    928: }
                    929: 
                    930: 
                    931: static void
                    932: bgp_send_open(struct bgp_conn *conn)
                    933: {
                           /*
                            * Switch the socket to the BGP packet hooks, queue an OPEN packet and
                            * enter BS_OPENSENT.
                            */
                    934:   DBG("BGP: Sending open\n");
                    935:   conn->sk->rx_hook = bgp_rx;
                    936:   conn->sk->tx_hook = bgp_tx;
                           /* The connect phase is over, its timer is no longer needed */
                    937:   tm_stop(conn->connect_timer);
                    938:   bgp_prepare_capabilities(conn);
                    939:   bgp_schedule_packet(conn, NULL, PKT_OPEN);
                    940:   bgp_conn_set_state(conn, BS_OPENSENT);
                           /* Hold timer starts with the configured initial hold time */
                    941:   bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
                    942: }
                    943: 
                    944: static void
                    945: bgp_connected(sock *sk)
                    946: {
                           /*
                            * Installed by bgp_connect() as tx_hook of the outgoing socket;
                            * bgp_send_open() then replaces that hook with bgp_tx.
                            */
                    947:   struct bgp_conn *conn = sk->data;
                           /* p is not referenced by name below; presumably BGP_TRACE() uses it */
                    948:   struct bgp_proto *p = conn->bgp;
                    949: 
                    950:   BGP_TRACE(D_EVENTS, "Connected");
                    951:   bgp_send_open(conn);
                    952: }
                    953: 
                    954: static void
                    955: bgp_connect_timeout(timer *t)
                    956: {
                           /*
                            * Hook of conn->connect_timer (see bgp_setup_conn()). The timer is
                            * started both by bgp_active() (connect delay) and by bgp_connect()
                            * (connect retry).
                            */
                    957:   struct bgp_conn *conn = t->data;
                    958:   struct bgp_proto *p = conn->bgp;
                    959: 
                    960:   DBG("BGP: connect_timeout\n");
                           /* In PS_START, drop the current attempt and start a fresh connect */
                    961:   if (p->p.proto_state == PS_START)
                    962:   {
                    963:     bgp_close_conn(conn);
                    964:     bgp_connect(p);
                    965:   }
                    966:   else
                    967:     bgp_conn_enter_idle_state(conn);
                    968: }
                    969: 
                    970: static void
                    971: bgp_sock_err(sock *sk, int err)
                    972: {
                    973:   struct bgp_conn *conn = sk->data;
                    974:   struct bgp_proto *p = conn->bgp;
                    975: 
                    976:   /*
                    977:    * This error hook may be called either asynchronously from main
                    978:    * loop, or synchronously from sk_send().  But sk_send() is called
                    979:    * only from bgp_tx() and bgp_kick_tx(), which are both called
                    980:    * asynchronously from main loop. Moreover, they end if err hook is
                    981:    * called. Therefore, we could suppose that it is always called
                    982:    * asynchronously.
                    983:    */
                           /* bgp_connect() also calls this directly, with err 0, on setup failure */
                    984: 
                    985:   bgp_store_error(p, conn, BE_SOCKET, err);
                    986: 
                           /* A zero err is reported as a plain close, non-zero as a lost connection */
                    987:   if (err)
                    988:     BGP_TRACE(D_EVENTS, "Connection lost (%M)", err);
                    989:   else
                    990:     BGP_TRACE(D_EVENTS, "Connection closed");
                    991: 
                           /* Losing an established GR-ready session starts graceful restart handling */
                    992:   if ((conn->state == BS_ESTABLISHED) && p->gr_ready)
                    993:     bgp_handle_graceful_restart(p);
                    994: 
                    995:   bgp_conn_enter_idle_state(conn);
                    996: }
                    997: 
                    998: static void
                    999: bgp_hold_timeout(timer *t)
                   1000: {
                           /*
                            * Hook of conn->hold_timer (see bgp_setup_conn()). The same timer also
                            * serves as the close timeout started by bgp_conn_enter_close_state().
                            */
                   1001:   struct bgp_conn *conn = t->data;
                   1002:   struct bgp_proto *p = conn->bgp;
                   1003: 
                   1004:   DBG("BGP: Hold timeout\n");
                   1005: 
                   1006:   /* We are already closing the connection - just do hangup */
                   1007:   if (conn->state == BS_CLOSE)
                   1008:   {
                   1009:     BGP_TRACE(D_EVENTS, "Connection stalled");
                   1010:     bgp_conn_enter_idle_state(conn);
                   1011:     return;
                   1012:   }
                   1013: 
                   1014:   /* If there is something in input queue, we are probably congested
                   1015:      and perhaps just not processed BGP packets in time. */
                   1016: 
                           /* In that case the timer is simply re-armed for another 10 seconds */
                   1017:   if (sk_rx_ready(conn->sk) > 0)
                   1018:     bgp_start_timer(conn->hold_timer, 10);
                           /*
                            * Note that this branch tests p->llgr_ready, whereas bgp_sock_err()
                            * tests p->gr_ready before calling bgp_handle_graceful_restart().
                            */
                   1019:   else if ((conn->state == BS_ESTABLISHED) && p->llgr_ready)
                   1020:   {
                   1021:     BGP_TRACE(D_EVENTS, "Hold timer expired");
                   1022:     bgp_handle_graceful_restart(p);
                   1023:     bgp_conn_enter_idle_state(conn);
                   1024:   }
                           /* NOTE(review): code 4 is presumably Hold Timer Expired (RFC 4271) - confirm */
                   1025:   else
                   1026:     bgp_error(conn, 4, 0, NULL, 0);
                   1027: }
                   1028: 
                   1029: static void
                   1030: bgp_keepalive_timeout(timer *t)
                   1031: {
                           /* Hook of conn->keepalive_timer (see bgp_setup_conn()): queue a KEEPALIVE */
                   1032:   struct bgp_conn *conn = t->data;
                   1033: 
                   1034:   DBG("BGP: Keepalive timer\n");
                   1035:   bgp_schedule_packet(conn, NULL, PKT_KEEPALIVE);
                   1036: 
                   1037:   /* Kick TX a bit faster */
                           /* tx_ev is the connection's bgp_kick_tx event; run it now if it is active */
                   1038:   if (ev_active(conn->tx_ev))
                   1039:     ev_run(conn->tx_ev);
                   1040: }
                   1041: 
                   1042: static void
                   1043: bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
                   1044: {
                           /*
                            * Reset @conn for a new connection attempt owned by @p: no socket yet,
                            * empty send state, fresh timers and a fresh TX event.
                            */
                   1045:   conn->sk = NULL;
                   1046:   conn->bgp = p;
                   1047: 
                   1048:   conn->packets_to_send = 0;
                   1049:   conn->channels_to_send = 0;
                   1050:   conn->last_channel = 0;
                   1051:   conn->last_channel_count = 0;
                   1052: 
                           /* Timers are created from the protocol pool with @conn as hook data */
                   1053:   conn->connect_timer  = tm_new_init(p->p.pool, bgp_connect_timeout,    conn, 0, 0);
                   1054:   conn->hold_timer     = tm_new_init(p->p.pool, bgp_hold_timeout,       conn, 0, 0);
                   1055:   conn->keepalive_timer        = tm_new_init(p->p.pool, bgp_keepalive_timeout, conn, 0, 0);
                   1056: 
                   1057:   conn->tx_ev = ev_new_init(p->p.pool, bgp_kick_tx, conn);
                   1058: }
                   1059: 
                   1060: static void
                   1061: bgp_setup_sk(struct bgp_conn *conn, sock *s)
                   1062: {
                           /* Bind socket @s and connection @conn to each other */
                   1063:   s->data = conn;
                   1064:   s->err_hook = bgp_sock_err;
                           /* fast_rx is cleared again once the session becomes established */
                   1065:   s->fast_rx = 1;
                   1066:   conn->sk = s;
                   1067: }
                   1068: 
                   1069: static void
                   1070: bgp_active(struct bgp_proto *p)
                   1071: {
                           /*
                            * Put the outgoing connection into BS_ACTIVE and arm its connect
                            * timer; bgp_connect_timeout() then calls bgp_connect() if the
                            * protocol is in PS_START.
                            */
                           /* The delay is the configured connect_delay_time, at least 1 second */
                   1072:   int delay = MAX(1, p->cf->connect_delay_time);
                   1073:   struct bgp_conn *conn = &p->outgoing_conn;
                   1074: 
                   1075:   BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
                   1076:   bgp_setup_conn(p, conn);
                   1077:   bgp_conn_set_state(conn, BS_ACTIVE);
                   1078:   bgp_start_timer(conn->connect_timer, delay);
                   1079: }
                   1080: 
                   1081: /**
                   1082:  * bgp_connect - initiate an outgoing connection
                   1083:  * @p: BGP instance
                   1084:  *
                   1085:  * The bgp_connect() function creates a new &bgp_conn and initiates
                   1086:  * a TCP connection to the peer. The rest of connection setup is governed
                   1087:  * by the BGP state machine as described in the standard.
                   1088:  */
                   1089: static void
                   1090: bgp_connect(struct bgp_proto *p)       /* Enter Connect state and start establishing connection */
                   1091: {
                   1092:   struct bgp_conn *conn = &p->outgoing_conn;
                           /* Configured multihop value, or 1 when it is zero (GNU "?:" shorthand) */
                   1093:   int hops = p->cf->multihop ? : 1;
                   1094: 
                   1095:   DBG("BGP: Connecting\n");
                   1096:   sock *s = sk_new(p->p.pool);
                   1097:   s->type = SK_TCP_ACTIVE;
                   1098:   s->saddr = p->local_ip;
                   1099:   s->daddr = p->remote_ip;
                   1100:   s->dport = p->cf->remote_port;
                   1101:   s->iface = p->neigh ? p->neigh->iface : NULL;
                   1102:   s->vrf = p->p.vrf;
                           /* With TTL security packets are sent with TTL 255, otherwise with hops */
                   1103:   s->ttl = p->cf->ttl_security ? 255 : hops;
                           /* Extended messages select the larger RX/TX buffer sizes */
                   1104:   s->rbsize = p->cf->enable_extended_messages ? BGP_RX_BUFFER_EXT_SIZE : BGP_RX_BUFFER_SIZE;
                   1105:   s->tbsize = p->cf->enable_extended_messages ? BGP_TX_BUFFER_EXT_SIZE : BGP_TX_BUFFER_SIZE;
                   1106:   s->tos = IP_PREC_INTERNET_CONTROL;
                   1107:   s->password = p->cf->password;
                           /* bgp_connected() sends the OPEN once this hook is invoked */
                   1108:   s->tx_hook = bgp_connected;
                   1109:   BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J",
                   1110:            s->daddr, ipa_is_link_local(s->daddr) ? p->cf->iface : NULL,
                   1111:            s->saddr, ipa_is_link_local(s->saddr) ? s->iface : NULL);
                   1112:   bgp_setup_conn(p, conn);
                   1113:   bgp_setup_sk(conn, s);
                   1114:   bgp_conn_set_state(conn, BS_CONNECT);
                   1115: 
                   1116:   if (sk_open(s) < 0)
                   1117:     goto err;
                   1118: 
                   1119:   /* Set minimal receive TTL if needed */
                   1120:   if (p->cf->ttl_security)
                   1121:     if (sk_set_min_ttl(s, 256 - hops) < 0)
                   1122:       goto err;
                   1123: 
                   1124:   DBG("BGP: Waiting for connect success\n");
                           /* bgp_connect_timeout() handles the case when no connect happens in time */
                   1125:   bgp_start_timer(conn->connect_timer, p->cf->connect_retry_time);
                   1126:   return;
                   1127: 
                   1128: err:
                           /* Log the failure, then reuse the socket error path with err 0 */
                   1129:   sk_log_error(s, p->p.name);
                   1130:   bgp_sock_err(s, 0);
                   1131:   return;
                   1132: }
                   1133: 
                   1134: static inline int bgp_is_dynamic(struct bgp_proto *p)
                         /* An instance counts as dynamic when its remote_ip is the zero address */
                   1135: { return ipa_zero(p->remote_ip); }
                   1136: 
                   1137: /**
                   1138:  * bgp_find_proto - find existing proto for incoming connection
                   1139:  * @sk: TCP socket
                   1140:  *
                   1141:  */
                   1142: static struct bgp_proto *
                   1143: bgp_find_proto(sock *sk)
                   1144: {
                           /*
                            * Walk proto_list for a BGP instance matching the incoming socket.
                            * A non-dynamic match ends the walk at once. Dynamic matches are
                            * only remembered, so the last one found is returned when no
                            * non-dynamic instance matches. Returns NULL when nothing matches.
                            */
                   1145:   struct bgp_proto *best = NULL;
                   1146:   struct bgp_proto *p;
                   1147: 
                   1148:   /* sk->iface is valid only if src or dst address is link-local */
                   1149:   int link = ipa_is_link_local(sk->saddr) || ipa_is_link_local(sk->daddr);
                   1150: 
                           /*
                            * List entries are accessed as struct bgp_proto; the proto_bgp test
                            * comes first, so the BGP-only fields are read for BGP instances only.
                            * The remaining tests compare peer address (or dynamic instance),
                            * remote range, VRF, local port, interface (link-local only) and
                            * configured local address.
                            */
                   1151:   WALK_LIST(p, proto_list)
                   1152:     if ((p->p.proto == &proto_bgp) &&
                   1153:        (ipa_equal(p->remote_ip, sk->daddr) || bgp_is_dynamic(p)) &&
                   1154:        (!p->cf->remote_range || ipa_in_netX(sk->daddr, p->cf->remote_range)) &&
                   1155:        (p->p.vrf == sk->vrf) &&
                   1156:        (p->cf->local_port == sk->sport) &&
                   1157:        (!link || (p->cf->iface == sk->iface)) &&
                   1158:        (ipa_zero(p->cf->local_ip) || ipa_equal(p->cf->local_ip, sk->saddr)))
                   1159:     {
                   1160:       best = p;
                   1161: 
                               /* An instance with a fixed remote_ip wins immediately */
                   1162:       if (!bgp_is_dynamic(p))
                   1163:        break;
                   1164:     }
                   1165: 
                   1166:   return best;
                   1167: }
                   1168: 
                   1169: /**
                   1170:  * bgp_incoming_connection - handle an incoming connection
                   1171:  * @sk: TCP socket
                   1172:  * @dummy: unused
                   1173:  *
                   1174:  * This function serves as a socket hook for accepting new BGP
                   1175:  * connections. It searches for a BGP instance corresponding to the peer
                   1176:  * which has connected. If no such instance exists, or the instance cannot
                   1177:  * take the connection at the moment, the socket is freed. Otherwise the
                   1178:  * socket is moved to the pool of the instance and either an Open message
                   1179:  * is sent on its incoming connection or, for a dynamic BGP instance, a new
                          * instance is spawned and the socket is stored in its postponed_sk field.
                          *
                          * The function returns 0 on every path.
                   1180:  */
                   1181: static int
                   1182: bgp_incoming_connection(sock *sk, uint dummy UNUSED)
                   1183: {
                   1184:   struct bgp_proto *p;
                   1185:   int acc, hops;
                   1186: 
                   1187:   DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport);
                   1188:   p = bgp_find_proto(sk);
                   1189:   if (!p)
                   1190:   {
                   1191:     log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)",
                   1192:        sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport);
                   1193:     rfree(sk);
                   1194:     return 0;
                   1195:   }
                   1196: 
                   1197:   /*
                   1198:    * BIRD should keep multiple incoming connections in OpenSent state (for
                   1199:    * details RFC 4271 8.2.1 par 3), but it keeps just one. Duplicate incoming
                   1200:    * connections are rejected instead. The exception is the case where an
                   1201:    * incoming connection triggers a graceful restart.
                   1202:    */
                   1203: 
                           /*
                            * Accept only while the protocol is in PS_START or PS_UP, its start state
                            * is BSS_CONNECT or higher, and no incoming socket is attached yet.
                            */
                   1204:   acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) &&
                   1205:     (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);
                   1206: 
                           /*
                            * Established session with gr_ready set: record a graceful restart as the
                            * last error, run the graceful restart handling, put the current
                            * connection into idle state and accept the new socket regardless of the
                            * check above.
                            */
                   1207:   if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
                   1208:   {
                   1209:     bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART);
                   1210:     bgp_handle_graceful_restart(p);
                   1211:     bgp_conn_enter_idle_state(p->conn);
                   1212:     acc = 1;
                   1213: 
                   1214:     /* There might be separate incoming connection in OpenSent state */
                   1215:     if (p->incoming_conn.state > BS_ACTIVE)
                   1216:       bgp_close_conn(&p->incoming_conn);
                   1217:   }
                   1218: 
                   1219:   BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s",
                   1220:            sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL,
                   1221:            sk->dport, acc ? "accepted" : "rejected");
                   1222: 
                           /* A rejected connection is dropped just by freeing its socket */
                   1223:   if (!acc)
                   1224:   {
                   1225:     rfree(sk);
                   1226:     return 0;
                   1227:   }
                   1228: 
                           /* GNU "?:" extension: the configured multihop value, or 1 when it is zero */
                   1229:   hops = p->cf->multihop ? : 1;
                   1230: 
                           /* With TTL security the TTL is set to 255, otherwise to the hop count */
                   1231:   if (sk_set_ttl(sk, p->cf->ttl_security ? 255 : hops) < 0)
                   1232:     goto err;
                   1233: 
                           /* TTL security also sets a minimum TTL of 256 - hops on the socket */
                   1234:   if (p->cf->ttl_security)
                   1235:     if (sk_set_min_ttl(sk, 256 - hops) < 0)
                   1236:       goto err;
                   1237: 
                           /* Extended messages use the larger RX/TX buffer sizes */
                   1238:   if (p->cf->enable_extended_messages)
                   1239:   {
                   1240:     sk->rbsize = BGP_RX_BUFFER_EXT_SIZE;
                   1241:     sk->tbsize = BGP_TX_BUFFER_EXT_SIZE;
                   1242:     sk_reallocate(sk);
                   1243:   }
                   1244: 
                   1245:   /* For dynamic BGP, spawn new instance and postpone the socket */
                   1246:   if (bgp_is_dynamic(p))
                   1247:   {
                             /* From here on p is the spawned instance, not the dynamic one */
                   1248:     p = bgp_spawn(p, sk->daddr);
                   1249:     p->postponed_sk = sk;
                   1250:     rmove(sk, p->p.pool);
                   1251:     return 0;
                   1252:   }
                   1253: 
                           /* Regular instance: take over the socket and send an Open message */
                   1254:   rmove(sk, p->p.pool);
                   1255:   bgp_setup_conn(p, &p->incoming_conn);
                   1256:   bgp_setup_sk(&p->incoming_conn, sk);
                   1257:   bgp_send_open(&p->incoming_conn);
                   1258:   return 0;
                   1259: 
                   1260: err:
                           /* Setting of the TTL or of the minimum TTL failed */
                   1261:   sk_log_error(sk, p->p.name);
                   1262:   log(L_ERR "%s: Incoming connection aborted", p->p.name);
                   1263:   rfree(sk);
                   1264:   return 0;
                   1265: }
                   1266: 
                   1267: static void
                   1268: bgp_listen_sock_err(sock *sk UNUSED, int err)
                   1269: {
                           /*
                            * Error callback for a listening socket (NOTE(review): presumably installed
                            * as its error hook; the registration is not visible here). It only logs:
                            * ECONNABORTED is reported as a warning, any other code as an error.
                            */
                   1270:   if (err == ECONNABORTED)
                   1271:     log(L_WARN "BGP: Incoming connection aborted");
                   1272:   else
                   1273:     log(L_ERR "BGP: Error on listening socket: %M", err);
                   1274: }
                   1275: 
                   1276: static void
                   1277: bgp_start_neighbor(struct bgp_proto *p)
                   1278: {
                   1279:   /* Called only for single-hop BGP sessions */
                   1280: 
                           /* No local address yet: take the address attached to the neighbor entry */
                   1281:   if (ipa_zero(p->local_ip))
                   1282:     p->local_ip = p->neigh->ifa->ip;
                   1283: 
                           /*
                            * Choose the link-local address: the local address itself when it is
                            * link-local, otherwise the llv6 address of the neighbor's interface when
                            * it has one. If neither applies, link_addr is left as it was.
                            */
                   1284:   if (ipa_is_link_local(p->local_ip))
                   1285:     p->link_addr = p->local_ip;
                   1286:   else if (p->neigh->iface->llv6)
                   1287:     p->link_addr = p->neigh->iface->llv6->ip;
                   1288: 
                   1289:   bgp_initiate(p);
                   1290: }
                   1291: 
                   1292: static void
                   1293: bgp_neigh_notify(neighbor *n)
                   1294: {
                           /*
                            * Neighbor notification hook (installed as P->neigh_notify in bgp_init()).
                            * Stops the session when the neighbor or its link is lost and starts it
                            * when a neighbor the protocol was waiting for becomes usable.
                            */
                   1295:   struct bgp_proto *p = (struct bgp_proto *) n->proto;
                   1296:   int ps = p->p.proto_state;
                   1297: 
                           /* Ignore entries other than the one this instance currently uses */
                   1298:   if (n != p->neigh)
                   1299:     return;
                   1300: 
                           /* Nothing to do when the protocol is down or already stopping */
                   1301:   if ((ps == PS_DOWN) || (ps == PS_STOP))
                   1302:     return;
                   1303: 
                           /* Set while the protocol has started but is still in BSS_PREPARE */
                   1304:   int prepare = (ps == PS_START) && (p->start_state == BSS_PREPARE);
                   1305: 
                           /* Non-positive scope is handled as a lost neighbor */
                   1306:   if (n->scope <= 0)
                   1307:   {
                   1308:     if (!prepare)
                   1309:     {
                   1310:       BGP_TRACE(D_EVENTS, "Neighbor lost");
                   1311:       bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST);
                   1312:       /* Perhaps also run bgp_update_startup_delay(p)? */
                   1313:       bgp_stop(p, 0, NULL, 0);
                   1314:     }
                   1315:   }
                           /* Neighbor present, but link checking is on and the link is not up */
                   1316:   else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
                   1317:   {
                   1318:     if (!prepare)
                   1319:     {
                   1320:       BGP_TRACE(D_EVENTS, "Link down");
                   1321:       bgp_store_error(p, NULL, BE_MISC, BEM_LINK_DOWN);
                             /* The startup delay is updated only if the protocol was up */
                   1322:       if (ps == PS_UP)
                   1323:        bgp_update_startup_delay(p);
                   1324:       bgp_stop(p, 0, NULL, 0);
                   1325:     }
                   1326:   }
                           /* Neighbor is usable: start only if the protocol was waiting for it */
                   1327:   else
                   1328:   {
                   1329:     if (prepare)
                   1330:     {
                   1331:       BGP_TRACE(D_EVENTS, "Neighbor ready");
                   1332:       bgp_start_neighbor(p);
                   1333:     }
                   1334:   }
                   1335: }
                   1336: 
                   1337: static void
                   1338: bgp_bfd_notify(struct bfd_request *req)
                   1339: {
                           /*
                            * BFD notification callback (the function passed to bfd_request_session()
                            * in bgp_update_bfd(), with the bgp_proto as request data). Only a down
                            * report received while the protocol is starting or up is acted upon.
                            */
                   1340:   struct bgp_proto *p = req->data;
                   1341:   int ps = p->p.proto_state;
                   1342: 
                   1343:   if (req->down && ((ps == PS_START) || (ps == PS_UP)))
                   1344:   {
                   1345:     BGP_TRACE(D_EVENTS, "BFD session down");
                   1346:     bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN);
                   1347: 
                             /* Graceful mode keeps the protocol running and only idles connections */
                   1348:     if (p->cf->bfd == BGP_BFD_GRACEFUL)
                   1349:     {
                   1350:       /* Trigger graceful restart */
                   1351:       if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
                   1352:        bgp_handle_graceful_restart(p);
                   1353: 
                               /* Move both connections to idle state unless they are idle already */
                   1354:       if (p->incoming_conn.state > BS_IDLE)
                   1355:        bgp_conn_enter_idle_state(&p->incoming_conn);
                   1356: 
                   1357:       if (p->outgoing_conn.state > BS_IDLE)
                   1358:        bgp_conn_enter_idle_state(&p->outgoing_conn);
                   1359:     }
                   1360:     else
                   1361:     {
                   1362:       /* Trigger session down */
                   1363:       if (ps == PS_UP)
                   1364:        bgp_update_startup_delay(p);
                   1365:       bgp_stop(p, 0, NULL, 0);
                   1366:     }
                   1367:   }
                   1368: }
                   1369: 
                   1370: static void
                   1371: bgp_update_bfd(struct bgp_proto *p, int use_bfd)
                   1372: {
                           /*
                            * Bring the BFD request of the instance in line with @use_bfd: request a
                            * session when BFD is wanted and none is held, release it when BFD is not
                            * wanted. Dynamic instances never request a session here.
                            */

                           /* For multihop no interface is passed, else the neighbor's interface */
                   1373:   if (use_bfd && !p->bfd_req && !bgp_is_dynamic(p))
                   1374:     p->bfd_req = bfd_request_session(p->p.pool, p->remote_ip, p->local_ip,
                   1375:                                     p->cf->multihop ? NULL : p->neigh->iface,
                   1376:                                     p->p.vrf, bgp_bfd_notify, p);
                   1377: 
                   1378:   if (!use_bfd && p->bfd_req)
                   1379:   {
                   1380:     rfree(p->bfd_req);
                   1381:     p->bfd_req = NULL;
                   1382:   }
                   1383: }
                   1384: 
                   1385: static void
                   1386: bgp_reload_routes(struct channel *C)
                   1387: {
                           /*
                            * Route reload hook (installed as P->reload_routes in bgp_init()). Routes
                            * are reloaded from the channel's in_table when it exists; otherwise a
                            * route refresh packet is scheduled on the current connection.
                            */
                   1388:   struct bgp_proto *p = (void *) C->proto;
                   1389:   struct bgp_channel *c = (void *) C;
                   1390: 
                           /* Needs a connection and at least one of the two reload mechanisms */
                   1391:   ASSERT(p->conn && (p->route_refresh || c->c.in_table));
                   1392: 
                   1393:   if (c->c.in_table)
                   1394:     channel_schedule_reload(C);
                   1395:   else
                   1396:     bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH);
                   1397: }
                   1398: 
                   1399: static void
                   1400: bgp_feed_begin(struct channel *C, int initial)
                   1401: {
                           /*
                            * Feed-begin hook (installed as P->feed_begin in bgp_init()). It records
                            * in c->feed_state which kind of feed is starting, so that bgp_feed_end()
                            * can tell how the feed has to be closed. A zero @initial is treated as a
                            * refeed.
                            */
                   1402:   struct bgp_proto *p = (void *) C->proto;
                   1403:   struct bgp_channel *c = (void *) C;
                   1404: 
                   1405:   /* This should not happen */
                   1406:   if (!p->conn)
                   1407:     return;
                   1408: 
                           /* Initial feed with graceful restart configured: mark it as loading */
                   1409:   if (initial && p->cf->gr_mode)
                   1410:     c->feed_state = BFS_LOADING;
                   1411: 
                   1412:   /* It is refeed and both sides support enhanced route refresh */
                   1413:   if (!initial && p->enhanced_refresh)
                   1414:   {
                   1415:     /* BoRR must not be sent before End-of-RIB */
                   1416:     if (c->feed_state == BFS_LOADING || c->feed_state == BFS_LOADED)
                   1417:       return;
                   1418: 
                   1419:     c->feed_state = BFS_REFRESHING;
                   1420:     bgp_schedule_packet(p->conn, c, PKT_BEGIN_REFRESH);
                   1421:   }
                   1422: }
                   1423: 
                   1424: static void
                   1425: bgp_feed_end(struct channel *C)
                   1426: {
                           /*
                            * Feed-end hook (installed as P->feed_end in bgp_init()). It advances
                            * c->feed_state from the "in progress" value set by bgp_feed_begin() to
                            * the matching "finished" value and schedules an update packet.
                            * NOTE(review): the End-of-RIB / EoRR packets named below are presumably
                            * generated by the TX code based on feed_state; that code is not visible
                            * here.
                            */
                   1427:   struct bgp_proto *p = (void *) C->proto;
                   1428:   struct bgp_channel *c = (void *) C;
                   1429: 
                   1430:   /* This should not happen */
                   1431:   if (!p->conn)
                   1432:     return;
                   1433: 
                   1434:   /* Non-demarcated feed ended, nothing to do */
                   1435:   if (c->feed_state == BFS_NONE)
                   1436:     return;
                   1437: 
                   1438:   /* Schedule End-of-RIB packet */
                   1439:   if (c->feed_state == BFS_LOADING)
                   1440:     c->feed_state = BFS_LOADED;
                   1441: 
                   1442:   /* Schedule EoRR packet */
                   1443:   if (c->feed_state == BFS_REFRESHING)
                   1444:     c->feed_state = BFS_REFRESHED;
                   1445: 
                   1446:   /* Kick TX hook */
                   1447:   bgp_schedule_packet(p->conn, c, PKT_UPDATE);
                   1448: }
                   1449: 
                   1450: 
                   1451: static void
                   1452: bgp_start_locked(struct object_lock *lock)
                   1453: {
                           /*
                            * Hook of the object lock requested in bgp_start() (lock->hook), with the
                            * bgp_proto stored in lock->data. It continues the startup: multihop and
                            * dynamic instances are initiated at once, single-hop ones first need a
                            * usable neighbor entry.
                            */
                   1454:   struct bgp_proto *p = lock->data;
                   1455:   const struct bgp_config *cf = p->cf;
                   1456: 
                           /* The protocol left PS_START in the meantime, so do not start */
                   1457:   if (p->p.proto_state != PS_START)
                   1458:   {
                   1459:     DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
                   1460:     return;
                   1461:   }
                   1462: 
                   1463:   DBG("BGP: Got lock\n");
                   1464: 
                   1465:   if (cf->multihop || bgp_is_dynamic(p))
                   1466:   {
                   1467:     /* Multi-hop sessions do not use neighbor entries */
                   1468:     bgp_initiate(p);
                   1469:     return;
                   1470:   }
                   1471: 
                   1472:   neighbor *n = neigh_find(&p->p, p->remote_ip, cf->iface, NEF_STICKY);
                   1473:   if (!n)
                   1474:   {
                   1475:     log(L_ERR "%s: Invalid remote address %I%J", p->p.name, p->remote_ip, cf->iface);
                   1476:     /* As we do not start yet, we can just disable protocol */
                   1477:     p->p.disabled = 1;
                             /* The error is stored under the BEM_INVALID_NEXT_HOP code */
                   1478:     bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
                   1479:     proto_notify_state(&p->p, PS_DOWN);
                   1480:     return;
                   1481:   }
                   1482: 
                   1483:   p->neigh = n;
                   1484: 
                           /*
                            * When the neighbor or its link is not ready, only a trace message is
                            * emitted; the start is then left to bgp_neigh_notify().
                            */
                   1485:   if (n->scope <= 0)
                   1486:     BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", p->remote_ip, cf->iface);
                   1487:   else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
                   1488:     BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name);
                   1489:   else
                   1490:     bgp_start_neighbor(p);
                   1491: }
                   1492: 
                   1493: static int
                   1494: bgp_start(struct proto *P)
                   1495: {
                           /*
                            * Start of the protocol instance: copy the configured parameters, reset
                            * the runtime state, create the event and the timers and request the
                            * object lock. The startup continues in bgp_start_locked(), which is set
                            * as the hook of that lock. Always returns PS_START.
                            */
                   1496:   struct bgp_proto *p = (struct bgp_proto *) P;
                   1497:   const struct bgp_config *cf = p->cf;
                   1498: 
                   1499:   p->local_ip = cf->local_ip;
                   1500:   p->local_as = cf->local_as;
                   1501:   p->remote_as = cf->remote_as;
                   1502:   p->public_as = cf->local_as;
                   1503: 
                   1504:   /* For dynamic BGP children, remote_ip is already set */
                   1505:   if (ipa_nonzero(cf->remote_ip))
                   1506:     p->remote_ip = cf->remote_ip;
                   1507: 
                   1508:   /* Confederation ID is used for truly external peers */
                   1509:   if (p->cf->confederation && !p->is_interior)
                   1510:     p->public_as = cf->confederation;
                   1511: 
                           /* Dynamic instances are always passive */
                   1512:   p->passive = cf->passive || bgp_is_dynamic(p);
                   1513: 
                   1514:   p->start_state = BSS_PREPARE;
                   1515:   p->outgoing_conn.state = BS_IDLE;
                   1516:   p->incoming_conn.state = BS_IDLE;
                   1517:   p->neigh = NULL;
                   1518:   p->bfd_req = NULL;
                   1519:   p->postponed_sk = NULL;
                   1520:   p->gr_ready = 0;
                   1521:   p->gr_active_num = 0;
                   1522: 
                   1523:   p->event = ev_new_init(p->p.pool, bgp_decision, p);
                   1524:   p->startup_timer = tm_new_init(p->p.pool, bgp_startup_timeout, p, 0, 0);
                   1525:   p->gr_timer = tm_new_init(p->p.pool, bgp_graceful_restart_timeout, p, 0, 0);
                   1526: 
                   1527:   p->local_id = proto_get_router_id(P->cf);
                           /* The cluster ID falls back to the router ID when it is not configured */
                   1528:   if (p->rr_client)
                   1529:     p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
                   1530: 
                   1531:   p->remote_id = 0;
                   1532:   p->link_addr = IPA_NONE;
                   1533: 
                   1534:   /* Lock all channels when in GR recovery mode */
                   1535:   if (p->p.gr_recovery && p->cf->gr_mode)
                   1536:   {
                   1537:     struct bgp_channel *c;
                   1538:     WALK_LIST(c, p->p.channels)
                   1539:       channel_graceful_restart_lock(&c->c);
                   1540:   }
                   1541: 
                   1542:   /*
                   1543:    * Before attempting to create the connection, we need to lock the port,
                   1544:    * so that we are the only instance attempting to talk with that neighbor.
                   1545:    */
                   1546:   struct object_lock *lock;
                   1547:   lock = p->lock = olock_new(P->pool);
                   1548:   lock->addr = p->remote_ip;
                   1549:   lock->port = p->cf->remote_port;
                   1550:   lock->iface = p->cf->iface;
                           /* The VRF is put into the lock only when no interface is configured */
                   1551:   lock->vrf = p->cf->iface ? NULL : p->p.vrf;
                   1552:   lock->type = OBJLOCK_TCP;
                   1553:   lock->hook = bgp_start_locked;
                   1554:   lock->data = p;
                   1555: 
                   1556:   /* For dynamic BGP, we use inst 1 to avoid collisions with regular BGP */
                   1557:   if (bgp_is_dynamic(p))
                   1558:   {
                             /* The lock address is the prefix of the configured remote range */
                   1559:     lock->addr = net_prefix(p->cf->remote_range);
                   1560:     lock->inst = 1;
                   1561:   }
                   1562: 
                   1563:   olock_acquire(lock);
                   1564: 
                   1565:   return PS_START;
                   1566: }
                   1567: 
                   1568: extern int proto_restart;
                   1569: 
                   1570: static int
                   1571: bgp_shutdown(struct proto *P)
                   1572: {
                           /*
                            * Shut the protocol instance down. The reason in P->down_code selects the
                            * subcode handed to bgp_stop() (the inline comments give it as a subcode
                            * of error code 6). For an administrative shutdown or reset the text in
                            * P->message is attached as a shutdown communication. Returns the protocol
                            * state as it is after bgp_stop().
                            */
                   1573:   struct bgp_proto *p = (struct bgp_proto *) P;
                   1574:   int subcode = 0;
                   1575: 
                   1576:   char *message = NULL;
                   1577:   byte *data = NULL;
                   1578:   uint len = 0;
                   1579: 
                   1580:   BGP_TRACE(D_EVENTS, "Shutdown requested");
                   1581: 
                           /* Down codes not listed below leave the subcode at 0 */
                   1582:   switch (P->down_code)
                   1583:   {
                   1584:   case PDC_CF_REMOVE:
                   1585:   case PDC_CF_DISABLE:
                   1586:     subcode = 3; // Errcode 6, 3 - peer de-configured
                   1587:     break;
                   1588: 
                   1589:   case PDC_CF_RESTART:
                   1590:     subcode = 6; // Errcode 6, 6 - other configuration change
                   1591:     break;
                   1592: 
                           /* The shutdown label is also the target of the goto in PDC_CMD_GR_DOWN */
                   1593:   case PDC_CMD_DISABLE:
                   1594:   case PDC_CMD_SHUTDOWN:
                   1595:   shutdown:
                   1596:     subcode = 2; // Errcode 6, 2 - administrative shutdown
                   1597:     message = P->message;
                   1598:     break;
                   1599: 
                   1600:   case PDC_CMD_RESTART:
                   1601:     subcode = 4; // Errcode 6, 4 - administrative reset
                   1602:     message = P->message;
                   1603:     break;
                   1604: 
                   1605:   case PDC_CMD_GR_DOWN:
                             /* Neither GR nor LLGR is in the "able" mode: do a regular shutdown */
                   1606:     if ((p->cf->gr_mode != BGP_GR_ABLE) &&
                   1607:        (p->cf->llgr_mode != BGP_LLGR_ABLE))
                   1608:       goto shutdown;
                   1609: 
                   1610:     subcode = -1; // Do not send NOTIFICATION, just close the connection
                   1611:     break;
                   1612: 
                   1613:   case PDC_RX_LIMIT_HIT:
                   1614:   case PDC_IN_LIMIT_HIT:
                   1615:     subcode = 1; // Errcode 6, 1 - max number of prefixes reached
                   1616:     /* log message for compatibility */
                   1617:     log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name);
                   1618:     goto limit;
                   1619: 
                   1620:   case PDC_OUT_LIMIT_HIT:
                   1621:     subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown
                             /* Falls through to the limit label */
                   1622: 
                   1623:   limit:
                             /* Limit handling skips the manual-shutdown error and the message */
                   1624:     bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED);
                   1625:     if (proto_restart)
                   1626:       bgp_update_startup_delay(p);
                   1627:     else
                   1628:       p->startup_delay = 0;
                   1629:     goto done;
                   1630:   }
                   1631: 
                           /* Reached by every case that leaves the switch without goto done */
                   1632:   bgp_store_error(p, NULL, BE_MAN_DOWN, 0);
                   1633:   p->startup_delay = 0;
                   1634: 
                   1635:   /* RFC 8203 - shutdown communication */
                   1636:   if (message)
                   1637:   {
                             /* The length is stored in one byte, so longer text is cut at 255 */
                   1638:     uint msg_len = strlen(message);
                   1639:     msg_len = MIN(msg_len, 255);
                   1640: 
                   1641:     /* Buffer will be freed automatically by protocol shutdown */
                   1642:     data = mb_alloc(p->p.pool, msg_len + 1);
                   1643:     len = msg_len + 1;
                   1644: 
                             /* Layout: length byte, then the text without a terminating NUL */
                   1645:     data[0] = msg_len;
                   1646:     memcpy(data+1, message, msg_len);
                   1647:   }
                   1648: 
                   1649: done:
                   1650:   bgp_stop(p, subcode, data, len);
                   1651:   return p->p.proto_state;
                   1652: }
                   1653: 
                   1654: static struct proto *
                   1655: bgp_init(struct proto_config *CF)
                   1656: {
                           /*
                            * Create a protocol instance for the configuration @CF: install the BGP
                            * hooks, copy the basic peer parameters from the configuration and add a
                            * channel for every configured channel. Returns the new instance.
                            */
                   1657:   struct proto *P = proto_new(CF);
                   1658:   struct bgp_proto *p = (struct bgp_proto *) P;
                   1659:   struct bgp_config *cf = (struct bgp_config *) CF;
                   1660: 
                   1661:   P->rt_notify = bgp_rt_notify;
                   1662:   P->preexport = bgp_preexport;
                   1663:   P->neigh_notify = bgp_neigh_notify;
                   1664:   P->reload_routes = bgp_reload_routes;
                   1665:   P->feed_begin = bgp_feed_begin;
                   1666:   P->feed_end = bgp_feed_end;
                   1667:   P->rte_better = bgp_rte_better;
                   1668:   P->rte_mergable = bgp_rte_mergable;
                           /* The recalculate hook is installed only with deterministic MED enabled */
                   1669:   P->rte_recalculate = cf->deterministic_med ? bgp_rte_recalculate : NULL;
                   1670:   P->rte_modify = bgp_rte_modify_stale;
                   1671: 
                   1672:   p->cf = cf;
                           /* Internal: same local and remote AS; interior adds confederation members */
                   1673:   p->is_internal = (cf->local_as == cf->remote_as);
                   1674:   p->is_interior = p->is_internal || cf->confederation_member;
                   1675:   p->rs_client = cf->rs_client;
                   1676:   p->rr_client = cf->rr_client;
                   1677: 
                           /* Address family from remote_ip if set, else from the remote range type */
                   1678:   p->ipv4 = ipa_nonzero(cf->remote_ip) ?
                   1679:     ipa_is_ip4(cf->remote_ip) :
                   1680:     (cf->remote_range && (cf->remote_range->type == NET_IP4));
                   1681: 
                   1682:   p->remote_ip = cf->remote_ip;
                   1683:   p->remote_as = cf->remote_as;
                   1684: 
                   1685:   /* Hack: We use cf->remote_ip just to pass remote_ip from bgp_spawn() */
                           /* The address was copied to p above, so it is cleared in the config */
                   1686:   if (cf->c.parent)
                   1687:     cf->remote_ip = IPA_NONE;
                   1688: 
                   1689:   /* Add all channels */
                   1690:   struct bgp_channel_config *cc;
                   1691:   WALK_LIST(cc, CF->channels)
                   1692:     proto_add_channel(P, &cc->c);
                   1693: 
                   1694:   return P;
                   1695: }
                   1696: 
                   1697: static void
                   1698: bgp_channel_init(struct channel *C, struct channel_config *CF)
                   1699: {
                           /*
                            * Initialize the BGP-specific part of channel @C from its configuration
                            * @CF: the config pointer, AFI and descriptor are copied, the IGP tables
                            * are set only when they are present in the configuration.
                            */
                   1700:   struct bgp_channel *c = (void *) C;
                   1701:   struct bgp_channel_config *cf = (void *) CF;
                   1702: 
                   1703:   c->cf = cf;
                   1704:   c->afi = cf->afi;
                   1705:   c->desc = cf->desc;
                   1706: 
                   1707:   if (cf->igp_table_ip4)
                   1708:     c->igp_table_ip4 = cf->igp_table_ip4->table;
                   1709: 
                   1710:   if (cf->igp_table_ip6)
                   1711:     c->igp_table_ip6 = cf->igp_table_ip6->table;
                   1712: }
                   1713: 
                   1714: static int
                   1715: bgp_channel_start(struct channel *C)
                   1716: {
                           /*
                            * Start of a BGP channel: lock the IGP tables, set up the bucket and
                            * prefix tables, the optional import/export tables and the stale timer,
                            * and work out the next hop address and link-local address of the
                            * channel. Returns 0 on every path, including the missing next hop case.
                            */
                   1717:   struct bgp_proto *p = (void *) C->proto;
                   1718:   struct bgp_channel *c = (void *) C;
                           /* Source address of the session, used below as next hop candidate */
                   1719:   ip_addr src = p->local_ip;
                   1720: 
                   1721:   if (c->igp_table_ip4)
                   1722:     rt_lock_table(c->igp_table_ip4);
                   1723: 
                   1724:   if (c->igp_table_ip6)
                   1725:     rt_lock_table(c->igp_table_ip6);
                   1726: 
                   1727:   c->pool = p->p.pool; // XXXX
                   1728:   bgp_init_bucket_table(c);
                   1729:   bgp_init_prefix_table(c);
                   1730: 
                   1731:   if (c->cf->import_table)
                   1732:     channel_setup_in_table(C);
                   1733: 
                   1734:   if (c->cf->export_table)
                   1735:     channel_setup_out_table(C);
                   1736: 
                   1737:   c->stale_timer = tm_new_init(c->pool, bgp_long_lived_stale_timeout, c, 0, 0);
                   1738: 
                           /* A configured next hop address takes precedence over the fallbacks */
                   1739:   c->next_hop_addr = c->cf->next_hop_addr;
                   1740:   c->link_addr = IPA_NONE;
                   1741:   c->packets_to_send = 0;
                   1742: 
                   1743:   /* Try to use source address as next hop address */
                   1744:   if (ipa_zero(c->next_hop_addr))
                   1745:   {
                             /* Usable if its family matches the channel or ext_next_hop is set */
                   1746:     if (bgp_channel_is_ipv4(c) && (ipa_is_ip4(src) || c->ext_next_hop))
                   1747:       c->next_hop_addr = src;
                   1748: 
                   1749:     if (bgp_channel_is_ipv6(c) && (ipa_is_ip6(src) || c->ext_next_hop))
                   1750:       c->next_hop_addr = src;
                   1751:   }
                   1752: 
                   1753:   /* Use preferred addresses associated with interface / source address */
                   1754:   if (ipa_zero(c->next_hop_addr))
                   1755:   {
                   1756:     /* We know the iface for single-hop, we make lookup for multihop */
                   1757:     struct neighbor *nbr = p->neigh ?: neigh_find(&p->p, src, NULL, 0);
                   1758:     struct iface *iface = nbr ? nbr->iface : NULL;
                   1759: 
                   1760:     if (bgp_channel_is_ipv4(c) && iface && iface->addr4)
                   1761:       c->next_hop_addr = iface->addr4->ip;
                   1762: 
                   1763:     if (bgp_channel_is_ipv6(c) && iface && iface->addr6)
                   1764:       c->next_hop_addr = iface->addr6->ip;
                   1765:   }
                   1766: 
                   1767:   /* Exit if no feasible next hop address is found */
                   1768:   if (ipa_zero(c->next_hop_addr))
                   1769:   {
                   1770:     log(L_WARN "%s: Missing next hop address", p->p.name);
                   1771:     return 0;
                   1772:   }
                   1773: 
                   1774:   /* Set link-local address for IPv6 single-hop BGP */
                   1775:   if (ipa_is_ip6(c->next_hop_addr) && p->neigh)
                   1776:   {
                   1777:     c->link_addr = p->link_addr;
                   1778: 
                   1779:     if (ipa_zero(c->link_addr))
                   1780:       log(L_WARN "%s: Missing link-local address", p->p.name);
                   1781:   }
                   1782: 
                   1783:   /* Link local address is already in c->link_addr */
                   1784:   if (ipa_is_link_local(c->next_hop_addr))
                   1785:     c->next_hop_addr = IPA_NONE;
                   1786: 
                   1787:   return 0; /* XXXX: Currently undefined */
                   1788: }
                   1789: 
                   1790: static void
                   1791: bgp_channel_shutdown(struct channel *C)
                   1792: {
                           /*
                            * Channel shutdown: forget the next hop and link-local addresses worked
                            * out in bgp_channel_start() and clear the pending packet flags.
                            */
                   1793:   struct bgp_channel *c = (void *) C;
                   1794: 
                   1795:   c->next_hop_addr = IPA_NONE;
                   1796:   c->link_addr = IPA_NONE;
                   1797:   c->packets_to_send = 0;
                   1798: }
                   1799: 
                   1800: static void
                   1801: bgp_channel_cleanup(struct channel *C)
                   1802: {
                           /*
                            * Channel cleanup: release the IGP table locks taken in
                            * bgp_channel_start() and wipe the runtime part of the structure.
                            */
                   1803:   struct bgp_channel *c = (void *) C;
                   1804: 
                   1805:   if (c->igp_table_ip4)
                   1806:     rt_unlock_table(c->igp_table_ip4);
                   1807: 
                   1808:   if (c->igp_table_ip6)
                   1809:     rt_unlock_table(c->igp_table_ip6);
                   1810: 
                   1811:   c->index = 0;
                   1812: 
                   1813:   /* Cleanup rest of bgp_channel starting at pool field */
                           /* Zeroes every byte from the pool field to the end of the structure */
                   1814:   memset(&(c->pool), 0, sizeof(struct bgp_channel) - OFFSETOF(struct bgp_channel, pool));
                   1815: }
                   1816: 
                   1817: static inline struct bgp_channel_config *
                   1818: bgp_find_channel_config(struct bgp_config *cf, u32 afi)
                   1819: {
                           /*
                            * Walk the channel configs of @cf and return the first one whose afi
                            * equals @afi, or NULL when there is none.
                            */
                   1820:   struct bgp_channel_config *cc;
                   1821: 
                   1822:   WALK_LIST(cc, cf->c.channels)
                   1823:     if (cc->afi == afi)
                   1824:       return cc;
                   1825: 
                   1826:   return NULL;
                   1827: }
                   1828: 
                   1829: struct rtable_config *
                   1830: bgp_default_igp_table(struct bgp_config *cf, struct bgp_channel_config *cc, u32 type)
                   1831: {
                           /*
                            * Choose the default IGP table of address type @type for channel config
                            * @cc of protocol config @cf. The candidates are tried in the order given
                            * by the comments below and the first match is returned; when there is
                            * none, a configuration error is raised.
                            */
                   1832:   struct bgp_channel_config *cc2;
                   1833:   struct rtable_config *tab;
                   1834: 
                   1835:   /* First, try table connected by the channel */
                   1836:   if (cc->c.table->addr_type == type)
                   1837:     return cc->c.table;
                   1838: 
                   1839:   /* Find paired channel with the same SAFI but the other AFI */
                           /* NOTE(review): XOR with 0x30000 presumably swaps AFI 1 and 2 kept in bits 16-17 - confirm */
                   1840:   u32 afi2 = cc->afi ^ 0x30000;
                   1841:   cc2 = bgp_find_channel_config(cf, afi2);
                   1842: 
                   1843:   /* Second, try IGP table configured in the paired channel */
                   1844:   if (cc2 && (tab = (type == NET_IP4) ? cc2->igp_table_ip4 : cc2->igp_table_ip6))
                   1845:     return tab;
                   1846: 
                   1847:   /* Third, try table connected by the paired channel */
                   1848:   if (cc2 && (cc2->c.table->addr_type == type))
                   1849:     return cc2->c.table;
                   1850: 
                   1851:   /* Last, try default table of given type */
                   1852:   if (tab = cf->c.global->def_tables[type])
                   1853:     return tab;
                   1854: 
                           /* NOTE(review): no return follows; cf_error() presumably does not return - confirm */
                   1855:   cf_error("Undefined IGP table");
                   1856: }
                   1857: 
                   1858: 
                   1859: void
                   1860: bgp_postconfig(struct proto_config *CF)
                   1861: {
                   1862:   struct bgp_config *cf = (void *) CF;
                   1863: 
                   1864:   /* Do not check templates at all */
                   1865:   if (cf->c.class == SYM_TEMPLATE)
                   1866:     return;
                   1867: 
                   1868: 
                   1869:   /* Handle undefined remote_as, zero should mean unspecified external */
                   1870:   if (!cf->remote_as && (cf->peer_type == BGP_PT_INTERNAL))
                   1871:     cf->remote_as = cf->local_as;
                   1872: 
                   1873:   int internal = (cf->local_as == cf->remote_as);
                   1874:   int interior = internal || cf->confederation_member;
                   1875: 
                   1876:   /* EBGP direct by default, IBGP multihop by default */
                   1877:   if (cf->multihop < 0)
                   1878:     cf->multihop = internal ? 64 : 0;
                   1879: 
                   1880:   /* LLGR mode default based on GR mode */
                   1881:   if (cf->llgr_mode < 0)
                   1882:     cf->llgr_mode = cf->gr_mode ? BGP_LLGR_AWARE : 0;
                   1883: 
                   1884:   /* Link check for single-hop BGP by default */
                   1885:   if (cf->check_link < 0)
                   1886:     cf->check_link = !cf->multihop;
                   1887: 
                   1888: 
                   1889:   if (!cf->local_as)
                   1890:     cf_error("Local AS number must be set");
                   1891: 
                   1892:   if (ipa_zero(cf->remote_ip) && !cf->remote_range)
                   1893:     cf_error("Neighbor must be configured");
                   1894: 
                   1895:   if (ipa_zero(cf->local_ip) && cf->strict_bind)
                   1896:     cf_error("Local address must be configured for strict bind");
                   1897: 
                   1898:   if (!cf->remote_as && !cf->peer_type)
                   1899:     cf_error("Remote AS number (or peer type) must be set");
                   1900: 
                   1901:   if ((cf->peer_type == BGP_PT_INTERNAL) && !internal)
                   1902:     cf_error("IBGP cannot have different ASNs");
                   1903: 
                   1904:   if ((cf->peer_type == BGP_PT_EXTERNAL) &&  internal)
                   1905:     cf_error("EBGP cannot have the same ASNs");
                   1906: 
                   1907:   if (!cf->iface && (ipa_is_link_local(cf->local_ip) ||
                   1908:                     ipa_is_link_local(cf->remote_ip)))
                   1909:     cf_error("Link-local addresses require defined interface");
                   1910: 
                   1911:   if (!(cf->capabilities && cf->enable_as4) && (cf->remote_as > 0xFFFF))
                   1912:     cf_error("Neighbor AS number out of range (AS4 not available)");
                   1913: 
                   1914:   if (!internal && cf->rr_client)
                   1915:     cf_error("Only internal neighbor can be RR client");
                   1916: 
                   1917:   if (internal && cf->rs_client)
                   1918:     cf_error("Only external neighbor can be RS client");
                   1919: 
                   1920:   if (!cf->confederation && cf->confederation_member)
                   1921:     cf_error("Confederation ID must be set for member sessions");
                   1922: 
                   1923:   if (cf->multihop && (ipa_is_link_local(cf->local_ip) ||
                   1924:                       ipa_is_link_local(cf->remote_ip)))
                   1925:     cf_error("Multihop BGP cannot be used with link-local addresses");
                   1926: 
                   1927:   if (cf->multihop && cf->iface)
                   1928:     cf_error("Multihop BGP cannot be bound to interface");
                   1929: 
                   1930:   if (cf->multihop && cf->check_link)
                   1931:     cf_error("Multihop BGP cannot depend on link state");
                   1932: 
                   1933:   if (cf->multihop && cf->bfd && ipa_zero(cf->local_ip))
                   1934:     cf_error("Multihop BGP with BFD requires specified local address");
                   1935: 
                   1936:   if (!cf->gr_mode && cf->llgr_mode)
                   1937:     cf_error("Long-lived graceful restart requires basic graceful restart");
                   1938: 
                   1939: 
                   1940:   struct bgp_channel_config *cc;
                   1941:   WALK_LIST(cc, CF->channels)
                   1942:   {
                   1943:     /* Handle undefined import filter */
                   1944:     if (cc->c.in_filter == FILTER_UNDEF)
                   1945:       if (interior)
                   1946:        cc->c.in_filter = FILTER_ACCEPT;
                   1947:       else
                   1948:        cf_error("EBGP requires explicit import policy");
                   1949: 
                   1950:     /* Handle undefined export filter */
                   1951:     if (cc->c.out_filter == FILTER_UNDEF)
                   1952:       if (interior)
                   1953:        cc->c.out_filter = FILTER_REJECT;
                   1954:       else
                   1955:        cf_error("EBGP requires explicit export policy");
                   1956: 
                   1957:     /* Disable after error incompatible with restart limit action */
                   1958:     if ((cc->c.in_limit.action == PLA_RESTART) && cf->disable_after_error)
                   1959:       cc->c.in_limit.action = PLA_DISABLE;
                   1960: 
                   1961:     /* Different default based on rr_client, rs_client */
                   1962:     if (cc->next_hop_keep == 0xff)
                   1963:       cc->next_hop_keep = cf->rr_client ? NH_IBGP : (cf->rs_client ? NH_ALL : NH_NO);
                   1964: 
                   1965:     /* Different default based on rs_client */
                   1966:     if (!cc->missing_lladdr)
                   1967:       cc->missing_lladdr = cf->rs_client ? MLL_IGNORE : MLL_SELF;
                   1968: 
                   1969:     /* Different default for gw_mode */
                   1970:     if (!cc->gw_mode)
                   1971:       cc->gw_mode = cf->multihop ? GW_RECURSIVE : GW_DIRECT;
                   1972: 
                   1973:     /* Defaults based on proto config */
                   1974:     if (cc->gr_able == 0xff)
                   1975:       cc->gr_able = (cf->gr_mode == BGP_GR_ABLE);
                   1976: 
                   1977:     if (cc->llgr_able == 0xff)
                   1978:       cc->llgr_able = (cf->llgr_mode == BGP_LLGR_ABLE);
                   1979: 
                   1980:     if (cc->llgr_time == ~0U)
                   1981:       cc->llgr_time = cf->llgr_time;
                   1982: 
                   1983:     /* AIGP enabled by default on interior sessions */
                   1984:     if (cc->aigp == 0xff)
                   1985:       cc->aigp = interior;
                   1986: 
                   1987:     /* Default values of IGP tables */
                   1988:     if ((cc->gw_mode == GW_RECURSIVE) && !cc->desc->no_igp)
                   1989:     {
                   1990:       if (!cc->igp_table_ip4 && (bgp_cc_is_ipv4(cc) || cc->ext_next_hop))
                   1991:        cc->igp_table_ip4 = bgp_default_igp_table(cf, cc, NET_IP4);
                   1992: 
                   1993:       if (!cc->igp_table_ip6 && (bgp_cc_is_ipv6(cc) || cc->ext_next_hop))
                   1994:        cc->igp_table_ip6 = bgp_default_igp_table(cf, cc, NET_IP6);
                   1995: 
                   1996:       if (cc->igp_table_ip4 && bgp_cc_is_ipv6(cc) && !cc->ext_next_hop)
                   1997:        cf_error("Mismatched IGP table type");
                   1998: 
                   1999:       if (cc->igp_table_ip6 && bgp_cc_is_ipv4(cc) && !cc->ext_next_hop)
                   2000:        cf_error("Mismatched IGP table type");
                   2001:     }
                   2002: 
                   2003:     if (cf->multihop && (cc->gw_mode == GW_DIRECT))
                   2004:       cf_error("Multihop BGP cannot use direct gateway mode");
                   2005: 
                   2006:     if ((cc->gw_mode == GW_RECURSIVE) && cc->c.table->sorted)
                   2007:       cf_error("BGP in recursive mode prohibits sorted table");
                   2008: 
                   2009:     if (cf->deterministic_med && cc->c.table->sorted)
                   2010:       cf_error("BGP with deterministic MED prohibits sorted table");
                   2011: 
                   2012:     if (cc->secondary && !cc->c.table->sorted)
                   2013:       cf_error("BGP with secondary option requires sorted table");
                   2014:   }
                   2015: }
                   2016: 
                   2017: static int
                   2018: bgp_reconfigure(struct proto *P, struct proto_config *CF)
                   2019: {
                   2020:   struct bgp_proto *p = (void *) P;
                   2021:   const struct bgp_config *new = (void *) CF;
                   2022:   const struct bgp_config *old = p->cf;
                   2023: 
                   2024:   if (proto_get_router_id(CF) != p->local_id)
                   2025:     return 0;
                   2026: 
                   2027:   int same = !memcmp(((byte *) old) + sizeof(struct proto_config),
                   2028:                     ((byte *) new) + sizeof(struct proto_config),
                   2029:                     // password item is last and must be checked separately
                   2030:                     OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
                   2031:     && !bstrcmp(old->password, new->password)
                   2032:     && ((!old->remote_range && !new->remote_range)
                   2033:        || (old->remote_range && new->remote_range && net_equal(old->remote_range, new->remote_range)))
                   2034:     && !bstrcmp(old->dynamic_name, new->dynamic_name)
                   2035:     && (old->dynamic_name_digits == new->dynamic_name_digits);
                   2036: 
                   2037:   /* FIXME: Move channel reconfiguration to generic protocol code ? */
                   2038:   struct channel *C, *C2;
                   2039:   struct bgp_channel_config *cc;
                   2040: 
                   2041:   WALK_LIST(C, p->p.channels)
                   2042:     C->stale = 1;
                   2043: 
                   2044:   WALK_LIST(cc, new->c.channels)
                   2045:   {
                   2046:     C = (struct channel *) bgp_find_channel(p, cc->afi);
                   2047:     same = proto_configure_channel(P, &C, &cc->c) && same;
                   2048: 
                   2049:     if (C)
                   2050:       C->stale = 0;
                   2051:   }
                   2052: 
                   2053:   WALK_LIST_DELSAFE(C, C2, p->p.channels)
                   2054:     if (C->stale)
                   2055:       same = proto_configure_channel(P, &C, NULL) && same;
                   2056: 
                   2057: 
                   2058:   if (same && (p->start_state > BSS_PREPARE))
                   2059:     bgp_update_bfd(p, new->bfd);
                   2060: 
                   2061:   /* We should update our copy of configuration ptr as old configuration will be freed */
                   2062:   if (same)
                   2063:     p->cf = new;
                   2064: 
                   2065:   /* Reset name counter */
                   2066:   p->dynamic_name_counter = 0;
                   2067: 
                   2068:   return same;
                   2069: }
                   2070: 
                   2071: #define IGP_TABLE(cf, sym) ((cf)->igp_table_##sym ? (cf)->igp_table_##sym ->table : NULL )
                   2072: 
                   2073: static int
                   2074: bgp_channel_reconfigure(struct channel *C, struct channel_config *CC, int *import_changed, int *export_changed)
                   2075: {
                   2076:   struct bgp_proto *p = (void *) C->proto;
                   2077:   struct bgp_channel *c = (void *) C;
                   2078:   struct bgp_channel_config *new = (void *) CC;
                   2079:   struct bgp_channel_config *old = c->cf;
                   2080: 
                   2081:   if ((new->secondary != old->secondary) ||
                   2082:       (new->gr_able != old->gr_able) ||
                   2083:       (new->llgr_able != old->llgr_able) ||
                   2084:       (new->llgr_time != old->llgr_time) ||
                   2085:       (new->ext_next_hop != old->ext_next_hop) ||
                   2086:       (new->add_path != old->add_path) ||
                   2087:       (new->import_table != old->import_table) ||
                   2088:       (new->export_table != old->export_table) ||
                   2089:       (IGP_TABLE(new, ip4) != IGP_TABLE(old, ip4)) ||
                   2090:       (IGP_TABLE(new, ip6) != IGP_TABLE(old, ip6)))
                   2091:     return 0;
                   2092: 
                   2093:   if (new->mandatory && !old->mandatory && (C->channel_state != CS_UP))
                   2094:     return 0;
                   2095: 
                   2096:   if ((new->gw_mode != old->gw_mode) ||
                   2097:       (new->aigp != old->aigp) ||
                   2098:       (new->cost != old->cost))
                   2099:   {
                   2100:     /* import_changed itself does not force ROUTE_REFRESH when import_table is active */
                   2101:     if (c->c.in_table && (c->c.channel_state == CS_UP))
                   2102:       bgp_schedule_packet(p->conn, c, PKT_ROUTE_REFRESH);
                   2103: 
                   2104:     *import_changed = 1;
                   2105:   }
                   2106: 
                   2107:   if (!ipa_equal(new->next_hop_addr, old->next_hop_addr) ||
                   2108:       (new->next_hop_self != old->next_hop_self) ||
                   2109:       (new->next_hop_keep != old->next_hop_keep) ||
                   2110:       (new->missing_lladdr != old->missing_lladdr) ||
                   2111:       (new->aigp != old->aigp) ||
                   2112:       (new->aigp_originate != old->aigp_originate))
                   2113:     *export_changed = 1;
                   2114: 
                   2115:   c->cf = new;
                   2116:   return 1;
                   2117: }
                   2118: 
                   2119: static void
                   2120: bgp_copy_config(struct proto_config *dest UNUSED, struct proto_config *src UNUSED)
                   2121: {
                   2122:   /* Just a shallow copy */
                   2123: }
                   2124: 
                   2125: 
                   2126: /**
                   2127:  * bgp_error - report a protocol error
                   2128:  * @c: connection
                   2129:  * @code: error code (according to the RFC)
                   2130:  * @subcode: error sub-code
                   2131:  * @data: data to be passed in the Notification message
                   2132:  * @len: length of the data
                   2133:  *
                   2134:  * bgp_error() sends a notification packet to tell the other side that a protocol
                   2135:  * error has occurred (including the data considered erroneous if possible) and
                   2136:  * closes the connection.
                   2137:  */
                   2138: void
                   2139: bgp_error(struct bgp_conn *c, uint code, uint subcode, byte *data, int len)
                   2140: {
                   2141:   struct bgp_proto *p = c->bgp;
                   2142: 
                   2143:   if (c->state == BS_CLOSE)
                   2144:     return;
                   2145: 
                   2146:   bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, ABS(len));
                   2147:   bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode);
                   2148:   bgp_conn_enter_close_state(c);
                   2149: 
                   2150:   c->notify_code = code;
                   2151:   c->notify_subcode = subcode;
                   2152:   c->notify_data = data;
                   2153:   c->notify_size = (len > 0) ? len : 0;
                   2154:   bgp_schedule_packet(c, NULL, PKT_NOTIFICATION);
                   2155: 
                   2156:   if (code != 6)
                   2157:   {
                   2158:     bgp_update_startup_delay(p);
                   2159:     bgp_stop(p, 0, NULL, 0);
                   2160:   }
                   2161: }
                   2162: 
                   2163: /**
                   2164:  * bgp_store_error - store last error for status report
                   2165:  * @p: BGP instance
                   2166:  * @c: connection
                   2167:  * @class: error class (BE_xxx constants)
                   2168:  * @code: error code (class specific)
                   2169:  *
                   2170:  * bgp_store_error() decides whether given error is interesting enough
                   2171:  * and store that error to last_error variables of @p
                   2172:  */
                   2173: void
                   2174: bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code)
                   2175: {
                   2176:   /* During PS_UP, we ignore errors on secondary connection */
                   2177:   if ((p->p.proto_state == PS_UP) && c && (c != p->conn))
                   2178:     return;
                   2179: 
                   2180:   /* During PS_STOP, we ignore any errors, as we want to report
                   2181:    * the error that caused transition to PS_STOP
                   2182:    */
                   2183:   if (p->p.proto_state == PS_STOP)
                   2184:     return;
                   2185: 
                   2186:   p->last_error_class = class;
                   2187:   p->last_error_code = code;
                   2188: }
                   2189: 
                   2190: static char *bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established", "Close" };
                   2191: static char *bgp_err_classes[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""};
                   2192: static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket", "Link down", "BFD session down", "Graceful restart"};
                   2193: static char *bgp_auto_errors[] = { "", "Route limit exceeded"};
                   2194: static char *bgp_gr_states[] = { "None", "Regular", "Long-lived"};
                   2195: 
                   2196: static const char *
                   2197: bgp_last_errmsg(struct bgp_proto *p)
                   2198: {
                   2199:   switch (p->last_error_class)
                   2200:   {
                   2201:   case BE_MISC:
                   2202:     return bgp_misc_errors[p->last_error_code];
                   2203:   case BE_SOCKET:
                   2204:     return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code);
                   2205:   case BE_BGP_RX:
                   2206:   case BE_BGP_TX:
                   2207:     return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF);
                   2208:   case BE_AUTO_DOWN:
                   2209:     return bgp_auto_errors[p->last_error_code];
                   2210:   default:
                   2211:     return "";
                   2212:   }
                   2213: }
                   2214: 
                   2215: static const char *
                   2216: bgp_state_dsc(struct bgp_proto *p)
                   2217: {
                   2218:   if (p->p.proto_state == PS_DOWN)
                   2219:     return "Down";
                   2220: 
                   2221:   int state = MAX(p->incoming_conn.state, p->outgoing_conn.state);
                   2222:   if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->passive)
                   2223:     return "Passive";
                   2224: 
                   2225:   return bgp_state_names[state];
                   2226: }
                   2227: 
                   2228: static void
                   2229: bgp_get_status(struct proto *P, byte *buf)
                   2230: {
                   2231:   struct bgp_proto *p = (struct bgp_proto *) P;
                   2232: 
                   2233:   const char *err1 = bgp_err_classes[p->last_error_class];
                   2234:   const char *err2 = bgp_last_errmsg(p);
                   2235: 
                   2236:   if (P->proto_state == PS_DOWN)
                   2237:     bsprintf(buf, "%s%s", err1, err2);
                   2238:   else
                   2239:     bsprintf(buf, "%-14s%s%s", bgp_state_dsc(p), err1, err2);
                   2240: }
                   2241: 
                   2242: static void
                   2243: bgp_show_afis(int code, char *s, u32 *afis, uint count)
                   2244: {
                   2245:   buffer b;
                   2246:   LOG_BUFFER_INIT(b);
                   2247: 
                   2248:   buffer_puts(&b, s);
                   2249: 
                   2250:   for (u32 *af = afis; af < (afis + count); af++)
                   2251:   {
                   2252:     const struct bgp_af_desc *desc = bgp_get_af_desc(*af);
                   2253:     if (desc)
                   2254:       buffer_print(&b, " %s", desc->name);
                   2255:     else
                   2256:       buffer_print(&b, " <%u/%u>", BGP_AFI(*af), BGP_SAFI(*af));
                   2257:   }
                   2258: 
                   2259:   if (b.pos == b.end)
                   2260:     strcpy(b.end - 32, " ... <too long>");
                   2261: 
                   2262:   cli_msg(code, b.start);
                   2263: }
                   2264: 
                   2265: static void
                   2266: bgp_show_capabilities(struct bgp_proto *p UNUSED, struct bgp_caps *caps)
                   2267: {
                   2268:   struct bgp_af_caps *ac;
                   2269:   uint any_mp_bgp = 0;
                   2270:   uint any_gr_able = 0;
                   2271:   uint any_add_path = 0;
                   2272:   uint any_ext_next_hop = 0;
                   2273:   uint any_llgr_able = 0;
                   2274:   u32 *afl1 = alloca(caps->af_count * sizeof(u32));
                   2275:   u32 *afl2 = alloca(caps->af_count * sizeof(u32));
                   2276:   uint afn1, afn2;
                   2277: 
                   2278:   WALK_AF_CAPS(caps, ac)
                   2279:   {
                   2280:     any_mp_bgp |= ac->ready;
                   2281:     any_gr_able |= ac->gr_able;
                   2282:     any_add_path |= ac->add_path;
                   2283:     any_ext_next_hop |= ac->ext_next_hop;
                   2284:     any_llgr_able |= ac->llgr_able;
                   2285:   }
                   2286: 
                   2287:   if (any_mp_bgp)
                   2288:   {
                   2289:     cli_msg(-1006, "      Multiprotocol");
                   2290: 
                   2291:     afn1 = 0;
                   2292:     WALK_AF_CAPS(caps, ac)
                   2293:       if (ac->ready)
                   2294:        afl1[afn1++] = ac->afi;
                   2295: 
                   2296:     bgp_show_afis(-1006, "        AF announced:", afl1, afn1);
                   2297:   }
                   2298: 
                   2299:   if (caps->route_refresh)
                   2300:     cli_msg(-1006, "      Route refresh");
                   2301: 
                   2302:   if (any_ext_next_hop)
                   2303:   {
                   2304:     cli_msg(-1006, "      Extended next hop");
                   2305: 
                   2306:     afn1 = 0;
                   2307:     WALK_AF_CAPS(caps, ac)
                   2308:       if (ac->ext_next_hop)
                   2309:        afl1[afn1++] = ac->afi;
                   2310: 
                   2311:     bgp_show_afis(-1006, "        IPv6 nexthop:", afl1, afn1);
                   2312:   }
                   2313: 
                   2314:   if (caps->ext_messages)
                   2315:     cli_msg(-1006, "      Extended message");
                   2316: 
                   2317:   if (caps->gr_aware)
                   2318:     cli_msg(-1006, "      Graceful restart");
                   2319: 
                   2320:   if (any_gr_able)
                   2321:   {
                   2322:     /* Continues from gr_aware */
                   2323:     cli_msg(-1006, "        Restart time: %u", caps->gr_time);
                   2324:     if (caps->gr_flags & BGP_GRF_RESTART)
                   2325:       cli_msg(-1006, "        Restart recovery");
                   2326: 
                   2327:     afn1 = afn2 = 0;
                   2328:     WALK_AF_CAPS(caps, ac)
                   2329:     {
                   2330:       if (ac->gr_able)
                   2331:        afl1[afn1++] = ac->afi;
                   2332: 
                   2333:       if (ac->gr_af_flags & BGP_GRF_FORWARDING)
                   2334:        afl2[afn2++] = ac->afi;
                   2335:     }
                   2336: 
                   2337:     bgp_show_afis(-1006, "        AF supported:", afl1, afn1);
                   2338:     bgp_show_afis(-1006, "        AF preserved:", afl2, afn2);
                   2339:   }
                   2340: 
                   2341:   if (caps->as4_support)
                   2342:     cli_msg(-1006, "      4-octet AS numbers");
                   2343: 
                   2344:   if (any_add_path)
                   2345:   {
                   2346:     cli_msg(-1006, "      ADD-PATH");
                   2347: 
                   2348:     afn1 = afn2 = 0;
                   2349:     WALK_AF_CAPS(caps, ac)
                   2350:     {
                   2351:       if (ac->add_path & BGP_ADD_PATH_RX)
                   2352:        afl1[afn1++] = ac->afi;
                   2353: 
                   2354:       if (ac->add_path & BGP_ADD_PATH_TX)
                   2355:        afl2[afn2++] = ac->afi;
                   2356:     }
                   2357: 
                   2358:     bgp_show_afis(-1006, "        RX:", afl1, afn1);
                   2359:     bgp_show_afis(-1006, "        TX:", afl2, afn2);
                   2360:   }
                   2361: 
                   2362:   if (caps->enhanced_refresh)
                   2363:     cli_msg(-1006, "      Enhanced refresh");
                   2364: 
                   2365:   if (caps->llgr_aware)
                   2366:     cli_msg(-1006, "      Long-lived graceful restart");
                   2367: 
                   2368:   if (any_llgr_able)
                   2369:   {
                   2370:     u32 stale_time = 0;
                   2371: 
                   2372:     afn1 = afn2 = 0;
                   2373:     WALK_AF_CAPS(caps, ac)
                   2374:     {
                   2375:       stale_time = MAX(stale_time, ac->llgr_time);
                   2376: 
                   2377:       if (ac->llgr_able && ac->llgr_time)
                   2378:        afl1[afn1++] = ac->afi;
                   2379: 
                   2380:       if (ac->llgr_flags & BGP_GRF_FORWARDING)
                   2381:        afl2[afn2++] = ac->afi;
                   2382:     }
                   2383: 
                   2384:     /* Continues from llgr_aware */
                   2385:     cli_msg(-1006, "        LL stale time: %u", stale_time);
                   2386: 
                   2387:     bgp_show_afis(-1006, "        AF supported:", afl1, afn1);
                   2388:     bgp_show_afis(-1006, "        AF preserved:", afl2, afn2);
                   2389:   }
                   2390: }
                   2391: 
                   2392: static void
                   2393: bgp_show_proto_info(struct proto *P)
                   2394: {
                   2395:   struct bgp_proto *p = (struct bgp_proto *) P;
                   2396: 
                   2397:   cli_msg(-1006, "  BGP state:          %s", bgp_state_dsc(p));
                   2398: 
                   2399:   if (bgp_is_dynamic(p) && p->cf->remote_range)
                   2400:     cli_msg(-1006, "    Neighbor range:   %N", p->cf->remote_range);
                   2401:   else
                   2402:     cli_msg(-1006, "    Neighbor address: %I%J", p->remote_ip, p->cf->iface);
                   2403: 
                   2404:   cli_msg(-1006, "    Neighbor AS:      %u", p->remote_as);
                   2405:   cli_msg(-1006, "    Local AS:         %u", p->cf->local_as);
                   2406: 
                   2407:   if (p->gr_active_num)
                   2408:     cli_msg(-1006, "    Neighbor graceful restart active");
                   2409: 
                   2410:   if (P->proto_state == PS_START)
                   2411:   {
                   2412:     struct bgp_conn *oc = &p->outgoing_conn;
                   2413: 
                   2414:     if ((p->start_state < BSS_CONNECT) &&
                   2415:        (tm_active(p->startup_timer)))
                   2416:       cli_msg(-1006, "    Error wait:       %t/%u",
                   2417:              tm_remains(p->startup_timer), p->startup_delay);
                   2418: 
                   2419:     if ((oc->state == BS_ACTIVE) &&
                   2420:        (tm_active(oc->connect_timer)))
                   2421:       cli_msg(-1006, "    Connect delay:    %t/%u",
                   2422:              tm_remains(oc->connect_timer), p->cf->connect_delay_time);
                   2423: 
                   2424:     if (p->gr_active_num && tm_active(p->gr_timer))
                   2425:       cli_msg(-1006, "    Restart timer:    %t/-",
                   2426:              tm_remains(p->gr_timer));
                   2427:   }
                   2428:   else if (P->proto_state == PS_UP)
                   2429:   {
                   2430:     cli_msg(-1006, "    Neighbor ID:      %R", p->remote_id);
                   2431:     cli_msg(-1006, "    Local capabilities");
                   2432:     bgp_show_capabilities(p, p->conn->local_caps);
                   2433:     cli_msg(-1006, "    Neighbor capabilities");
                   2434:     bgp_show_capabilities(p, p->conn->remote_caps);
                   2435:     cli_msg(-1006, "    Session:          %s%s%s%s%s",
                   2436:            p->is_internal ? "internal" : "external",
                   2437:            p->cf->multihop ? " multihop" : "",
                   2438:            p->rr_client ? " route-reflector" : "",
                   2439:            p->rs_client ? " route-server" : "",
                   2440:            p->as4_session ? " AS4" : "");
                   2441:     cli_msg(-1006, "    Source address:   %I", p->local_ip);
                   2442:     cli_msg(-1006, "    Hold timer:       %t/%u",
                   2443:            tm_remains(p->conn->hold_timer), p->conn->hold_time);
                   2444:     cli_msg(-1006, "    Keepalive timer:  %t/%u",
                   2445:            tm_remains(p->conn->keepalive_timer), p->conn->keepalive_time);
                   2446:   }
                   2447: 
                   2448:   if ((p->last_error_class != BE_NONE) &&
                   2449:       (p->last_error_class != BE_MAN_DOWN))
                   2450:   {
                   2451:     const char *err1 = bgp_err_classes[p->last_error_class];
                   2452:     const char *err2 = bgp_last_errmsg(p);
                   2453:     cli_msg(-1006, "    Last error:       %s%s", err1, err2);
                   2454:   }
                   2455: 
                   2456:   {
                   2457:     struct bgp_channel *c;
                   2458:     WALK_LIST(c, p->p.channels)
                   2459:     {
                   2460:       channel_show_info(&c->c);
                   2461: 
                   2462:       if (p->gr_active_num)
                   2463:        cli_msg(-1006, "    Neighbor GR:    %s", bgp_gr_states[c->gr_active]);
                   2464: 
                   2465:       if (c->stale_timer && tm_active(c->stale_timer))
                   2466:        cli_msg(-1006, "    LL stale timer: %t/-", tm_remains(c->stale_timer));
                   2467: 
                   2468:       if (c->c.channel_state == CS_UP)
                   2469:       {
                   2470:        if (ipa_zero(c->link_addr))
                   2471:          cli_msg(-1006, "    BGP Next hop:   %I", c->next_hop_addr);
                   2472:        else
                   2473:          cli_msg(-1006, "    BGP Next hop:   %I %I", c->next_hop_addr, c->link_addr);
                   2474:       }
                   2475: 
                   2476:       if (c->igp_table_ip4)
                   2477:        cli_msg(-1006, "    IGP IPv4 table: %s", c->igp_table_ip4->name);
                   2478: 
                   2479:       if (c->igp_table_ip6)
                   2480:        cli_msg(-1006, "    IGP IPv6 table: %s", c->igp_table_ip6->name);
                   2481:     }
                   2482:   }
                   2483: }
                   2484: 
                   2485: struct channel_class channel_bgp = {   /* Channel class for BGP: structure sizes plus the BGP channel hook functions */
                   2486:   .channel_size =      sizeof(struct bgp_channel),     /* size of the BGP channel structure */
                   2487:   .config_size =       sizeof(struct bgp_channel_config),      /* size of the BGP channel config structure */
                   2488:   .init =              bgp_channel_init,       /* hooks follow; NOTE(review): presumably called by the core channel code at the matching lifecycle step -- confirm */
                   2489:   .start =             bgp_channel_start,
                   2490:   .shutdown =          bgp_channel_shutdown,
                   2491:   .cleanup =           bgp_channel_cleanup,
                   2492:   .reconfigure =       bgp_channel_reconfigure,
                   2493: };
                   2494: 
                   2495: struct protocol proto_bgp = {  /* Protocol descriptor for BGP: identification, structure sizes and hook functions */
                   2496:   .name =              "BGP",
                   2497:   .template =          "bgp%d",        /* printf-style pattern; NOTE(review): presumably used to generate default instance names -- confirm */
                   2498:   .class =             PROTOCOL_BGP,
                   2499:   .preference =        DEF_PREF_BGP,
                   2500:   .channel_mask =      NB_IP | NB_VPN | NB_FLOW,       /* NOTE(review): presumably the network types a BGP channel may carry -- confirm against NB_* definitions */
                   2501:   .proto_size =                sizeof(struct bgp_proto),       /* size of the BGP protocol instance structure */
                   2502:   .config_size =       sizeof(struct bgp_config),      /* size of the BGP protocol config structure */
                   2503:   .postconfig =                bgp_postconfig, /* hooks follow; each points to a bgp_* function named after the field it fills */
                   2504:   .init =              bgp_init,
                   2505:   .start =             bgp_start,
                   2506:   .shutdown =          bgp_shutdown,
                   2507:   .reconfigure =       bgp_reconfigure,
                   2508:   .copy_config =       bgp_copy_config,
                   2509:   .get_status =        bgp_get_status,
                   2510:   .get_attr =          bgp_get_attr,
                   2511:   .get_route_info =    bgp_get_route_info,
                   2512:   .show_proto_info =   bgp_show_proto_info
                   2513: };

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>