File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / bird / proto / bgp / bgp.h
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Aug 22 12:33:54 2017 UTC (6 years, 10 months ago) by misho
Branches: bird, MAIN
CVS tags: v1_6_3p0, v1_6_3, HEAD
bird 1.6.3

    1: /*
    2:  *	BIRD -- The Border Gateway Protocol
    3:  *
    4:  *	(c) 2000 Martin Mares <mj@ucw.cz>
    5:  *
    6:  *	Can be freely distributed and used under the terms of the GNU GPL.
    7:  */
    8: 
    9: #ifndef _BIRD_BGP_H_
   10: #define _BIRD_BGP_H_
   11: 
   12: #include <stdint.h>
   13: #include "nest/route.h"
   14: #include "nest/bfd.h"
   15: #include "lib/hash.h"
   16: 
   17: struct linpool;
   18: struct eattr;
   19: /* BGP configuration; struct bgp_proto points to it through its cf member */
   20: struct bgp_config {
   21:   struct proto_config c;		/* Embedded struct proto_config (first member) */
   22:   u32 local_as, remote_as;		/* Local and neighbor AS numbers */
   23:   ip_addr remote_ip;			/* Neighbor IP address */
   24:   ip_addr source_addr;			/* Source address to use */
   25:   struct iface *iface;			/* Interface for link-local addresses */
   26:   u16 remote_port; 			/* Neighbor destination port */
   27:   int multihop;				/* Number of hops if multihop */
   28:   int ttl_security;			/* Enable TTL security [RFC5082] */
   29:   int next_hop_self;			/* Always set next hop to local IP address */
   30:   int next_hop_keep;			/* Do not touch next hop attribute */
   31:   int missing_lladdr;			/* What we will do when we don't know link-local addr, see MLL_* */
   32:   int gw_mode;				/* How we compute route gateway from next_hop attr, see GW_* */
   33:   int compare_path_lengths;		/* Use path lengths when selecting best route */
   34:   int med_metric;			/* Compare MULTI_EXIT_DISC even between routes from different ASes */
   35:   int igp_metric;			/* Use IGP metrics when selecting best route */
   36:   int prefer_older;			/* Prefer older routes according to RFC 5004 */
   37:   int deterministic_med;		/* Use more complicated algo to have strict RFC 4271 MED comparison */
   38:   u32 default_local_pref;		/* Default value for LOCAL_PREF attribute */
   39:   u32 default_med;			/* Default value for MULTI_EXIT_DISC attribute */
   40:   int capabilities;			/* Enable capability handshake [RFC3392] */
   41:   int enable_refresh;			/* Enable local support for route refresh [RFC2918] */
   42:   int enable_as4;			/* Enable local support for 4B AS numbers [RFC4893] */
   43:   int enable_extended_messages;		/* Enable local support for extended messages [draft] */
   44:   u32 rr_cluster_id;			/* Route reflector cluster ID, if different from local ID */
   45:   int rr_client;			/* Whether neighbor is RR client of me */
   46:   int rs_client;			/* Whether neighbor is RS client of me */
   47:   int advertise_ipv4;			/* Whether we should add IPv4 capability advertisement to OPEN message */
   48:   int passive;				/* Do not initiate outgoing connection */
   49:   int interpret_communities;		/* Hardwired handling of well-known communities */
   50:   int secondary;			/* Accept also non-best routes (i.e. RA_ACCEPTED) */
   51:   int add_path;				/* Use ADD-PATH extension [draft] */
   52:   int allow_local_as;			/* Allow that number of local ASNs in incoming AS_PATHs */
   53:   int gr_mode;				/* Graceful restart mode (BGP_GR_*) */
   54:   int setkey;				/* Set MD5 password to system SA/SP database */
   55:   unsigned gr_time;			/* Graceful restart timeout */
   56:   unsigned connect_delay_time;		/* Minimum delay between connect attempts */
   57:   unsigned connect_retry_time;		/* Timeout for connect attempts */
   58:   unsigned hold_time, initial_hold_time;
   59:   unsigned keepalive_time;
   60:   unsigned error_amnesia_time;		/* Errors are forgotten after */
   61:   unsigned error_delay_time_min;	/* Time to wait after an error is detected */
   62:   unsigned error_delay_time_max;	/* Presumably the upper bound of the same delay - confirm */
   63:   unsigned disable_after_error;		/* Disable the protocol when error is detected */
   64: 
   65:   char *password;			/* Password used for MD5 authentication */
   66:   struct rtable_config *igp_table;	/* Table used for recursive next hop lookups */
   67:   int check_link;			/* Use iface link state for liveness detection */
   68:   int bfd;				/* Use BFD for liveness detection */
   69: };
   70: /* Values for bgp_config.missing_lladdr */
   71: #define MLL_SELF 1
   72: #define MLL_DROP 2
   73: #define MLL_IGNORE 3
   74: /* Values for bgp_config.gw_mode */
   75: #define GW_DIRECT 1
   76: #define GW_RECURSIVE 2
   77: /* ADD-PATH directions (presumably values of bgp_config.add_path - confirm); ADD_PATH_FULL equals ADD_PATH_RX | ADD_PATH_TX */
   78: #define ADD_PATH_RX 1
   79: #define ADD_PATH_TX 2
   80: #define ADD_PATH_FULL 3
   81: /* Values for bgp_config.gr_mode */
   82: #define BGP_GR_ABLE 1
   83: #define BGP_GR_AWARE 2
   84: 
   85: /* For peer_gr_flags */
   86: #define BGP_GRF_RESTART 0x80
   87: 
   88: /* For peer_gr_aflags */
   89: #define BGP_GRF_FORWARDING 0x80
   90: 
   91: /* State of one BGP connection; struct bgp_proto embeds an outgoing and an incoming instance */
   92: struct bgp_conn {
   93:   struct bgp_proto *bgp;		/* BGP protocol instance this connection refers to */
   94:   struct birdsock *sk;			/* Socket used by this connection */
   95:   uint state;				/* State of connection state machine, see BS_* */
   96:   struct timer *connect_retry_timer;
   97:   struct timer *hold_timer;
   98:   struct timer *keepalive_timer;
   99:   struct event *tx_ev;
  100:   int packets_to_send;			/* Bitmap of packet types to be sent */
  101:   int notify_code, notify_subcode, notify_size;
  102:   byte *notify_data;
  103:   u32 advertised_as;			/* Temporary value for AS number received */
  104:   int start_state;			/* protocol start_state snapshot when connection established */
  105:   u8 peer_refresh_support;		/* Peer supports route refresh [RFC2918] */
  106:   u8 peer_as4_support;			/* Peer supports 4B AS numbers [RFC4893] */
  107:   u8 peer_add_path;			/* Peer supports ADD-PATH [draft] */
  108:   u8 peer_enhanced_refresh_support;	/* Peer supports enhanced refresh [RFC7313] */
  109:   u8 peer_gr_aware;
  110:   u8 peer_gr_able;
  111:   u16 peer_gr_time;
  112:   u8 peer_gr_flags;			/* See BGP_GRF_RESTART */
  113:   u8 peer_gr_aflags;			/* See BGP_GRF_FORWARDING */
  114:   u8 peer_ext_messages_support;		/* Peer supports extended message length [draft] */
  115:   unsigned hold_time, keepalive_time;	/* Times calculated from my and neighbor's requirements */
  116: };
  117: /* Per-instance BGP protocol state: session flags, connections, timers and pending-update tables */
  118: struct bgp_proto {
  119:   struct proto p;			/* Embedded struct proto (first member) */
  120:   struct bgp_config *cf;		/* Shortcut to BGP configuration */
  121:   u32 local_as, remote_as;		/* Local and neighbor AS numbers */
  122:   int start_state;			/* Substates that partition PS_START, see BSS_* */
  123:   u8 is_internal;			/* Internal BGP connection (local_as == remote_as) */
  124:   u8 as4_session;			/* Session uses 4B AS numbers in AS_PATH (both sides support it) */
  125:   u8 add_path_rx;			/* Session expects receive of ADD-PATH extended NLRI */
  126:   u8 add_path_tx;			/* Session expects transmit of ADD-PATH extended NLRI */
  127:   u8 ext_messages;			/* Session allows to use extended messages (both sides support it) */
  128:   u32 local_id;				/* BGP identifier of this router */
  129:   u32 remote_id;			/* BGP identifier of the neighbor */
  130:   u32 rr_cluster_id;			/* Route reflector cluster ID */
  131:   int rr_client;			/* Whether neighbor is RR client of me */
  132:   int rs_client;			/* Whether neighbor is RS client of me */
  133:   u8 gr_ready;				/* Neighbor could do graceful restart */
  134:   u8 gr_active;				/* Neighbor is doing graceful restart */
  135:   u8 feed_state;			/* Feed state (TX) for EoR, RR packets, see BFS_* */
  136:   u8 load_state;			/* Load state (RX) for EoR, RR packets, see BFS_* */
  137:   struct bgp_conn *conn;		/* Connection we have established */
  138:   struct bgp_conn outgoing_conn;	/* Outgoing connection we're working with */
  139:   struct bgp_conn incoming_conn;	/* Incoming connection we have neither accepted nor rejected yet */
  140:   struct object_lock *lock;		/* Lock for neighbor connection */
  141:   struct neighbor *neigh;		/* Neighbor entry corresponding to remote ip, NULL if multihop */
  142:   struct bfd_request *bfd_req;		/* BFD request, if BFD is used */
  143:   ip_addr source_addr;			/* Local address used as an advertised next hop */
  144:   rtable *igp_table;			/* Table used for recursive next hop lookups */
  145:   struct event *event;			/* Event for respawning and shutting process */
  146:   struct timer *startup_timer;		/* Timer used to delay protocol startup due to previous errors (startup_delay) */
  147:   struct timer *gr_timer;		/* Timer waiting for reestablishment after graceful restart */
  148:   struct bgp_bucket **bucket_hash;	/* Hash table of attribute buckets */
  149:   uint hash_size, hash_count, hash_limit;	/* Presumably bookkeeping for bucket_hash - confirm */
  150:   HASH(struct bgp_prefix) prefix_hash;	/* Prefixes to be sent */
  151:   slab *prefix_slab;			/* Slab holding prefix nodes */
  152:   list bucket_queue;			/* Queue of buckets to send */
  153:   struct bgp_bucket *withdraw_bucket;	/* Withdrawn routes */
  154:   unsigned startup_delay;		/* Time to delay protocol startup by due to errors */
  155:   bird_clock_t last_proto_error;	/* Time of last error that leads to protocol stop */
  156:   u8 last_error_class; 			/* Error class of last error, see BE_* */
  157:   u32 last_error_code;			/* Error code of last error. BGP protocol errors
  158: 					   are encoded as (bgp_err_code << 16 | bgp_err_subcode) */
  159: #ifdef IPV6
  160:   byte *mp_reach_start, *mp_unreach_start; /* Multiprotocol BGP attribute notes */
  161:   unsigned mp_reach_len, mp_unreach_len;
  162:   ip_addr local_link;			/* Link-level version of source_addr */
  163: #endif
  164: };
  165: /* Element type of prefix_hash in struct bgp_proto (prefixes to be sent) */
  166: struct bgp_prefix {
  167:   struct {
  168:     ip_addr prefix;			/* Prefix address */
  169:     int pxlen;				/* Presumably the prefix length - confirm */
  170:   } n;
  171:   u32 path_id;				/* Presumably the ADD-PATH path identifier - confirm */
  172:   struct bgp_prefix *next;		/* Next entry; presumably the hash chain link - confirm */
  173:   node bucket_node;			/* Node in per-bucket list */
  174: };
  175: /* Attribute bucket: a list of prefixes plus the per-bucket extended attributes stored right behind the header */
  176: struct bgp_bucket {
  177:   node send_node;			/* Node in send queue */
  178:   struct bgp_bucket *hash_next, *hash_prev;	/* Node in bucket hash table */
  179:   unsigned hash;			/* Hash over extended attributes */
  180:   list prefixes;			/* Prefixes in this bucket */
  181:   ea_list eattrs[0];			/* Per-bucket extended attributes (zero-length trailing array) */
  182: };
  183: /* Protocol constants, message size limits and buffer sizes */
  184: #define BGP_PORT		179
  185: #define BGP_VERSION		4
  186: #define BGP_HEADER_LENGTH	19
  187: #define BGP_MAX_MESSAGE_LENGTH	4096	/* Returned by bgp_max_packet_length() when ext_messages is not set */
  188: #define BGP_MAX_EXT_MSG_LENGTH	65535	/* Returned by bgp_max_packet_length() when ext_messages is set */
  189: #define BGP_RX_BUFFER_SIZE	4096
  190: #define BGP_TX_BUFFER_SIZE	4096
  191: #define BGP_RX_BUFFER_EXT_SIZE	65535
  192: #define BGP_TX_BUFFER_EXT_SIZE	65535
  193: /* Message length limit for the session: the extended limit when p->ext_messages is set, the classic one otherwise */
  194: static inline uint bgp_max_packet_length(struct bgp_proto *p)
  195: { if (p->ext_messages) return BGP_MAX_EXT_MSG_LENGTH; return BGP_MAX_MESSAGE_LENGTH; }
  196: /* Shared linpool and core BGP entry points (their defining file is not visible from this header) */
  197: extern struct linpool *bgp_linpool;
  198: 
  199: 
  200: void bgp_start_timer(struct timer *t, int value);
  201: void bgp_check_config(struct bgp_config *c);
  202: void bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len);
  203: void bgp_close_conn(struct bgp_conn *c);
  204: void bgp_update_startup_delay(struct bgp_proto *p);
  205: void bgp_conn_enter_openconfirm_state(struct bgp_conn *conn);
  206: void bgp_conn_enter_established_state(struct bgp_conn *conn);
  207: void bgp_conn_enter_close_state(struct bgp_conn *conn);
  208: void bgp_conn_enter_idle_state(struct bgp_conn *conn);
  209: void bgp_handle_graceful_restart(struct bgp_proto *p);
  210: void bgp_graceful_restart_done(struct bgp_proto *p);
  211: void bgp_refresh_begin(struct bgp_proto *p);
  212: void bgp_refresh_end(struct bgp_proto *p);
  213: void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code);	/* class: presumably a BE_* value - confirm */
  214: void bgp_stop(struct bgp_proto *p, unsigned subcode);
  215: 
  216: struct rte_source *bgp_find_source(struct bgp_proto *p, u32 path_id);
  217: struct rte_source *bgp_get_source(struct bgp_proto *p, u32 path_id);
  218: 
  219: 
  220: /* Trace logging. Needs a variable `p` in scope providing p->p.debug and p->p.name; with LOCAL_DEBUG defined every message passes the filter. `flags` is parenthesized so that an argument such as A | B is masked as a whole. */
  221: #ifdef LOCAL_DEBUG
  222: #define BGP_FORCE_DEBUG 1
  223: #else
  224: #define BGP_FORCE_DEBUG 0
  225: #endif
  226: #define BGP_TRACE(flags, msg, args...) do { if ((p->p.debug & (flags)) || BGP_FORCE_DEBUG) \
  227: 	log(L_TRACE "%s: " msg, p->p.name , ## args ); } while(0)
  228: /* Like BGP_TRACE, but the message goes through log_rl() with the caller's rl argument. `flags` is parenthesized so that an argument such as A | B is masked as a whole. */
  229: #define BGP_TRACE_RL(rl, flags, msg, args...) do { if ((p->p.debug & (flags)) || BGP_FORCE_DEBUG) \
  230: 	log_rl(rl, L_TRACE "%s: " msg, p->p.name , ## args ); } while(0)
  231: 
  232: 
  233: /* attrs.c */
  234: 
  235: /* Hack: BA_NEXT_HOP has type EAF_TYPE_IP_ADDRESS, yet in IPv6 builds its payload
  236:  * holds two addresses - a global address and a link-local address. set_next_hop() stores addr in the first slot and, under IPV6, IPA_NONE in the second.
  237:  */
  238: #ifdef IPV6
  239: #define NEXT_HOP_LENGTH (2*sizeof(ip_addr))
  240: static inline void set_next_hop(byte *b, ip_addr addr) { ip_addr *nh = (ip_addr *) b; nh[0] = addr; nh[1] = IPA_NONE; }
  241: #else
  242: #define NEXT_HOP_LENGTH sizeof(ip_addr)
  243: static inline void set_next_hop(byte *b, ip_addr addr) { ip_addr *nh = (ip_addr *) b; *nh = addr; }
  244: #endif
  245: /* Attribute attach/decode/encode, route comparison hooks, bucket and prefix table management */
  246: void bgp_attach_attr(struct ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val);
  247: byte *bgp_attach_attr_wa(struct ea_list **to, struct linpool *pool, unsigned attr, unsigned len);
  248: struct rta *bgp_decode_attrs(struct bgp_conn *conn, byte *a, uint len, struct linpool *pool, int mandatory);
  249: int bgp_get_attr(struct eattr *e, byte *buf, int buflen);
  250: int bgp_rte_better(struct rte *, struct rte *);
  251: int bgp_rte_mergable(rte *pri, rte *sec);
  252: int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best);
  253: void bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs);
  254: int bgp_import_control(struct proto *, struct rte **, struct ea_list **, struct linpool *);
  255: void bgp_init_bucket_table(struct bgp_proto *);
  256: void bgp_free_bucket_table(struct bgp_proto *p);
  257: void bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck);
  258: void bgp_init_prefix_table(struct bgp_proto *p, u32 order);
  259: void bgp_free_prefix_table(struct bgp_proto *p);
  260: void bgp_free_prefix(struct bgp_proto *p, struct bgp_prefix *bp);
  261: uint bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains);
  262: void bgp_get_route_info(struct rte *, byte *buf, struct ea_list *attrs);
  263: /* Asks bgp_attach_attr_wa() for sizeof(ip_addr) bytes under attribute `attr` and stores `a` into the returned area */
  264: inline static void bgp_attach_attr_ip(struct ea_list **to, struct linpool *pool, unsigned attr, ip_addr a)
  265: { ip_addr *slot = (ip_addr *) bgp_attach_attr_wa(to, pool, attr, sizeof(ip_addr)); *slot = a; }
  266: 
  267: /* packets.c */
  268: /* State-change dump, packet scheduling, TX/RX on a birdsock, error description and logging */
  269: void mrt_dump_bgp_state_change(struct bgp_conn *conn, unsigned old, unsigned new);
  270: void bgp_schedule_packet(struct bgp_conn *conn, int type);
  271: void bgp_kick_tx(void *vconn);
  272: void bgp_tx(struct birdsock *sk);
  273: int bgp_rx(struct birdsock *sk, uint size);
  274: const char * bgp_error_dsc(unsigned code, unsigned subcode);
  275: void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len);
  276: 
  277: /* Packet types (presumably the `type` values taken by bgp_schedule_packet() - confirm) */
  278: 
  279: #define PKT_OPEN		0x01
  280: #define PKT_UPDATE		0x02
  281: #define PKT_NOTIFICATION	0x03
  282: #define PKT_KEEPALIVE		0x04
  283: #define PKT_ROUTE_REFRESH	0x05	/* [RFC2918] */
  284: #define PKT_BEGIN_REFRESH	0x1e	/* Dummy type for BoRR packet [RFC7313] */
  285: #define PKT_SCHEDULE_CLOSE	0x1f	/* Used internally to schedule socket close */
  286: 
  287: /* Attributes: BAF_* are attribute flag bits, BA_* are attribute type codes */
  288: 
  289: #define BAF_OPTIONAL		0x80
  290: #define BAF_TRANSITIVE		0x40
  291: #define BAF_PARTIAL		0x20
  292: #define BAF_EXT_LEN		0x10
  293: /* Trailing tags presumably mean: WM well-known mandatory, WD well-known discretionary, ON optional non-transitive, OT optional transitive - confirm */
  294: #define BA_ORIGIN		0x01	/* [RFC1771] */		/* WM */
  295: #define BA_AS_PATH		0x02				/* WM */
  296: #define BA_NEXT_HOP		0x03				/* WM */
  297: #define BA_MULTI_EXIT_DISC	0x04				/* ON */
  298: #define BA_LOCAL_PREF		0x05				/* WD */
  299: #define BA_ATOMIC_AGGR		0x06				/* WD */
  300: #define BA_AGGREGATOR		0x07				/* OT */
  301: #define BA_COMMUNITY		0x08	/* [RFC1997] */		/* OT */
  302: #define BA_ORIGINATOR_ID	0x09	/* [RFC1966] */		/* ON */
  303: #define BA_CLUSTER_LIST		0x0a				/* ON */
  304: /* We don't support these: */
  305: #define BA_DPA			0x0b	/* ??? */
  306: #define BA_ADVERTISER		0x0c	/* [RFC1863] */
  307: #define BA_RCID_PATH		0x0d
  308: #define BA_MP_REACH_NLRI	0x0e	/* [RFC2283] */
  309: #define BA_MP_UNREACH_NLRI	0x0f
  310: #define BA_EXT_COMMUNITY	0x10	/* [RFC4360] */
  311: #define BA_AS4_PATH             0x11    /* [RFC4893] */
  312: #define BA_AS4_AGGREGATOR       0x12
  313: #define BA_LARGE_COMMUNITY	0x20	/* [draft-ietf-idr-large-community] */
  314: 
  315: /* BGP connection states (values of the state member of struct bgp_conn) */
  316: 
  317: #define BS_IDLE			0
  318: #define BS_CONNECT		1	/* Attempting to connect */
  319: #define BS_ACTIVE		2	/* Waiting for connection retry & listening */
  320: #define BS_OPENSENT		3
  321: #define BS_OPENCONFIRM		4
  322: #define BS_ESTABLISHED		5
  323: #define BS_CLOSE		6	/* Used during transition to BS_IDLE */
  324: 
  325: #define BS_MAX			7	/* One past the highest BS_* value */
  326: 
  327: /* BGP start states
  328:  *
  329:  * Used in PS_START for fine-grained specification of starting state.
  330:  *
  331:  * When BGP protocol is started by core, it goes to BSS_PREPARE. When BGP
  332:  * protocol has done what is necessary to start itself (like acquiring the lock),
  333:  * it goes to BSS_CONNECT.  When some connection attempt has failed because of
  334:  * an option or capability error, it goes to BSS_CONNECT_NOCAP.
  335:  */
  336: 
  337: #define BSS_PREPARE		0	/* Used before ordinary BGP started, i. e. waiting for lock */
  338: #define BSS_DELAY		1	/* Startup delay due to previous errors */
  339: #define BSS_CONNECT		2	/* Ordinary BGP connecting */
  340: #define BSS_CONNECT_NOCAP	3	/* Legacy BGP connecting (without capabilities) */
  341: 
  342: 
  343: /* BGP feed states (TX)
  344:  *
  345:  * RFC 4724 specifies that an initial feed should end with End-of-RIB mark.
  346:  *
  347:  * RFC 7313 specifies that a route refresh should be demarcated by BoRR and EoRR packets.
  348:  *
  349:  * These states (stored in p->feed_state) are used to keep track of these
  350:  * requirements. When such a feed is started, BFS_LOADING / BFS_REFRESHING is
  351:  * set. When it has ended, BFS_LOADED / BFS_REFRESHED is set to schedule End-of-RIB
  352:  * or EoRR packet. When the packet is sent, the state returns to BFS_NONE.
  353:  *
  354:  * Note that when a non-demarcated feed (e.g. plain RFC 4271 initial load
  355:  * without End-of-RIB or plain RFC 2918 route refresh without BoRR/EoRR
  356:  * demarcation) is active, BFS_NONE is set.
  357:  *
  358:  * BFS_NONE, BFS_LOADING and BFS_REFRESHING are also used as load states (RX)
  359:  * with corresponding semantics (-, expecting End-of-RIB, expecting EoRR).
  360:  */
  361: 
  362: #define BFS_NONE		0	/* No feed or original non-demarcated feed */
  363: #define BFS_LOADING		1	/* Initial feed active, End-of-RIB planned */
  364: #define BFS_LOADED		2	/* Loading done, End-of-RIB marker scheduled */
  365: #define BFS_REFRESHING		3	/* Route refresh (introduced by BoRR) active */
  366: #define BFS_REFRESHED		4	/* Refresh done, EoRR packet scheduled */
  367: 
  368: 
  369: /* Error classes (struct bgp_proto keeps the class of the last error in last_error_class) */
  370: 
  371: #define BE_NONE			0
  372: #define BE_MISC			1	/* Miscellaneous error */
  373: #define BE_SOCKET		2	/* Socket error */
  374: #define BE_BGP_RX		3	/* BGP protocol error notification received */
  375: #define BE_BGP_TX		4	/* BGP protocol error notification sent */
  376: #define BE_AUTO_DOWN		5	/* Automatic shutdown */
  377: #define BE_MAN_DOWN		6	/* Manual shutdown */
  378: 
  379: /* Misc error codes (presumably used together with class BE_MISC - confirm) */
  380: 
  381: #define BEM_NEIGHBOR_LOST	1
  382: #define BEM_INVALID_NEXT_HOP	2
  383: #define BEM_INVALID_MD5		3	/* MD5 authentication kernel request failed (possibly not supported) */
  384: #define BEM_NO_SOCKET		4
  385: #define BEM_LINK_DOWN		5
  386: #define BEM_BFD_DOWN		6
  387: #define BEM_GRACEFUL_RESTART	7
  388: 
  389: /* Automatic shutdown error codes (presumably used together with class BE_AUTO_DOWN - confirm) */
  390: 
  391: #define BEA_ROUTE_LIMIT_EXCEEDED 1
  392: 
  393: /* Well-known communities */
  394: 
  395: #define BGP_COMM_NO_EXPORT		0xffffff01	/* Don't export outside local AS / confed. */
  396: #define BGP_COMM_NO_ADVERTISE		0xffffff02	/* Don't export at all */
  397: #define BGP_COMM_NO_EXPORT_SUBCONFED	0xffffff03	/* NO_EXPORT even in local confederation */
  398: 
  399: /* Origins */
  400: 
  401: #define ORIGIN_IGP		0
  402: #define ORIGIN_EGP		1
  403: #define ORIGIN_INCOMPLETE	2
  404: 
  405: /* Address families */
  406: 
  407: #define BGP_AF_IPV4		1
  408: #define BGP_AF_IPV6		2
  409: /* BGP_AF is the address family chosen at build time: BGP_AF_IPV6 when IPV6 is defined, BGP_AF_IPV4 otherwise */
  410: #ifdef IPV6
  411: #define BGP_AF BGP_AF_IPV6
  412: #else
  413: #define BGP_AF BGP_AF_IPV4
  414: #endif
  415: 
  416: #endif

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>