--- embedaddon/bird/proto/bgp/packets.c 2017/08/22 12:33:54 1.1.1.1 +++ embedaddon/bird/proto/bgp/packets.c 2021/03/17 19:50:23 1.1.1.2 @@ -13,7 +13,7 @@ #include "nest/protocol.h" #include "nest/route.h" #include "nest/attrs.h" -#include "nest/mrtdump.h" +#include "proto/mrt/mrt.h" #include "conf/conf.h" #include "lib/unaligned.h" #include "lib/socket.h" @@ -38,81 +38,45 @@ static byte fsm_err_subcode[BS_MAX] = { [BS_ESTABLISHED] = 3 }; -/* - * MRT Dump format is not semantically specified. - * We will use these values in appropriate fields: - * - * Local AS, Remote AS - configured AS numbers for given BGP instance. - * Local IP, Remote IP - IP addresses of the TCP connection (0 if no connection) - * - * We dump two kinds of MRT messages: STATE_CHANGE (for BGP state - * changes) and MESSAGE (for received BGP messages). - * - * STATE_CHANGE uses always AS4 variant, but MESSAGE uses AS4 variant - * only when AS4 session is established and even in that case MESSAGE - * does not use AS4 variant for initial OPEN message. This strange - * behavior is here for compatibility with Quagga and Bgpdump, - */ - -static byte * -mrt_put_bgp4_hdr(byte *buf, struct bgp_conn *conn, int as4) +static void +init_mrt_bgp_data(struct bgp_conn *conn, struct mrt_bgp_data *d) { struct bgp_proto *p = conn->bgp; + int p_ok = conn->state >= BS_OPENCONFIRM; - if (as4) - { - put_u32(buf+0, p->remote_as); - put_u32(buf+4, p->local_as); - buf+=8; - } - else - { - put_u16(buf+0, (p->remote_as <= 0xFFFF) ? p->remote_as : AS_TRANS); - put_u16(buf+2, (p->local_as <= 0xFFFF) ? p->local_as : AS_TRANS); - buf+=4; - } - - put_u16(buf+0, (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0); - put_u16(buf+2, BGP_AF); - buf+=4; - buf = put_ipa(buf, conn->sk ? conn->sk->daddr : IPA_NONE); - buf = put_ipa(buf, conn->sk ? conn->sk->saddr : IPA_NONE); - - return buf; + memset(d, 0, sizeof(struct mrt_bgp_data)); + d->peer_as = p->remote_as; + d->local_as = p->local_as; + d->index = (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0; + d->af = BGP_AF; + d->peer_ip = conn->sk ? conn->sk->daddr : IPA_NONE; + d->local_ip = conn->sk ? conn->sk->saddr : IPA_NONE; + d->as4 = p_ok ? p->as4_session : 0; + d->add_path = p_ok ? p->add_path_rx : 0; } static void -mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, unsigned len) +bgp_dump_message(struct bgp_conn *conn, byte *pkt, uint len) { - byte *buf = alloca(128+len); /* 128 is enough for MRT headers */ - byte *bp = buf + MRTDUMP_HDR_LENGTH; - int as4 = conn->bgp->as4_session; + struct mrt_bgp_data d; + init_mrt_bgp_data(conn, &d); - bp = mrt_put_bgp4_hdr(bp, conn, as4); - memcpy(bp, pkt, len); - bp += len; - mrt_dump_message(&conn->bgp->p, BGP4MP, as4 ? BGP4MP_MESSAGE_AS4 : BGP4MP_MESSAGE, - buf, bp-buf); -} + d.message = pkt; + d.msg_len = len; -static inline u16 -convert_state(unsigned state) -{ - /* Convert state from our BS_* values to values used in MRTDump */ - return (state == BS_CLOSE) ? 1 : state + 1; + mrt_dump_bgp_message(&d); } void -mrt_dump_bgp_state_change(struct bgp_conn *conn, unsigned old, unsigned new) +bgp_dump_state_change(struct bgp_conn *conn, uint old, uint new) { - byte buf[128]; - byte *bp = buf + MRTDUMP_HDR_LENGTH; + struct mrt_bgp_data d; + init_mrt_bgp_data(conn, &d); - bp = mrt_put_bgp4_hdr(bp, conn, 1); - put_u16(bp+0, convert_state(old)); - put_u16(bp+2, convert_state(new)); - bp += 4; - mrt_dump_message(&conn->bgp->p, BGP4MP, BGP4MP_STATE_CHANGE_AS4, buf, bp-buf); + d.old_state = old; + d.new_state = new; + + mrt_dump_bgp_state_change(&d); } static byte * @@ -231,8 +195,34 @@ bgp_put_cap_err(struct bgp_proto *p UNUSED, byte *buf) return buf; } +static byte * +bgp_put_cap_llgr1(struct bgp_proto *p, byte *buf) +{ + *buf++ = 71; /* Capability 71: Support for long-lived graceful restart */ + *buf++ = 7; /* Capability data length */ + *buf++ = 0; /* Appropriate AF */ + *buf++ = BGP_AF; + *buf++ = 1; /* and SAFI 1 */ + + /* Next is 8bit flags and 24bit time */ + put_u32(buf, p->cf->llgr_time); + buf[0] = p->p.gr_recovery ? BGP_LLGRF_FORWARDING : 0; + buf += 4; + + return buf; +} + static byte * +bgp_put_cap_llgr2(struct bgp_proto *p UNUSED, byte *buf) +{ + *buf++ = 71; /* Capability 71: Support for long-lived graceful restart */ + *buf++ = 0; /* Capability data length */ + return buf; +} + + +static byte * bgp_create_open(struct bgp_conn *conn, byte *buf) { struct bgp_proto *p = conn->bgp; @@ -285,6 +275,11 @@ bgp_create_open(struct bgp_conn *conn, byte *buf) if (p->cf->enable_extended_messages) cap = bgp_put_cap_ext_msg(p, cap); + if (p->cf->llgr_mode == BGP_LLGR_ABLE) + cap = bgp_put_cap_llgr1(p, cap); + else if (p->cf->llgr_mode == BGP_LLGR_AWARE) + cap = bgp_put_cap_llgr2(p, cap); + cap_len = cap - buf - 12; if (cap_len > 0) { @@ -789,8 +784,12 @@ bgp_kick_tx(void *vconn) struct bgp_conn *conn = vconn; DBG("BGP: kicking TX\n"); - while (bgp_fire_tx(conn) > 0) + uint max = 1024; + while (--max && (bgp_fire_tx(conn) > 0)) ; + + if (!max && !ev_active(conn->tx_ev)) + ev_schedule(conn->tx_ev); } void @@ -799,8 +798,12 @@ bgp_tx(sock *sk) struct bgp_conn *conn = sk->data; DBG("BGP: TX hook\n"); - while (bgp_fire_tx(conn) > 0) + uint max = 1024; + while (--max && (bgp_fire_tx(conn) > 0)) ; + + if (!max && !ev_active(conn->tx_ev)) + ev_schedule(conn->tx_ev); } /* Capatibility negotiation as per RFC 2842 */ @@ -856,7 +859,7 @@ bgp_parse_capabilities(struct bgp_conn *conn, byte *op conn->advertised_as = get_u32(opt + 2); break; - case 69: /* ADD-PATH capability, draft */ + case 69: /* ADD-PATH capability, RFC 7911 */ if (cl % 4) goto err; for (i = 0; i < cl; i += 4) @@ -872,11 +875,38 @@ bgp_parse_capabilities(struct bgp_conn *conn, byte *op conn->peer_enhanced_refresh_support = 1; break; + case 71: /* Long-lived graceful restart capability, RFC draft */ + if (cl % 7) + goto err; + conn->peer_llgr_aware = 1; + conn->peer_llgr_able = 0; + conn->peer_llgr_time = 0; + conn->peer_llgr_aflags = 0; + for (i = 0; i < cl; i += 7) + if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */ + { + conn->peer_llgr_able = 1; + conn->peer_llgr_time = get_u32(opt + 2+i+3) & 0xffffff; + conn->peer_llgr_aflags = opt[2+i+3]; + } + break; + /* We can safely ignore all other capabilities */ } len -= 2 + cl; opt += 2 + cl; } + + /* The LLGR capability must be advertised together with the GR capability, + otherwise it must be disregarded */ + if (!conn->peer_gr_aware && conn->peer_llgr_aware) + { + conn->peer_llgr_aware = 0; + conn->peer_llgr_able = 0; + conn->peer_llgr_time = 0; + conn->peer_llgr_aflags = 0; + } + return; err: @@ -1034,11 +1064,17 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len p->as4_session = p->cf->enable_as4 && conn->peer_as4_support; p->add_path_rx = (p->cf->add_path & ADD_PATH_RX) && (conn->peer_add_path & ADD_PATH_TX); p->add_path_tx = (p->cf->add_path & ADD_PATH_TX) && (conn->peer_add_path & ADD_PATH_RX); - p->gr_ready = p->cf->gr_mode && conn->peer_gr_able; + p->gr_ready = (p->cf->gr_mode && conn->peer_gr_able) || + (p->cf->llgr_mode && conn->peer_llgr_able); p->ext_messages = p->cf->enable_extended_messages && conn->peer_ext_messages_support; + /* Update RA mode */ if (p->add_path_tx) p->p.accept_ra_types = RA_ANY; + else if (p->cf->secondary) + p->p.accept_ra_types = RA_ACCEPTED; + else + p->p.accept_ra_types = RA_OPTIMAL; DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, p->as4_session); @@ -1120,6 +1156,7 @@ bgp_rte_update(struct bgp_proto *p, ip_addr prefix, in e->net = n; e->pflags = 0; e->u.bgp.suppressed = 0; + e->u.bgp.stale = -1; rte_update2(p->p.main_ahook, n, e, *src); } @@ -1489,38 +1526,72 @@ bgp_error_dsc(unsigned code, unsigned subcode) return buff; } +/* RFC 8203 - shutdown communication message */ +static int +bgp_handle_message(struct bgp_proto *p, byte *data, uint len, byte **bp) +{ + byte *msg = data + 1; + uint msg_len = data[0]; + uint i; + + /* Handle zero length message */ + if (msg_len == 0) + return 1; + + /* Handle proper message */ + if (msg_len + 1 > len) + return 0; + + /* Some elementary cleanup */ + for (i = 0; i < msg_len; i++) + if (msg[i] < ' ') + msg[i] = ' '; + + proto_set_message(&p->p, msg, msg_len); + *bp += bsprintf(*bp, ": \"%s\"", p->p.message); + return 1; +} + void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len) { - const byte *name; - byte *t, argbuf[36]; + byte argbuf[256+16], *t = argbuf; unsigned i; /* Don't report Cease messages generated by myself */ if (code == 6 && class == BE_BGP_TX) return; - name = bgp_error_dsc(code, subcode); - t = argbuf; + /* Reset shutdown message */ + if ((code == 6) && ((subcode == 2) || (subcode == 4))) + proto_set_message(&p->p, NULL, 0); + if (len) { - *t++ = ':'; - *t++ = ' '; - + /* Bad peer AS - we would like to print the AS */ if ((code == 2) && (subcode == 2) && ((len == 2) || (len == 4))) { - /* Bad peer AS - we would like to print the AS */ - t += bsprintf(t, "%d", (len == 2) ? get_u16(data) : get_u32(data)); + t += bsprintf(t, ": %u", (len == 2) ? get_u16(data) : get_u32(data)); goto done; } + + /* RFC 8203 - shutdown communication */ + if (((code == 6) && ((subcode == 2) || (subcode == 4)))) + if (bgp_handle_message(p, data, len, &t)) + goto done; + + *t++ = ':'; + *t++ = ' '; if (len > 16) len = 16; for (i=0; ip.name, msg, name, argbuf); + const byte *dsc = bgp_error_dsc(code, subcode); + log(L_REMOTE "%s: %s: %s%s", p->p.name, msg, dsc, argbuf); } static void @@ -1566,8 +1637,18 @@ bgp_rx_notification(struct bgp_conn *conn, byte *pkt, if (err) { bgp_update_startup_delay(p); - bgp_stop(p, 0); + bgp_stop(p, 0, NULL, 0); } + else + { + uint subcode_bit = 1 << ((subcode <= 8) ? subcode : 0); + if (p->cf->disable_after_cease & subcode_bit) + { + log(L_INFO "%s: Disabled after Cease notification", p->p.name); + p->startup_delay = 0; + p->p.disabled = 1; + } + } } static void @@ -1655,7 +1736,7 @@ bgp_rx_packet(struct bgp_conn *conn, byte *pkt, unsign DBG("BGP: Got packet %02x (%d bytes)\n", type, len); if (conn->bgp->p.mrtdump & MD_MESSAGES) - mrt_dump_bgp_packet(conn, pkt, len); + bgp_dump_message(conn, pkt, len); switch (type) {