File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / bird2 / proto / bgp / packets.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Mon Oct 21 16:03:56 2019 UTC (5 years, 5 months ago) by misho
Branches: bird2, MAIN
CVS tags: v2_0_7p0, HEAD
bird2 ver 2.0.7

    1: /*
    2:  *	BIRD -- BGP Packet Processing
    3:  *
    4:  *	(c) 2000 Martin Mares <mj@ucw.cz>
    5:  *	(c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
    6:  *	(c) 2008--2016 CZ.NIC z.s.p.o.
    7:  *
    8:  *	Can be freely distributed and used under the terms of the GNU GPL.
    9:  */
   10: 
   11: #undef LOCAL_DEBUG
   12: 
   13: #include <stdlib.h>
   14: 
   15: #include "nest/bird.h"
   16: #include "nest/iface.h"
   17: #include "nest/protocol.h"
   18: #include "nest/route.h"
   19: #include "nest/attrs.h"
   20: #include "proto/mrt/mrt.h"
   21: #include "conf/conf.h"
   22: #include "lib/unaligned.h"
   23: #include "lib/flowspec.h"
   24: #include "lib/socket.h"
   25: 
   26: #include "nest/cli.h"
   27: 
   28: #include "bgp.h"
   29: 
   30: 
/*
 * NOTE(review): presumably the subtype codes carried in ROUTE-REFRESH
 * messages (plain request, begin-of-refresh, end-of-refresh); their users
 * are outside this part of the file -- confirm there.
 */
#define BGP_RR_REQUEST		0
#define BGP_RR_BEGIN		1
#define BGP_RR_END		2

/*
 * NOTE(review): looks like an upper bound for one encoded NLRI entry
 * (4 B path ID + 1 B prefix length + 32 B of prefix data) -- confirm
 * against the NLRI encoders.
 */
#define BGP_NLRI_MAX		(4 + 1 + 32)

#define BGP_MPLS_BOS		1	/* Bottom-of-stack bit */
#define BGP_MPLS_MAX		10	/* Max number of labels that 24*n <= 255 */
#define BGP_MPLS_NULL		3	/* Implicit NULL label */
#define BGP_MPLS_MAGIC		0x800000 /* Magic withdraw label value, RFC 3107 3 */
   41: 
   42: 
/*
 * Token-bucket rate limiters initialised with the default log limits.
 * NOTE(review): judging by the names they throttle log messages about
 * received / sent UPDATEs; the users are outside this part of the file.
 */
static struct tbf rl_rcv_update = TBF_DEFAULT_LOG_LIMITS;
static struct tbf rl_snd_update = TBF_DEFAULT_LOG_LIMITS;

/* Table for state -> RFC 6608 FSM error subcodes */
/* Indexed by connection state; states not listed below map to subcode 0 */
static byte fsm_err_subcode[BS_MAX] = {
  [BS_OPENSENT] = 1,
  [BS_OPENCONFIRM] = 2,
  [BS_ESTABLISHED] = 3
};
   52: 
   53: 
   54: static struct bgp_channel *
   55: bgp_get_channel(struct bgp_proto *p, u32 afi)
   56: {
   57:   uint i;
   58: 
   59:   for (i = 0; i < p->channel_count; i++)
   60:     if (p->afi_map[i] == afi)
   61:       return p->channel_map[i];
   62: 
   63:   return NULL;
   64: }
   65: 
   66: static inline void
   67: put_af3(byte *buf, u32 id)
   68: {
   69:   put_u16(buf, id >> 16);
   70:   buf[2] = id & 0xff;
   71: }
   72: 
   73: static inline void
   74: put_af4(byte *buf, u32 id)
   75: {
   76:   put_u16(buf, id >> 16);
   77:   buf[2] = 0;
   78:   buf[3] = id & 0xff;
   79: }
   80: 
   81: static inline u32
   82: get_af3(byte *buf)
   83: {
   84:   return (get_u16(buf) << 16) | buf[2];
   85: }
   86: 
   87: static inline u32
   88: get_af4(byte *buf)
   89: {
   90:   return (get_u16(buf) << 16) | buf[3];
   91: }
   92: 
   93: static void
   94: init_mrt_bgp_data(struct bgp_conn *conn, struct mrt_bgp_data *d)
   95: {
   96:   struct bgp_proto *p = conn->bgp;
   97:   int p_ok = conn->state >= BS_OPENCONFIRM;
   98: 
   99:   memset(d, 0, sizeof(struct mrt_bgp_data));
  100:   d->peer_as = p->remote_as;
  101:   d->local_as = p->local_as;
  102:   d->index = (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0;
  103:   d->af = ipa_is_ip4(p->remote_ip) ? BGP_AFI_IPV4 : BGP_AFI_IPV6;
  104:   d->peer_ip = conn->sk ? conn->sk->daddr : IPA_NONE;
  105:   d->local_ip = conn->sk ? conn->sk->saddr : IPA_NONE;
  106:   d->as4 = p_ok ? p->as4_session : 0;
  107: }
  108: 
  109: static uint bgp_find_update_afi(byte *pos, uint len);
  110: 
  111: static int
  112: bgp_estimate_add_path(struct bgp_proto *p, byte *pkt, uint len)
  113: {
  114:   /* No need to estimate it for other messages than UPDATE */
  115:   if (pkt[18] != PKT_UPDATE)
  116:     return 0;
  117: 
  118:   /* 1 -> no channel, 2 -> all channels, 3 -> some channels */
  119:   if (p->summary_add_path_rx < 3)
  120:     return p->summary_add_path_rx == 2;
  121: 
  122:   uint afi = bgp_find_update_afi(pkt, len);
  123:   struct bgp_channel *c = bgp_get_channel(p, afi);
  124:   if (!c)
  125:   {
  126:     /* Either frame error (if !afi) or unknown AFI/SAFI,
  127:        will be reported later in regular parsing */
  128:     BGP_TRACE(D_PACKETS, "MRT processing noticed invalid packet");
  129:     return 0;
  130:   }
  131: 
  132:   return c->add_path_rx;
  133: }
  134: 
  135: static void
  136: bgp_dump_message(struct bgp_conn *conn, byte *pkt, uint len)
  137: {
  138:   struct mrt_bgp_data d;
  139:   init_mrt_bgp_data(conn, &d);
  140: 
  141:   d.message = pkt;
  142:   d.msg_len = len;
  143:   d.add_path = bgp_estimate_add_path(conn->bgp, pkt, len);
  144: 
  145:   mrt_dump_bgp_message(&d);
  146: }
  147: 
  148: void
  149: bgp_dump_state_change(struct bgp_conn *conn, uint old, uint new)
  150: {
  151:   struct mrt_bgp_data d;
  152:   init_mrt_bgp_data(conn, &d);
  153: 
  154:   d.old_state = old;
  155:   d.new_state = new;
  156: 
  157:   mrt_dump_bgp_state_change(&d);
  158: }
  159: 
  160: static byte *
  161: bgp_create_notification(struct bgp_conn *conn, byte *buf)
  162: {
  163:   struct bgp_proto *p = conn->bgp;
  164: 
  165:   BGP_TRACE(D_PACKETS, "Sending NOTIFICATION(code=%d.%d)", conn->notify_code, conn->notify_subcode);
  166:   buf[0] = conn->notify_code;
  167:   buf[1] = conn->notify_subcode;
  168:   memcpy(buf+2, conn->notify_data, conn->notify_size);
  169:   return buf + 2 + conn->notify_size;
  170: }
  171: 
  172: 
  173: /* Capability negotiation as per RFC 5492 */
  174: 
  175: const struct bgp_af_caps *
  176: bgp_find_af_caps(struct bgp_caps *caps, u32 afi)
  177: {
  178:   struct bgp_af_caps *ac;
  179: 
  180:   WALK_AF_CAPS(caps, ac)
  181:     if (ac->afi == afi)
  182:       return ac;
  183: 
  184:   return NULL;
  185: }
  186: 
  187: static struct bgp_af_caps *
  188: bgp_get_af_caps(struct bgp_caps **pcaps, u32 afi)
  189: {
  190:   struct bgp_caps *caps = *pcaps;
  191:   struct bgp_af_caps *ac;
  192: 
  193:   WALK_AF_CAPS(caps, ac)
  194:     if (ac->afi == afi)
  195:       return ac;
  196: 
  197:   uint n = caps->af_count;
  198:   if (uint_is_pow2(n))
  199:     *pcaps = caps = mb_realloc(caps, sizeof(struct bgp_caps) +
  200: 			       (2 * n) * sizeof(struct bgp_af_caps));
  201: 
  202:   ac = &caps->af_data[caps->af_count++];
  203:   memset(ac, 0, sizeof(struct bgp_af_caps));
  204:   ac->afi = afi;
  205: 
  206:   return ac;
  207: }
  208: 
  209: static int
  210: bgp_af_caps_cmp(const void *X, const void *Y)
  211: {
  212:   const struct bgp_af_caps *x = X, *y = Y;
  213:   return (x->afi < y->afi) ? -1 : (x->afi > y->afi) ? 1 : 0;
  214: }
  215: 
  216: 
  217: void
  218: bgp_prepare_capabilities(struct bgp_conn *conn)
  219: {
  220:   struct bgp_proto *p = conn->bgp;
  221:   struct bgp_channel *c;
  222:   struct bgp_caps *caps;
  223:   struct bgp_af_caps *ac;
  224: 
  225:   if (!p->cf->capabilities)
  226:   {
  227:     /* Just prepare empty local_caps */
  228:     conn->local_caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps));
  229:     return;
  230:   }
  231: 
  232:   /* Prepare bgp_caps structure */
  233:   int n = list_length(&p->p.channels);
  234:   caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps) + n * sizeof(struct bgp_af_caps));
  235:   conn->local_caps = caps;
  236: 
  237:   caps->as4_support = p->cf->enable_as4;
  238:   caps->ext_messages = p->cf->enable_extended_messages;
  239:   caps->route_refresh = p->cf->enable_refresh;
  240:   caps->enhanced_refresh = p->cf->enable_refresh;
  241: 
  242:   if (caps->as4_support)
  243:     caps->as4_number = p->public_as;
  244: 
  245:   if (p->cf->gr_mode)
  246:   {
  247:     caps->gr_aware = 1;
  248:     caps->gr_time = p->cf->gr_time;
  249:     caps->gr_flags = p->p.gr_recovery ? BGP_GRF_RESTART : 0;
  250:   }
  251: 
  252:   if (p->cf->llgr_mode)
  253:     caps->llgr_aware = 1;
  254: 
  255:   /* Allocate and fill per-AF fields */
  256:   WALK_LIST(c, p->p.channels)
  257:   {
  258:     ac = &caps->af_data[caps->af_count++];
  259:     ac->afi = c->afi;
  260:     ac->ready = 1;
  261: 
  262:     ac->ext_next_hop = bgp_channel_is_ipv4(c) && c->cf->ext_next_hop;
  263:     caps->any_ext_next_hop |= ac->ext_next_hop;
  264: 
  265:     ac->add_path = c->cf->add_path;
  266:     caps->any_add_path |= ac->add_path;
  267: 
  268:     if (c->cf->gr_able)
  269:     {
  270:       ac->gr_able = 1;
  271: 
  272:       if (p->p.gr_recovery)
  273: 	ac->gr_af_flags |= BGP_GRF_FORWARDING;
  274:     }
  275: 
  276:     if (c->cf->llgr_able)
  277:     {
  278:       ac->llgr_able = 1;
  279:       ac->llgr_time = c->cf->llgr_time;
  280: 
  281:       if (p->p.gr_recovery)
  282: 	ac->llgr_flags |= BGP_LLGRF_FORWARDING;
  283:     }
  284:   }
  285: 
  286:   /* Sort capability fields by AFI/SAFI */
  287:   qsort(caps->af_data, caps->af_count, sizeof(struct bgp_af_caps), bgp_af_caps_cmp);
  288: }
  289: 
  290: static byte *
  291: bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
  292: {
  293:   struct bgp_proto *p = conn->bgp;
  294:   struct bgp_caps *caps = conn->local_caps;
  295:   struct bgp_af_caps *ac;
  296:   byte *buf_head = buf;
  297:   byte *data;
  298: 
  299:   /* Create capability list in buffer */
  300: 
  301:   /*
  302:    * Note that max length is ~ 22+21*af_count. With max 12 channels that is
  303:    * 274. We are limited just by buffer size (4096, minus header), as we support
  304:    * extended optional parameres. Therefore, we have enough space for expansion.
  305:    */
  306: 
  307:   WALK_AF_CAPS(caps, ac)
  308:     if (ac->ready)
  309:     {
  310:       *buf++ = 1;		/* Capability 1: Multiprotocol extensions */
  311:       *buf++ = 4;		/* Capability data length */
  312:       put_af4(buf, ac->afi);
  313:       buf += 4;
  314:     }
  315: 
  316:   if (caps->route_refresh)
  317:   {
  318:     *buf++ = 2;			/* Capability 2: Support for route refresh */
  319:     *buf++ = 0;			/* Capability data length */
  320:   }
  321: 
  322:   if (caps->any_ext_next_hop)
  323:   {
  324:     *buf++ = 5;			/* Capability 5: Support for extended next hop */
  325:     *buf++ = 0;			/* Capability data length, will be fixed later */
  326:     data = buf;
  327: 
  328:     WALK_AF_CAPS(caps, ac)
  329:       if (ac->ext_next_hop)
  330:       {
  331: 	put_af4(buf, ac->afi);
  332: 	put_u16(buf+4, BGP_AFI_IPV6);
  333: 	buf += 6;
  334:       }
  335: 
  336:     data[-1] = buf - data;
  337:   }
  338: 
  339:   if (caps->ext_messages)
  340:   {
  341:     *buf++ = 6;			/* Capability 6: Support for extended messages */
  342:     *buf++ = 0;			/* Capability data length */
  343:   }
  344: 
  345:   if (caps->gr_aware)
  346:   {
  347:     *buf++ = 64;		/* Capability 64: Support for graceful restart */
  348:     *buf++ = 0;			/* Capability data length, will be fixed later */
  349:     data = buf;
  350: 
  351:     put_u16(buf, caps->gr_time);
  352:     buf[0] |= caps->gr_flags;
  353:     buf += 2;
  354: 
  355:     WALK_AF_CAPS(caps, ac)
  356:       if (ac->gr_able)
  357:       {
  358: 	put_af3(buf, ac->afi);
  359: 	buf[3] = ac->gr_af_flags;
  360: 	buf += 4;
  361:       }
  362: 
  363:     data[-1] = buf - data;
  364:   }
  365: 
  366:   if (caps->as4_support)
  367:   {
  368:     *buf++ = 65;		/* Capability 65: Support for 4-octet AS number */
  369:     *buf++ = 4;			/* Capability data length */
  370:     put_u32(buf, p->public_as);
  371:     buf += 4;
  372:   }
  373: 
  374:   if (caps->any_add_path)
  375:   {
  376:     *buf++ = 69;		/* Capability 69: Support for ADD-PATH */
  377:     *buf++ = 0;			/* Capability data length, will be fixed later */
  378:     data = buf;
  379: 
  380:     WALK_AF_CAPS(caps, ac)
  381:       if (ac->add_path)
  382:       {
  383: 	put_af3(buf, ac->afi);
  384: 	buf[3] = ac->add_path;
  385: 	buf += 4;
  386:       }
  387: 
  388:     data[-1] = buf - data;
  389:   }
  390: 
  391:   if (caps->enhanced_refresh)
  392:   {
  393:     *buf++ = 70;		/* Capability 70: Support for enhanced route refresh */
  394:     *buf++ = 0;			/* Capability data length */
  395:   }
  396: 
  397:   if (caps->llgr_aware)
  398:   {
  399:     *buf++ = 71;		/* Capability 71: Support for long-lived graceful restart */
  400:     *buf++ = 0;			/* Capability data length, will be fixed later */
  401:     data = buf;
  402: 
  403:     WALK_AF_CAPS(caps, ac)
  404:       if (ac->llgr_able)
  405:       {
  406: 	put_af3(buf, ac->afi);
  407: 	buf[3] = ac->llgr_flags;
  408: 	put_u24(buf+4, ac->llgr_time);
  409: 	buf += 7;
  410:       }
  411: 
  412:     data[-1] = buf - data;
  413:   }
  414: 
  415:   caps->length = buf - buf_head;
  416: 
  417:   return buf;
  418: }
  419: 
  420: static int
  421: bgp_read_capabilities(struct bgp_conn *conn, byte *pos, int len)
  422: {
  423:   struct bgp_proto *p = conn->bgp;
  424:   struct bgp_caps *caps;
  425:   struct bgp_af_caps *ac;
  426:   int i, cl;
  427:   u32 af;
  428: 
  429:   if (!conn->remote_caps)
  430:     caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps) + sizeof(struct bgp_af_caps));
  431:   else
  432:   {
  433:     caps = conn->remote_caps;
  434:     conn->remote_caps = NULL;
  435:   }
  436: 
  437:   caps->length += len;
  438: 
  439:   while (len > 0)
  440:   {
  441:     if (len < 2 || len < (2 + pos[1]))
  442:       goto err;
  443: 
  444:     /* Capability length */
  445:     cl = pos[1];
  446: 
  447:     /* Capability type */
  448:     switch (pos[0])
  449:     {
  450:     case  1: /* Multiprotocol capability, RFC 4760 */
  451:       if (cl != 4)
  452: 	goto err;
  453: 
  454:       af = get_af4(pos+2);
  455:       ac = bgp_get_af_caps(&caps, af);
  456:       ac->ready = 1;
  457:       break;
  458: 
  459:     case  2: /* Route refresh capability, RFC 2918 */
  460:       if (cl != 0)
  461: 	goto err;
  462: 
  463:       caps->route_refresh = 1;
  464:       break;
  465: 
  466:     case  5: /* Extended next hop encoding capability, RFC 5549 */
  467:       if (cl % 6)
  468: 	goto err;
  469: 
  470:       for (i = 0; i < cl; i += 6)
  471:       {
  472: 	/* Specified only for IPv4 prefixes with IPv6 next hops */
  473: 	if ((get_u16(pos+2+i+0) != BGP_AFI_IPV4) ||
  474: 	    (get_u16(pos+2+i+4) != BGP_AFI_IPV6))
  475: 	  continue;
  476: 
  477: 	af = get_af4(pos+2+i);
  478: 	ac = bgp_get_af_caps(&caps, af);
  479: 	ac->ext_next_hop = 1;
  480:       }
  481:       break;
  482: 
  483:     case  6: /* Extended message length capability, RFC draft */
  484:       if (cl != 0)
  485: 	goto err;
  486: 
  487:       caps->ext_messages = 1;
  488:       break;
  489: 
  490:     case 64: /* Graceful restart capability, RFC 4724 */
  491:       if (cl % 4 != 2)
  492: 	goto err;
  493: 
  494:       /* Only the last instance is valid */
  495:       WALK_AF_CAPS(caps, ac)
  496:       {
  497: 	ac->gr_able = 0;
  498: 	ac->gr_af_flags = 0;
  499:       }
  500: 
  501:       caps->gr_aware = 1;
  502:       caps->gr_flags = pos[2] & 0xf0;
  503:       caps->gr_time = get_u16(pos + 2) & 0x0fff;
  504: 
  505:       for (i = 2; i < cl; i += 4)
  506:       {
  507: 	af = get_af3(pos+2+i);
  508: 	ac = bgp_get_af_caps(&caps, af);
  509: 	ac->gr_able = 1;
  510: 	ac->gr_af_flags = pos[2+i+3];
  511:       }
  512:       break;
  513: 
  514:     case 65: /* AS4 capability, RFC 6793 */
  515:       if (cl != 4)
  516: 	goto err;
  517: 
  518:       caps->as4_support = 1;
  519:       caps->as4_number = get_u32(pos + 2);
  520:       break;
  521: 
  522:     case 69: /* ADD-PATH capability, RFC 7911 */
  523:       if (cl % 4)
  524: 	goto err;
  525: 
  526:       for (i = 0; i < cl; i += 4)
  527:       {
  528: 	byte val = pos[2+i+3];
  529: 	if (!val || (val > BGP_ADD_PATH_FULL))
  530: 	{
  531: 	  log(L_WARN "%s: Got ADD-PATH capability with unknown value %u, ignoring",
  532: 	      p->p.name, val);
  533: 	  break;
  534: 	}
  535:       }
  536: 
  537:       for (i = 0; i < cl; i += 4)
  538:       {
  539: 	af = get_af3(pos+2+i);
  540: 	ac = bgp_get_af_caps(&caps, af);
  541: 	ac->add_path = pos[2+i+3];
  542:       }
  543:       break;
  544: 
  545:     case 70: /* Enhanced route refresh capability, RFC 7313 */
  546:       if (cl != 0)
  547: 	goto err;
  548: 
  549:       caps->enhanced_refresh = 1;
  550:       break;
  551: 
  552:     case 71: /* Long lived graceful restart capability, RFC draft */
  553:       if (cl % 7)
  554: 	goto err;
  555: 
  556:       /* Presumably, only the last instance is valid */
  557:       WALK_AF_CAPS(caps, ac)
  558:       {
  559: 	ac->llgr_able = 0;
  560: 	ac->llgr_flags = 0;
  561: 	ac->llgr_time = 0;
  562:       }
  563: 
  564:       caps->llgr_aware = 1;
  565: 
  566:       for (i = 0; i < cl; i += 7)
  567:       {
  568: 	af = get_af3(pos+2+i);
  569: 	ac = bgp_get_af_caps(&caps, af);
  570: 	ac->llgr_able = 1;
  571: 	ac->llgr_flags = pos[2+i+3];
  572: 	ac->llgr_time = get_u24(pos + 2+i+4);
  573:       }
  574:       break;
  575: 
  576:       /* We can safely ignore all other capabilities */
  577:     }
  578: 
  579:     ADVANCE(pos, len, 2 + cl);
  580:   }
  581: 
  582:   /* The LLGR capability must be advertised together with the GR capability,
  583:      otherwise it must be disregarded */
  584:   if (!caps->gr_aware && caps->llgr_aware)
  585:   {
  586:     caps->llgr_aware = 0;
  587:     WALK_AF_CAPS(caps, ac)
  588:     {
  589:       ac->llgr_able = 0;
  590:       ac->llgr_flags = 0;
  591:       ac->llgr_time = 0;
  592:     }
  593:   }
  594: 
  595:   conn->remote_caps = caps;
  596:   return 0;
  597: 
  598: err:
  599:   mb_free(caps);
  600:   bgp_error(conn, 2, 0, NULL, 0);
  601:   return -1;
  602: }
  603: 
  604: static int
  605: bgp_check_capabilities(struct bgp_conn *conn)
  606: {
  607:   struct bgp_proto *p = conn->bgp;
  608:   struct bgp_caps *local = conn->local_caps;
  609:   struct bgp_caps *remote = conn->remote_caps;
  610:   struct bgp_channel *c;
  611:   int count = 0;
  612: 
  613:   /* This is partially overlapping with bgp_conn_enter_established_state(),
  614:      but we need to run this just after we receive OPEN message */
  615: 
  616:   WALK_LIST(c, p->p.channels)
  617:   {
  618:     const struct bgp_af_caps *loc = bgp_find_af_caps(local,  c->afi);
  619:     const struct bgp_af_caps *rem = bgp_find_af_caps(remote, c->afi);
  620: 
  621:     /* Find out whether this channel will be active */
  622:     int active = loc && loc->ready &&
  623:       ((rem && rem->ready) || (!remote->length && (c->afi == BGP_AF_IPV4)));
  624: 
  625:     /* Mandatory must be active */
  626:     if (c->cf->mandatory && !active)
  627:       return 0;
  628: 
  629:     if (active)
  630:       count++;
  631:   }
  632: 
  633:   /* We need at least one channel active */
  634:   if (!count)
  635:     return 0;
  636: 
  637:   return 1;
  638: }
  639: 
  640: static int
  641: bgp_read_options(struct bgp_conn *conn, byte *pos, uint len, uint rest)
  642: {
  643:   struct bgp_proto *p = conn->bgp;
  644:   int ext = 0;
  645: 
  646:   /* Handle extended length (draft-ietf-idr-ext-opt-param-07) */
  647:   if ((len > 0) && (rest > 0) && (pos[0] == 255))
  648:   {
  649:     if (rest < 3)
  650:       goto err;
  651: 
  652:     /* Update pos/len to describe optional data */
  653:     len = get_u16(pos+1);
  654:     ext = 1;
  655:     pos += 3;
  656:     rest -= 3;
  657:   }
  658: 
  659:   /* Verify that optional data fits into OPEN packet */
  660:   if (len > rest)
  661:     goto err;
  662: 
  663:   /* Length of option parameter header */
  664:   uint hlen = ext ? 3 : 2;
  665: 
  666:   while (len > 0)
  667:   {
  668:     if (len < hlen)
  669:       goto err;
  670: 
  671:     uint otype = get_u8(pos);
  672:     uint olen = ext ? get_u16(pos+1) : get_u8(pos+1);
  673: 
  674:     if (len < (hlen + olen))
  675:       goto err;
  676: 
  677:     if (otype == 2)
  678:     {
  679:       /* BGP capabilities, RFC 5492 */
  680:       if (p->cf->capabilities)
  681: 	if (bgp_read_capabilities(conn, pos + hlen, olen) < 0)
  682: 	  return -1;
  683:     }
  684:     else
  685:     {
  686:       /* Unknown option */
  687:       bgp_error(conn, 2, 4, pos, hlen + olen);
  688:       return -1;
  689:     }
  690: 
  691:     ADVANCE(pos, len, hlen + olen);
  692:   }
  693: 
  694:   /* Prepare empty caps if no capability option was announced */
  695:   if (!conn->remote_caps)
  696:     conn->remote_caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps));
  697: 
  698:   return 0;
  699: 
  700: err:
  701:   bgp_error(conn, 2, 0, NULL, 0);
  702:   return -1;
  703: }
  704: 
  705: static byte *
  706: bgp_create_open(struct bgp_conn *conn, byte *buf)
  707: {
  708:   struct bgp_proto *p = conn->bgp;
  709: 
  710:   BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
  711: 	    BGP_VERSION, p->public_as, p->cf->hold_time, p->local_id);
  712: 
  713:   buf[0] = BGP_VERSION;
  714:   put_u16(buf+1, (p->public_as < 0xFFFF) ? p->public_as : AS_TRANS);
  715:   put_u16(buf+3, p->cf->hold_time);
  716:   put_u32(buf+5, p->local_id);
  717: 
  718:   if (p->cf->capabilities)
  719:   {
  720:     /* Prepare local_caps and write capabilities to buffer */
  721:     byte *pos = buf+12;
  722:     byte *end = bgp_write_capabilities(conn, pos);
  723:     uint len = end - pos;
  724: 
  725:     if (len < 254)
  726:     {
  727:       buf[9] = len + 2;		/* Optional parameters length */
  728:       buf[10] = 2;		/* Option 2: Capability list */
  729:       buf[11] = len;		/* Option data length */
  730:     }
  731:     else /* draft-ietf-idr-ext-opt-param-07 */
  732:     {
  733:       /* Move capabilities 4 B forward */
  734:       memmove(buf + 16, pos, len);
  735:       pos = buf + 16;
  736:       end = pos + len;
  737: 
  738:       buf[9] = 255;		/* Non-ext OP length, fake */
  739:       buf[10] = 255;		/* Non-ext OP type, signals extended length */
  740:       put_u16(buf+11, len + 3);	/* Extended optional parameters length */
  741:       buf[13] = 2;		/* Option 2: Capability list */
  742:       put_u16(buf+14, len);	/* Option extended data length */
  743:     }
  744: 
  745:     return end;
  746:   }
  747:   else
  748:   {
  749:     buf[9] = 0;			/* No optional parameters */
  750:     return buf + 10;
  751:   }
  752: 
  753:   return buf;
  754: }
  755: 
  756: static void
  757: bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
  758: {
  759:   struct bgp_proto *p = conn->bgp;
  760:   struct bgp_conn *other;
  761:   u32 asn, hold, id;
  762: 
  763:   /* Check state */
  764:   if (conn->state != BS_OPENSENT)
  765:   { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
  766: 
  767:   /* Check message length */
  768:   if (len < 29)
  769:   { bgp_error(conn, 1, 2, pkt+16, 2); return; }
  770: 
  771:   if (pkt[19] != BGP_VERSION)
  772:   { u16 val = BGP_VERSION; bgp_error(conn, 2, 1, (byte *) &val, 2); return; }
  773: 
  774:   asn = get_u16(pkt+20);
  775:   hold = get_u16(pkt+22);
  776:   id = get_u32(pkt+24);
  777:   BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%R)", asn, hold, id);
  778: 
  779:   if (bgp_read_options(conn, pkt+29, pkt[28], len-29) < 0)
  780:     return;
  781: 
  782:   if (hold > 0 && hold < 3)
  783:   { bgp_error(conn, 2, 6, pkt+22, 2); return; }
  784: 
  785:   /* RFC 6286 2.2 - router ID is nonzero and AS-wide unique */
  786:   if (!id || (p->is_internal && id == p->local_id))
  787:   { bgp_error(conn, 2, 3, pkt+24, -4); return; }
  788: 
  789:   /* RFC 5492 4 - check for required capabilities */
  790:   if (p->cf->capabilities && !bgp_check_capabilities(conn))
  791:   { bgp_error(conn, 2, 7, NULL, 0); return; }
  792: 
  793:   struct bgp_caps *caps = conn->remote_caps;
  794: 
  795:   if (caps->as4_support)
  796:   {
  797:     u32 as4 = caps->as4_number;
  798: 
  799:     if ((as4 != asn) && (asn != AS_TRANS))
  800:       log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name);
  801: 
  802:     /* When remote ASN is unspecified, it must be external one */
  803:     if (p->remote_as ? (as4 != p->remote_as) : (as4 == p->local_as))
  804:     { as4 = htonl(as4); bgp_error(conn, 2, 2, (byte *) &as4, 4); return; }
  805: 
  806:     conn->received_as = as4;
  807:   }
  808:   else
  809:   {
  810:     if (p->remote_as ? (asn != p->remote_as) : (asn == p->local_as))
  811:     { bgp_error(conn, 2, 2, pkt+20, 2); return; }
  812: 
  813:     conn->received_as = asn;
  814:   }
  815: 
  816:   /* Check the other connection */
  817:   other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
  818:   switch (other->state)
  819:   {
  820:   case BS_CONNECT:
  821:   case BS_ACTIVE:
  822:     /* Stop outgoing connection attempts */
  823:     bgp_conn_enter_idle_state(other);
  824:     break;
  825: 
  826:   case BS_IDLE:
  827:   case BS_OPENSENT:
  828:   case BS_CLOSE:
  829:     break;
  830: 
  831:   case BS_OPENCONFIRM:
  832:     /*
  833:      * Description of collision detection rules in RFC 4271 is confusing and
  834:      * contradictory, but it is essentially:
  835:      *
  836:      * 1. Router with higher ID is dominant
  837:      * 2. If both have the same ID, router with higher ASN is dominant [RFC6286]
  838:      * 3. When both connections are in OpenConfirm state, one initiated by
  839:      *    the dominant router is kept.
  840:      *
  841:      * The first line in the expression below evaluates whether the neighbor
  842:      * is dominant, the second line whether the new connection was initiated
  843:      * by the neighbor. If both are true (or both are false), we keep the new
  844:      * connection, otherwise we keep the old one.
  845:      */
  846:     if (((p->local_id < id) || ((p->local_id == id) && (p->public_as < p->remote_as)))
  847: 	== (conn == &p->incoming_conn))
  848:     {
  849:       /* Should close the other connection */
  850:       BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
  851:       bgp_error(other, 6, 7, NULL, 0);
  852:       break;
  853:     }
  854:     /* Fall thru */
  855:   case BS_ESTABLISHED:
  856:     /* Should close this connection */
  857:     BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection");
  858:     bgp_error(conn, 6, 7, NULL, 0);
  859:     return;
  860: 
  861:   default:
  862:     bug("bgp_rx_open: Unknown state");
  863:   }
  864: 
  865:   /* Update our local variables */
  866:   conn->hold_time = MIN(hold, p->cf->hold_time);
  867:   conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
  868:   conn->as4_session = conn->local_caps->as4_support && caps->as4_support;
  869:   conn->ext_messages = conn->local_caps->ext_messages && caps->ext_messages;
  870:   p->remote_id = id;
  871: 
  872:   DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n",
  873:       conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, conn->as4_session);
  874: 
  875:   bgp_schedule_packet(conn, NULL, PKT_KEEPALIVE);
  876:   bgp_start_timer(conn->hold_timer, conn->hold_time);
  877:   bgp_conn_enter_openconfirm_state(conn);
  878: }
  879: 
  880: 
  881: /*
  882:  *	Next hop handling
  883:  */
  884: 
/*
 * Reporting helpers for attribute / next hop processing. All of them expect
 * a variable `s` in scope, as the log message is prefixed by
 * s->proto->p.name. DISCARD and WITHDRAW contain a bare `return`, so they
 * can be used only inside functions returning void.
 */

/* Log a message of class L_REMOTE prefixed by the protocol name */
#define REPORT(msg, args...) \
  ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })

/* Log the message and return from the calling function */
#define DISCARD(msg, args...) \
  ({ REPORT(msg, ## args); return; })

/* Log the message, set s->err_withdraw and return from the calling function */
#define WITHDRAW(msg, args...) \
  ({ REPORT(msg, ## args); s->err_withdraw = 1; return; })

/* Message texts for the macros above; BAD_AFI takes two %u arguments */
#define BAD_AFI		"Unexpected AF <%u/%u> in UPDATE"
#define BAD_NEXT_HOP	"Invalid NEXT_HOP attribute"
#define NO_NEXT_HOP	"Missing NEXT_HOP attribute"
#define NO_LABEL_STACK	"Missing MPLS stack"
  898: 
  899: 
  900: static void
  901: bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll)
  902: {
  903:   struct bgp_proto *p = s->proto;
  904:   struct bgp_channel *c = s->channel;
  905: 
  906:   if (c->cf->gw_mode == GW_DIRECT)
  907:   {
  908:     neighbor *nbr = NULL;
  909: 
  910:     /* GW_DIRECT -> single_hop -> p->neigh != NULL */
  911:     if (ipa_nonzero(gw))
  912:       nbr = neigh_find(&p->p, gw, NULL, 0);
  913:     else if (ipa_nonzero(ll))
  914:       nbr = neigh_find(&p->p, ll, p->neigh->iface, 0);
  915: 
  916:     if (!nbr || (nbr->scope == SCOPE_HOST))
  917:       WITHDRAW(BAD_NEXT_HOP);
  918: 
  919:     a->dest = RTD_UNICAST;
  920:     a->nh.gw = nbr->addr;
  921:     a->nh.iface = nbr->iface;
  922:     a->igp_metric = c->cf->cost;
  923:   }
  924:   else /* GW_RECURSIVE */
  925:   {
  926:     if (ipa_zero(gw))
  927:       WITHDRAW(BAD_NEXT_HOP);
  928: 
  929:     rtable *tab = ipa_is_ip4(gw) ? c->igp_table_ip4 : c->igp_table_ip6;
  930:     s->hostentry = rt_get_hostentry(tab, gw, ll, c->c.table);
  931: 
  932:     if (!s->mpls)
  933:       rta_apply_hostentry(a, s->hostentry, NULL);
  934: 
  935:     /* With MPLS, hostentry is applied later in bgp_apply_mpls_labels() */
  936:   }
  937: }
  938: 
  939: static void
  940: bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a, u32 *labels, uint lnum)
  941: {
  942:   if (lnum > MPLS_MAX_LABEL_STACK)
  943:   {
  944:     REPORT("Too many MPLS labels ($u)", lnum);
  945: 
  946:     a->dest = RTD_UNREACHABLE;
  947:     a->hostentry = NULL;
  948:     a->nh = (struct nexthop) { };
  949:     return;
  950:   }
  951: 
  952:   /* Handle implicit NULL as empty MPLS stack */
  953:   if ((lnum == 1) && (labels[0] == BGP_MPLS_NULL))
  954:     lnum = 0;
  955: 
  956:   if (s->channel->cf->gw_mode == GW_DIRECT)
  957:   {
  958:     a->nh.labels = lnum;
  959:     memcpy(a->nh.label, labels, 4*lnum);
  960:   }
  961:   else /* GW_RECURSIVE */
  962:   {
  963:     mpls_label_stack ms;
  964: 
  965:     ms.len = lnum;
  966:     memcpy(ms.stack, labels, 4*lnum);
  967:     rta_apply_hostentry(a, s->hostentry, &ms);
  968:   }
  969: }
  970: 
  971: 
  972: static int
  973: bgp_match_src(struct bgp_export_state *s, int mode)
  974: {
  975:   switch (mode)
  976:   {
  977:   case NH_NO:		return 0;
  978:   case NH_ALL:		return 1;
  979:   case NH_IBGP:		return s->src && s->src->is_internal;
  980:   case NH_EBGP:		return s->src && !s->src->is_internal;
  981:   default:		return 0;
  982:   }
  983: }
  984: 
  985: static inline int
  986: bgp_use_next_hop(struct bgp_export_state *s, eattr *a)
  987: {
  988:   struct bgp_proto *p = s->proto;
  989:   struct bgp_channel *c = s->channel;
  990:   ip_addr *nh = (void *) a->u.ptr->data;
  991: 
  992:   /* Handle next hop self option */
  993:   if (c->cf->next_hop_self && bgp_match_src(s, c->cf->next_hop_self))
  994:     return 0;
  995: 
  996:   /* Handle next hop keep option */
  997:   if (c->cf->next_hop_keep && bgp_match_src(s, c->cf->next_hop_keep))
  998:     return 1;
  999: 
 1000:   /* Keep it when explicitly set in export filter */
 1001:   if (a->type & EAF_FRESH)
 1002:     return 1;
 1003: 
 1004:   /* Check for non-matching AF */
 1005:   if ((ipa_is_ip4(*nh) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop)
 1006:     return 0;
 1007: 
 1008:   /* Keep it when exported to internal peers */
 1009:   if (p->is_interior && ipa_nonzero(*nh))
 1010:     return 1;
 1011: 
 1012:   /* Keep it when forwarded between single-hop BGPs on the same iface */
 1013:   struct iface *ifa = (s->src && s->src->neigh) ? s->src->neigh->iface : NULL;
 1014:   return p->neigh && (p->neigh->iface == ifa);
 1015: }
 1016: 
 1017: static inline int
 1018: bgp_use_gateway(struct bgp_export_state *s)
 1019: {
 1020:   struct bgp_proto *p = s->proto;
 1021:   struct bgp_channel *c = s->channel;
 1022:   rta *ra = s->route->attrs;
 1023: 
 1024:   /* Handle next hop self option - also applies to gateway */
 1025:   if (c->cf->next_hop_self && bgp_match_src(s, c->cf->next_hop_self))
 1026:     return 0;
 1027: 
 1028:   /* We need one valid global gateway */
 1029:   if ((ra->dest != RTD_UNICAST) || ra->nh.next || ipa_zero(ra->nh.gw) || ipa_is_link_local(ra->nh.gw))
 1030:     return 0;
 1031: 
 1032:   /* Check for non-matching AF */
 1033:   if ((ipa_is_ip4(ra->nh.gw) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop)
 1034:     return 0;
 1035: 
 1036:   /* Use it when exported to internal peers */
 1037:   if (p->is_interior)
 1038:     return 1;
 1039: 
 1040:   /* Use it when forwarded to single-hop BGP peer on on the same iface */
 1041:   return p->neigh && (p->neigh->iface == ra->nh.iface);
 1042: }
 1043: 
 1044: static void
 1045: bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to)
 1046: {
 1047:   if (!a || !bgp_use_next_hop(s, a))
 1048:   {
 1049:     if (bgp_use_gateway(s))
 1050:     {
 1051:       rta *ra = s->route->attrs;
 1052:       ip_addr nh[1] = { ra->nh.gw };
 1053:       bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, 16);
 1054: 
 1055:       if (s->mpls)
 1056:       {
 1057: 	u32 implicit_null = BGP_MPLS_NULL;
 1058: 	u32 *labels = ra->nh.labels ? ra->nh.label : &implicit_null;
 1059: 	uint lnum = ra->nh.labels ? ra->nh.labels : 1;
 1060: 	bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4);
 1061:       }
 1062:     }
 1063:     else
 1064:     {
 1065:       ip_addr nh[2] = { s->channel->next_hop_addr, s->channel->link_addr };
 1066:       bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16);
 1067:       s->local_next_hop = 1;
 1068: 
 1069:       /* TODO: Use local MPLS assigned label */
 1070:       if (s->mpls)
 1071:       {
 1072: 	u32 implicit_null = BGP_MPLS_NULL;
 1073: 	bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, &implicit_null, 4);
 1074:       }
 1075:     }
 1076:   }
 1077: 
 1078:   /* Check if next hop is valid */
 1079:   a = bgp_find_attr(*to, BA_NEXT_HOP);
 1080:   if (!a)
 1081:     WITHDRAW(NO_NEXT_HOP);
 1082: 
 1083:   ip_addr *nh = (void *) a->u.ptr->data;
 1084:   ip_addr peer = s->proto->remote_ip;
 1085:   uint len = a->u.ptr->length;
 1086: 
 1087:   /* Forbid zero next hop */
 1088:   if (ipa_zero(nh[0]) && ((len != 32) || ipa_zero(nh[1])))
 1089:     WITHDRAW(BAD_NEXT_HOP);
 1090: 
 1091:   /* Forbid next hop equal to neighbor IP */
 1092:   if (ipa_equal(peer, nh[0]) || ((len == 32) && ipa_equal(peer, nh[1])))
 1093:     WITHDRAW(BAD_NEXT_HOP);
 1094: 
 1095:   /* Forbid next hop with non-matching AF */
 1096:   if ((ipa_is_ip4(nh[0]) != bgp_channel_is_ipv4(s->channel)) &&
 1097:       !s->channel->ext_next_hop)
 1098:     WITHDRAW(BAD_NEXT_HOP);
 1099: 
 1100:   /* Just check if MPLS stack */
 1101:   if (s->mpls && !bgp_find_attr(*to, BA_MPLS_LABEL_STACK))
 1102:     WITHDRAW(NO_LABEL_STACK);
 1103: }
 1104: 
 1105: static uint
 1106: bgp_encode_next_hop_ip(struct bgp_write_state *s, eattr *a, byte *buf, uint size UNUSED)
 1107: {
 1108:   /* This function is used only for MP-BGP, see bgp_encode_next_hop() for IPv4 BGP */
 1109:   ip_addr *nh = (void *) a->u.ptr->data;
 1110:   uint len = a->u.ptr->length;
 1111: 
 1112:   ASSERT((len == 16) || (len == 32));
 1113: 
 1114:   /*
 1115:    * Both IPv4 and IPv6 next hops can be used (with ext_next_hop enabled). This
 1116:    * is specified in RFC 5549 for IPv4 and in RFC 4798 for IPv6. The difference
 1117:    * is that IPv4 address is directly encoded with IPv4 NLRI, but as IPv4-mapped
 1118:    * IPv6 address with IPv6 NLRI.
 1119:    */
 1120: 
 1121:   if (bgp_channel_is_ipv4(s->channel) && ipa_is_ip4(nh[0]))
 1122:   {
 1123:     put_ip4(buf, ipa_to_ip4(nh[0]));
 1124:     return 4;
 1125:   }
 1126: 
 1127:   put_ip6(buf, ipa_to_ip6(nh[0]));
 1128: 
 1129:   if (len == 32)
 1130:     put_ip6(buf+16, ipa_to_ip6(nh[1]));
 1131: 
 1132:   return len;
 1133: }
 1134: 
 1135: static void
 1136: bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, rta *a)
 1137: {
 1138:   struct bgp_channel *c = s->channel;
 1139:   struct adata *ad = lp_alloc_adata(s->pool, 32);
 1140:   ip_addr *nh = (void *) ad->data;
 1141: 
 1142:   if (len == 4)
 1143:   {
 1144:     nh[0] = ipa_from_ip4(get_ip4(data));
 1145:     nh[1] = IPA_NONE;
 1146:   }
 1147:   else if (len == 16)
 1148:   {
 1149:     nh[0] = ipa_from_ip6(get_ip6(data));
 1150:     nh[1] = IPA_NONE;
 1151: 
 1152:     if (ipa_is_link_local(nh[0]))
 1153:     { nh[1] = nh[0]; nh[0] = IPA_NONE; }
 1154:   }
 1155:   else if (len == 32)
 1156:   {
 1157:     nh[0] = ipa_from_ip6(get_ip6(data));
 1158:     nh[1] = ipa_from_ip6(get_ip6(data+16));
 1159: 
 1160:     if (ipa_is_ip4(nh[0]) || !ip6_is_link_local(nh[1]))
 1161:       nh[1] = IPA_NONE;
 1162:   }
 1163:   else
 1164:     bgp_parse_error(s, 9);
 1165: 
 1166:   if (ipa_zero(nh[1]))
 1167:     ad->length = 16;
 1168: 
 1169:   if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop)
 1170:     WITHDRAW(BAD_NEXT_HOP);
 1171: 
 1172:   // XXXX validate next hop
 1173: 
 1174:   bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
 1175:   bgp_apply_next_hop(s, a, nh[0], nh[1]);
 1176: }
 1177: 
 1178: static uint
 1179: bgp_encode_next_hop_vpn(struct bgp_write_state *s, eattr *a, byte *buf, uint size UNUSED)
 1180: {
 1181:   ip_addr *nh = (void *) a->u.ptr->data;
 1182:   uint len = a->u.ptr->length;
 1183: 
 1184:   ASSERT((len == 16) || (len == 32));
 1185: 
 1186:   /*
 1187:    * Both IPv4 and IPv6 next hops can be used (with ext_next_hop enabled). This
 1188:    * is specified in RFC 5549 for VPNv4 and in RFC 4659 for VPNv6. The difference
 1189:    * is that IPv4 address is directly encoded with VPNv4 NLRI, but as IPv4-mapped
 1190:    * IPv6 address with VPNv6 NLRI.
 1191:    */
 1192: 
 1193:   if (bgp_channel_is_ipv4(s->channel) && ipa_is_ip4(nh[0]))
 1194:   {
 1195:     put_u64(buf, 0); /* VPN RD is 0 */
 1196:     put_ip4(buf+8, ipa_to_ip4(nh[0]));
 1197:     return 12;
 1198:   }
 1199: 
 1200:   put_u64(buf, 0); /* VPN RD is 0 */
 1201:   put_ip6(buf+8, ipa_to_ip6(nh[0]));
 1202: 
 1203:   if (len == 16)
 1204:     return 24;
 1205: 
 1206:   put_u64(buf+24, 0); /* VPN RD is 0 */
 1207:   put_ip6(buf+32, ipa_to_ip6(nh[1]));
 1208: 
 1209:   return 48;
 1210: }
 1211: 
 1212: static void
 1213: bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, rta *a)
 1214: {
 1215:   struct bgp_channel *c = s->channel;
 1216:   struct adata *ad = lp_alloc_adata(s->pool, 32);
 1217:   ip_addr *nh = (void *) ad->data;
 1218: 
 1219:   if (len == 12)
 1220:   {
 1221:     nh[0] = ipa_from_ip4(get_ip4(data+8));
 1222:     nh[1] = IPA_NONE;
 1223:   }
 1224:   else if (len == 24)
 1225:   {
 1226:     nh[0] = ipa_from_ip6(get_ip6(data+8));
 1227:     nh[1] = IPA_NONE;
 1228: 
 1229:     if (ipa_is_link_local(nh[0]))
 1230:     { nh[1] = nh[0]; nh[0] = IPA_NONE; }
 1231:   }
 1232:   else if (len == 48)
 1233:   {
 1234:     nh[0] = ipa_from_ip6(get_ip6(data+8));
 1235:     nh[1] = ipa_from_ip6(get_ip6(data+32));
 1236: 
 1237:     if (ipa_is_ip4(nh[0]) || !ip6_is_link_local(nh[1]))
 1238:       nh[1] = IPA_NONE;
 1239:   }
 1240:   else
 1241:     bgp_parse_error(s, 9);
 1242: 
 1243:   if (ipa_zero(nh[1]))
 1244:     ad->length = 16;
 1245: 
 1246:   /* XXXX which error */
 1247:   if ((get_u64(data) != 0) || ((len == 48) && (get_u64(data+24) != 0)))
 1248:     bgp_parse_error(s, 9);
 1249: 
 1250:   if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop)
 1251:     WITHDRAW(BAD_NEXT_HOP);
 1252: 
 1253:   // XXXX validate next hop
 1254: 
 1255:   bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
 1256:   bgp_apply_next_hop(s, a, nh[0], nh[1]);
 1257: }
 1258: 
 1259: 
 1260: 
 1261: static uint
 1262: bgp_encode_next_hop_none(struct bgp_write_state *s UNUSED, eattr *a UNUSED, byte *buf UNUSED, uint size UNUSED)
 1263: {
 1264:   return 0;
 1265: }
 1266: 
 1267: static void
 1268: bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, uint len UNUSED, rta *a UNUSED)
 1269: {
 1270:   /*
 1271:    * Although we expect no next hop and RFC 7606 7.11 states that attribute
 1272:    * MP_REACH_NLRI with unexpected next hop length is considered malformed,
 1273:    * FlowSpec RFC 5575 4 states that next hop shall be ignored on receipt.
 1274:    */
 1275: 
 1276:   return;
 1277: }
 1278: 
 1279: static void
 1280: bgp_update_next_hop_none(struct bgp_export_state *s, eattr *a, ea_list **to)
 1281: {
 1282:   /* NEXT_HOP shall not pass */
 1283:   if (a)
 1284:     bgp_unset_attr(to, s->pool, BA_NEXT_HOP);
 1285: }
 1286: 
 1287: 
 1288: /*
 1289:  *	UPDATE
 1290:  */
 1291: 
 1292: static void
 1293: bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0)
 1294: {
 1295:   if (path_id != s->last_id)
 1296:   {
 1297:     s->last_src = rt_get_source(&s->proto->p, path_id);
 1298:     s->last_id = path_id;
 1299: 
 1300:     rta_free(s->cached_rta);
 1301:     s->cached_rta = NULL;
 1302:   }
 1303: 
 1304:   if (!a0)
 1305:   {
 1306:     /* Route withdraw */
 1307:     rte_update3(&s->channel->c, n, NULL, s->last_src);
 1308:     return;
 1309:   }
 1310: 
 1311:   /* Prepare cached route attributes */
 1312:   if (s->cached_rta == NULL)
 1313:   {
 1314:     a0->src = s->last_src;
 1315: 
 1316:     /* Workaround for rta_lookup() breaking eattrs */
 1317:     ea_list *ea = a0->eattrs;
 1318:     s->cached_rta = rta_lookup(a0);
 1319:     a0->eattrs = ea;
 1320:   }
 1321: 
 1322:   rta *a = rta_clone(s->cached_rta);
 1323:   rte *e = rte_get_temp(a);
 1324: 
 1325:   e->pflags = 0;
 1326:   e->u.bgp.suppressed = 0;
 1327:   e->u.bgp.stale = -1;
 1328:   rte_update3(&s->channel->c, n, e, s->last_src);
 1329: }
 1330: 
 1331: static void
 1332: bgp_encode_mpls_labels(struct bgp_write_state *s UNUSED, const adata *mpls, byte **pos, uint *size, byte *pxlen)
 1333: {
 1334:   const u32 dummy = 0;
 1335:   const u32 *labels = mpls ? (const u32 *) mpls->data : &dummy;
 1336:   uint lnum = mpls ? (mpls->length / 4) : 1;
 1337: 
 1338:   for (uint i = 0; i < lnum; i++)
 1339:   {
 1340:     put_u24(*pos, labels[i] << 4);
 1341:     ADVANCE(*pos, *size, 3);
 1342:   }
 1343: 
 1344:   /* Add bottom-of-stack flag */
 1345:   (*pos)[-1] |= BGP_MPLS_BOS;
 1346: 
 1347:   *pxlen += 24 * lnum;
 1348: }
 1349: 
 1350: static void
 1351: bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *pxlen, rta *a)
 1352: {
 1353:   u32 labels[BGP_MPLS_MAX], label;
 1354:   uint lnum = 0;
 1355: 
 1356:   do {
 1357:     if (*pxlen < 24)
 1358:       bgp_parse_error(s, 1);
 1359: 
 1360:     label = get_u24(*pos);
 1361:     labels[lnum++] = label >> 4;
 1362:     ADVANCE(*pos, *len, 3);
 1363:     *pxlen -= 24;
 1364: 
 1365:     /* RFC 8277 2.4 - withdraw does not have variable-size MPLS stack but
 1366:        fixed-size 24-bit Compatibility field, which MUST be ignored */
 1367:     if (!a && !s->err_withdraw)
 1368:       return;
 1369:   }
 1370:   while (!(label & BGP_MPLS_BOS));
 1371: 
 1372:   if (!a)
 1373:     return;
 1374: 
 1375:   /* Attach MPLS attribute unless we already have one */
 1376:   if (!s->mpls_labels)
 1377:   {
 1378:     s->mpls_labels = lp_alloc_adata(s->pool, 4*BGP_MPLS_MAX);
 1379:     bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_MPLS_LABEL_STACK, 0, s->mpls_labels);
 1380:   }
 1381: 
 1382:   /* Overwrite data in the attribute */
 1383:   s->mpls_labels->length = 4*lnum;
 1384:   memcpy(s->mpls_labels->data, labels, 4*lnum);
 1385: 
 1386:   /* Update next hop entry in rta */
 1387:   bgp_apply_mpls_labels(s, a, labels, lnum);
 1388: 
 1389:   /* Attributes were changed, invalidate cached entry */
 1390:   rta_free(s->cached_rta);
 1391:   s->cached_rta = NULL;
 1392: 
 1393:   return;
 1394: }
 1395: 
 1396: static uint
 1397: bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
 1398: {
 1399:   byte *pos = buf;
 1400: 
 1401:   while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
 1402:   {
 1403:     struct bgp_prefix *px = HEAD(buck->prefixes);
 1404:     struct net_addr_ip4 *net = (void *) px->net;
 1405: 
 1406:     /* Encode path ID */
 1407:     if (s->add_path)
 1408:     {
 1409:       put_u32(pos, px->path_id);
 1410:       ADVANCE(pos, size, 4);
 1411:     }
 1412: 
 1413:     /* Encode prefix length */
 1414:     *pos = net->pxlen;
 1415:     ADVANCE(pos, size, 1);
 1416: 
 1417:     /* Encode MPLS labels */
 1418:     if (s->mpls)
 1419:       bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);
 1420: 
 1421:     /* Encode prefix body */
 1422:     ip4_addr a = ip4_hton(net->prefix);
 1423:     uint b = (net->pxlen + 7) / 8;
 1424:     memcpy(pos, &a, b);
 1425:     ADVANCE(pos, size, b);
 1426: 
 1427:     bgp_free_prefix(s->channel, px);
 1428:   }
 1429: 
 1430:   return pos - buf;
 1431: }
 1432: 
 1433: static void
 1434: bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
 1435: {
 1436:   while (len)
 1437:   {
 1438:     net_addr_ip4 net;
 1439:     u32 path_id = 0;
 1440: 
 1441:     /* Decode path ID */
 1442:     if (s->add_path)
 1443:     {
 1444:       if (len < 5)
 1445: 	bgp_parse_error(s, 1);
 1446: 
 1447:       path_id = get_u32(pos);
 1448:       ADVANCE(pos, len, 4);
 1449:     }
 1450: 
 1451:     /* Decode prefix length */
 1452:     uint l = *pos;
 1453:     ADVANCE(pos, len, 1);
 1454: 
 1455:     if (len < ((l + 7) / 8))
 1456:       bgp_parse_error(s, 1);
 1457: 
 1458:     /* Decode MPLS labels */
 1459:     if (s->mpls)
 1460:       bgp_decode_mpls_labels(s, &pos, &len, &l, a);
 1461: 
 1462:     if (l > IP4_MAX_PREFIX_LENGTH)
 1463:       bgp_parse_error(s, 10);
 1464: 
 1465:     /* Decode prefix body */
 1466:     ip4_addr addr = IP4_NONE;
 1467:     uint b = (l + 7) / 8;
 1468:     memcpy(&addr, pos, b);
 1469:     ADVANCE(pos, len, b);
 1470: 
 1471:     net = NET_ADDR_IP4(ip4_ntoh(addr), l);
 1472:     net_normalize_ip4(&net);
 1473: 
 1474:     // XXXX validate prefix
 1475: 
 1476:     bgp_rte_update(s, (net_addr *) &net, path_id, a);
 1477:   }
 1478: }
 1479: 
 1480: 
 1481: static uint
 1482: bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
 1483: {
 1484:   byte *pos = buf;
 1485: 
 1486:   while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
 1487:   {
 1488:     struct bgp_prefix *px = HEAD(buck->prefixes);
 1489:     struct net_addr_ip6 *net = (void *) px->net;
 1490: 
 1491:     /* Encode path ID */
 1492:     if (s->add_path)
 1493:     {
 1494:       put_u32(pos, px->path_id);
 1495:       ADVANCE(pos, size, 4);
 1496:     }
 1497: 
 1498:     /* Encode prefix length */
 1499:     *pos = net->pxlen;
 1500:     ADVANCE(pos, size, 1);
 1501: 
 1502:     /* Encode MPLS labels */
 1503:     if (s->mpls)
 1504:       bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);
 1505: 
 1506:     /* Encode prefix body */
 1507:     ip6_addr a = ip6_hton(net->prefix);
 1508:     uint b = (net->pxlen + 7) / 8;
 1509:     memcpy(pos, &a, b);
 1510:     ADVANCE(pos, size, b);
 1511: 
 1512:     bgp_free_prefix(s->channel, px);
 1513:   }
 1514: 
 1515:   return pos - buf;
 1516: }
 1517: 
 1518: static void
 1519: bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
 1520: {
 1521:   while (len)
 1522:   {
 1523:     net_addr_ip6 net;
 1524:     u32 path_id = 0;
 1525: 
 1526:     /* Decode path ID */
 1527:     if (s->add_path)
 1528:     {
 1529:       if (len < 5)
 1530: 	bgp_parse_error(s, 1);
 1531: 
 1532:       path_id = get_u32(pos);
 1533:       ADVANCE(pos, len, 4);
 1534:     }
 1535: 
 1536:     /* Decode prefix length */
 1537:     uint l = *pos;
 1538:     ADVANCE(pos, len, 1);
 1539: 
 1540:     if (len < ((l + 7) / 8))
 1541:       bgp_parse_error(s, 1);
 1542: 
 1543:     /* Decode MPLS labels */
 1544:     if (s->mpls)
 1545:       bgp_decode_mpls_labels(s, &pos, &len, &l, a);
 1546: 
 1547:     if (l > IP6_MAX_PREFIX_LENGTH)
 1548:       bgp_parse_error(s, 10);
 1549: 
 1550:     /* Decode prefix body */
 1551:     ip6_addr addr = IP6_NONE;
 1552:     uint b = (l + 7) / 8;
 1553:     memcpy(&addr, pos, b);
 1554:     ADVANCE(pos, len, b);
 1555: 
 1556:     net = NET_ADDR_IP6(ip6_ntoh(addr), l);
 1557:     net_normalize_ip6(&net);
 1558: 
 1559:     // XXXX validate prefix
 1560: 
 1561:     bgp_rte_update(s, (net_addr *) &net, path_id, a);
 1562:   }
 1563: }
 1564: 
 1565: static uint
 1566: bgp_encode_nlri_vpn4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
 1567: {
 1568:   byte *pos = buf;
 1569: 
 1570:   while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
 1571:   {
 1572:     struct bgp_prefix *px = HEAD(buck->prefixes);
 1573:     struct net_addr_vpn4 *net = (void *) px->net;
 1574: 
 1575:     /* Encode path ID */
 1576:     if (s->add_path)
 1577:     {
 1578:       put_u32(pos, px->path_id);
 1579:       ADVANCE(pos, size, 4);
 1580:     }
 1581: 
 1582:     /* Encode prefix length */
 1583:     *pos = 64 + net->pxlen;
 1584:     ADVANCE(pos, size, 1);
 1585: 
 1586:     /* Encode MPLS labels */
 1587:     if (s->mpls)
 1588:       bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);
 1589: 
 1590:     /* Encode route distinguisher */
 1591:     put_u64(pos, net->rd);
 1592:     ADVANCE(pos, size, 8);
 1593: 
 1594:     /* Encode prefix body */
 1595:     ip4_addr a = ip4_hton(net->prefix);
 1596:     uint b = (net->pxlen + 7) / 8;
 1597:     memcpy(pos, &a, b);
 1598:     ADVANCE(pos, size, b);
 1599: 
 1600:     bgp_free_prefix(s->channel, px);
 1601:   }
 1602: 
 1603:   return pos - buf;
 1604: }
 1605: 
 1606: static void
 1607: bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
 1608: {
 1609:   while (len)
 1610:   {
 1611:     net_addr_vpn4 net;
 1612:     u32 path_id = 0;
 1613: 
 1614:     /* Decode path ID */
 1615:     if (s->add_path)
 1616:     {
 1617:       if (len < 5)
 1618: 	bgp_parse_error(s, 1);
 1619: 
 1620:       path_id = get_u32(pos);
 1621:       ADVANCE(pos, len, 4);
 1622:     }
 1623: 
 1624:     /* Decode prefix length */
 1625:     uint l = *pos;
 1626:     ADVANCE(pos, len, 1);
 1627: 
 1628:     if (len < ((l + 7) / 8))
 1629:       bgp_parse_error(s, 1);
 1630: 
 1631:     /* Decode MPLS labels */
 1632:     if (s->mpls)
 1633:       bgp_decode_mpls_labels(s, &pos, &len, &l, a);
 1634: 
 1635:     /* Decode route distinguisher */
 1636:     if (l < 64)
 1637:       bgp_parse_error(s, 1);
 1638: 
 1639:     u64 rd = get_u64(pos);
 1640:     ADVANCE(pos, len, 8);
 1641:     l -= 64;
 1642: 
 1643:     if (l > IP4_MAX_PREFIX_LENGTH)
 1644:       bgp_parse_error(s, 10);
 1645: 
 1646:     /* Decode prefix body */
 1647:     ip4_addr addr = IP4_NONE;
 1648:     uint b = (l + 7) / 8;
 1649:     memcpy(&addr, pos, b);
 1650:     ADVANCE(pos, len, b);
 1651: 
 1652:     net = NET_ADDR_VPN4(ip4_ntoh(addr), l, rd);
 1653:     net_normalize_vpn4(&net);
 1654: 
 1655:     // XXXX validate prefix
 1656: 
 1657:     bgp_rte_update(s, (net_addr *) &net, path_id, a);
 1658:   }
 1659: }
 1660: 
 1661: 
 1662: static uint
 1663: bgp_encode_nlri_vpn6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
 1664: {
 1665:   byte *pos = buf;
 1666: 
 1667:   while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
 1668:   {
 1669:     struct bgp_prefix *px = HEAD(buck->prefixes);
 1670:     struct net_addr_vpn6 *net = (void *) px->net;
 1671: 
 1672:     /* Encode path ID */
 1673:     if (s->add_path)
 1674:     {
 1675:       put_u32(pos, px->path_id);
 1676:       ADVANCE(pos, size, 4);
 1677:     }
 1678: 
 1679:     /* Encode prefix length */
 1680:     *pos = 64 + net->pxlen;
 1681:     ADVANCE(pos, size, 1);
 1682: 
 1683:     /* Encode MPLS labels */
 1684:     if (s->mpls)
 1685:       bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);
 1686: 
 1687:     /* Encode route distinguisher */
 1688:     put_u64(pos, net->rd);
 1689:     ADVANCE(pos, size, 8);
 1690: 
 1691:     /* Encode prefix body */
 1692:     ip6_addr a = ip6_hton(net->prefix);
 1693:     uint b = (net->pxlen + 7) / 8;
 1694:     memcpy(pos, &a, b);
 1695:     ADVANCE(pos, size, b);
 1696: 
 1697:     bgp_free_prefix(s->channel, px);
 1698:   }
 1699: 
 1700:   return pos - buf;
 1701: }
 1702: 
 1703: static void
 1704: bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
 1705: {
 1706:   while (len)
 1707:   {
 1708:     net_addr_vpn6 net;
 1709:     u32 path_id = 0;
 1710: 
 1711:     /* Decode path ID */
 1712:     if (s->add_path)
 1713:     {
 1714:       if (len < 5)
 1715: 	bgp_parse_error(s, 1);
 1716: 
 1717:       path_id = get_u32(pos);
 1718:       ADVANCE(pos, len, 4);
 1719:     }
 1720: 
 1721:     /* Decode prefix length */
 1722:     uint l = *pos;
 1723:     ADVANCE(pos, len, 1);
 1724: 
 1725:     if (len < ((l + 7) / 8))
 1726:       bgp_parse_error(s, 1);
 1727: 
 1728:     /* Decode MPLS labels */
 1729:     if (s->mpls)
 1730:       bgp_decode_mpls_labels(s, &pos, &len, &l, a);
 1731: 
 1732:     /* Decode route distinguisher */
 1733:     if (l < 64)
 1734:       bgp_parse_error(s, 1);
 1735: 
 1736:     u64 rd = get_u64(pos);
 1737:     ADVANCE(pos, len, 8);
 1738:     l -= 64;
 1739: 
 1740:     if (l > IP6_MAX_PREFIX_LENGTH)
 1741:       bgp_parse_error(s, 10);
 1742: 
 1743:     /* Decode prefix body */
 1744:     ip6_addr addr = IP6_NONE;
 1745:     uint b = (l + 7) / 8;
 1746:     memcpy(&addr, pos, b);
 1747:     ADVANCE(pos, len, b);
 1748: 
 1749:     net = NET_ADDR_VPN6(ip6_ntoh(addr), l, rd);
 1750:     net_normalize_vpn6(&net);
 1751: 
 1752:     // XXXX validate prefix
 1753: 
 1754:     bgp_rte_update(s, (net_addr *) &net, path_id, a);
 1755:   }
 1756: }
 1757: 
 1758: 
 1759: static uint
 1760: bgp_encode_nlri_flow4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
 1761: {
 1762:   byte *pos = buf;
 1763: 
 1764:   while (!EMPTY_LIST(buck->prefixes) && (size >= 4))
 1765:   {
 1766:     struct bgp_prefix *px = HEAD(buck->prefixes);
 1767:     struct net_addr_flow4 *net = (void *) px->net;
 1768:     uint flen = net->length - sizeof(net_addr_flow4);
 1769: 
 1770:     /* Encode path ID */
 1771:     if (s->add_path)
 1772:     {
 1773:       put_u32(pos, px->path_id);
 1774:       ADVANCE(pos, size, 4);
 1775:     }
 1776: 
 1777:     if (flen > size)
 1778:       break;
 1779: 
 1780:     /* Copy whole flow data including length */
 1781:     memcpy(pos, net->data, flen);
 1782:     ADVANCE(pos, size, flen);
 1783: 
 1784:     bgp_free_prefix(s->channel, px);
 1785:   }
 1786: 
 1787:   return pos - buf;
 1788: }
 1789: 
 1790: static void
 1791: bgp_decode_nlri_flow4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
 1792: {
 1793:   while (len)
 1794:   {
 1795:     u32 path_id = 0;
 1796: 
 1797:     /* Decode path ID */
 1798:     if (s->add_path)
 1799:     {
 1800:       if (len < 4)
 1801: 	bgp_parse_error(s, 1);
 1802: 
 1803:       path_id = get_u32(pos);
 1804:       ADVANCE(pos, len, 4);
 1805:     }
 1806: 
 1807:     if (len < 2)
 1808:       bgp_parse_error(s, 1);
 1809: 
 1810:     /* Decode flow length */
 1811:     uint hlen = flow_hdr_length(pos);
 1812:     uint dlen = flow_read_length(pos);
 1813:     uint flen = hlen + dlen;
 1814:     byte *data = pos + hlen;
 1815: 
 1816:     if (len < flen)
 1817:       bgp_parse_error(s, 1);
 1818: 
 1819:     /* Validate flow data */
 1820:     enum flow_validated_state r = flow4_validate(data, dlen);
 1821:     if (r != FLOW_ST_VALID)
 1822:     {
 1823:       log(L_REMOTE "%s: Invalid flow route: %s", s->proto->p.name, flow_validated_state_str(r));
 1824:       bgp_parse_error(s, 1);
 1825:     }
 1826: 
 1827:     if (data[0] != FLOW_TYPE_DST_PREFIX)
 1828:     {
 1829:       log(L_REMOTE "%s: No dst prefix at first pos", s->proto->p.name);
 1830:       bgp_parse_error(s, 1);
 1831:     }
 1832: 
 1833:     /* Decode dst prefix */
 1834:     ip4_addr px = IP4_NONE;
 1835:     uint pxlen = data[1];
 1836: 
 1837:     // FIXME: Use some generic function
 1838:     memcpy(&px, data+2, BYTES(pxlen));
 1839:     px = ip4_and(ip4_ntoh(px), ip4_mkmask(pxlen));
 1840: 
 1841:     /* Prepare the flow */
 1842:     net_addr *n = alloca(sizeof(struct net_addr_flow4) + flen);
 1843:     net_fill_flow4(n, px, pxlen, pos, flen);
 1844:     ADVANCE(pos, len, flen);
 1845: 
 1846:     bgp_rte_update(s, n, path_id, a);
 1847:   }
 1848: }
 1849: 
 1850: 
 1851: static uint
 1852: bgp_encode_nlri_flow6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
 1853: {
 1854:   byte *pos = buf;
 1855: 
 1856:   while (!EMPTY_LIST(buck->prefixes) && (size >= 4))
 1857:   {
 1858:     struct bgp_prefix *px = HEAD(buck->prefixes);
 1859:     struct net_addr_flow6 *net = (void *) px->net;
 1860:     uint flen = net->length - sizeof(net_addr_flow6);
 1861: 
 1862:     /* Encode path ID */
 1863:     if (s->add_path)
 1864:     {
 1865:       put_u32(pos, px->path_id);
 1866:       ADVANCE(pos, size, 4);
 1867:     }
 1868: 
 1869:     if (flen > size)
 1870:       break;
 1871: 
 1872:     /* Copy whole flow data including length */
 1873:     memcpy(pos, net->data, flen);
 1874:     ADVANCE(pos, size, flen);
 1875: 
 1876:     bgp_free_prefix(s->channel, px);
 1877:   }
 1878: 
 1879:   return pos - buf;
 1880: }
 1881: 
 1882: static void
 1883: bgp_decode_nlri_flow6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
 1884: {
 1885:   while (len)
 1886:   {
 1887:     u32 path_id = 0;
 1888: 
 1889:     /* Decode path ID */
 1890:     if (s->add_path)
 1891:     {
 1892:       if (len < 4)
 1893: 	bgp_parse_error(s, 1);
 1894: 
 1895:       path_id = get_u32(pos);
 1896:       ADVANCE(pos, len, 4);
 1897:     }
 1898: 
 1899:     if (len < 2)
 1900:       bgp_parse_error(s, 1);
 1901: 
 1902:     /* Decode flow length */
 1903:     uint hlen = flow_hdr_length(pos);
 1904:     uint dlen = flow_read_length(pos);
 1905:     uint flen = hlen + dlen;
 1906:     byte *data = pos + hlen;
 1907: 
 1908:     if (len < flen)
 1909:       bgp_parse_error(s, 1);
 1910: 
 1911:     /* Validate flow data */
 1912:     enum flow_validated_state r = flow6_validate(data, dlen);
 1913:     if (r != FLOW_ST_VALID)
 1914:     {
 1915:       log(L_REMOTE "%s: Invalid flow route: %s", s->proto->p.name, flow_validated_state_str(r));
 1916:       bgp_parse_error(s, 1);
 1917:     }
 1918: 
 1919:     if (data[0] != FLOW_TYPE_DST_PREFIX)
 1920:     {
 1921:       log(L_REMOTE "%s: No dst prefix at first pos", s->proto->p.name);
 1922:       bgp_parse_error(s, 1);
 1923:     }
 1924: 
 1925:     /* Decode dst prefix */
 1926:     ip6_addr px = IP6_NONE;
 1927:     uint pxlen = data[1];
 1928: 
 1929:     // FIXME: Use some generic function
 1930:     memcpy(&px, data+2, BYTES(pxlen));
 1931:     px = ip6_and(ip6_ntoh(px), ip6_mkmask(pxlen));
 1932: 
 1933:     /* Prepare the flow */
 1934:     net_addr *n = alloca(sizeof(struct net_addr_flow6) + flen);
 1935:     net_fill_flow6(n, px, pxlen, pos, flen);
 1936:     ADVANCE(pos, len, flen);
 1937: 
 1938:     bgp_rte_update(s, n, path_id, a);
 1939:   }
 1940: }
 1941: 
 1942: 
 1943: static const struct bgp_af_desc bgp_af_table[] = {
 1944:   {
 1945:     .afi = BGP_AF_IPV4,
 1946:     .net = NET_IP4,
 1947:     .name = "ipv4",
 1948:     .encode_nlri = bgp_encode_nlri_ip4,
 1949:     .decode_nlri = bgp_decode_nlri_ip4,
 1950:     .encode_next_hop = bgp_encode_next_hop_ip,
 1951:     .decode_next_hop = bgp_decode_next_hop_ip,
 1952:     .update_next_hop = bgp_update_next_hop_ip,
 1953:   },
 1954:   {
 1955:     .afi = BGP_AF_IPV4_MC,
 1956:     .net = NET_IP4,
 1957:     .name = "ipv4-mc",
 1958:     .encode_nlri = bgp_encode_nlri_ip4,
 1959:     .decode_nlri = bgp_decode_nlri_ip4,
 1960:     .encode_next_hop = bgp_encode_next_hop_ip,
 1961:     .decode_next_hop = bgp_decode_next_hop_ip,
 1962:     .update_next_hop = bgp_update_next_hop_ip,
 1963:   },
 1964:   {
 1965:     .afi = BGP_AF_IPV4_MPLS,
 1966:     .net = NET_IP4,
 1967:     .mpls = 1,
 1968:     .name = "ipv4-mpls",
 1969:     .encode_nlri = bgp_encode_nlri_ip4,
 1970:     .decode_nlri = bgp_decode_nlri_ip4,
 1971:     .encode_next_hop = bgp_encode_next_hop_ip,
 1972:     .decode_next_hop = bgp_decode_next_hop_ip,
 1973:     .update_next_hop = bgp_update_next_hop_ip,
 1974:   },
 1975:   {
 1976:     .afi = BGP_AF_IPV6,
 1977:     .net = NET_IP6,
 1978:     .name = "ipv6",
 1979:     .encode_nlri = bgp_encode_nlri_ip6,
 1980:     .decode_nlri = bgp_decode_nlri_ip6,
 1981:     .encode_next_hop = bgp_encode_next_hop_ip,
 1982:     .decode_next_hop = bgp_decode_next_hop_ip,
 1983:     .update_next_hop = bgp_update_next_hop_ip,
 1984:   },
 1985:   {
 1986:     .afi = BGP_AF_IPV6_MC,
 1987:     .net = NET_IP6,
 1988:     .name = "ipv6-mc",
 1989:     .encode_nlri = bgp_encode_nlri_ip6,
 1990:     .decode_nlri = bgp_decode_nlri_ip6,
 1991:     .encode_next_hop = bgp_encode_next_hop_ip,
 1992:     .decode_next_hop = bgp_decode_next_hop_ip,
 1993:     .update_next_hop = bgp_update_next_hop_ip,
 1994:   },
 1995:   {
 1996:     .afi = BGP_AF_IPV6_MPLS,
 1997:     .net = NET_IP6,
 1998:     .mpls = 1,
 1999:     .name = "ipv6-mpls",
 2000:     .encode_nlri = bgp_encode_nlri_ip6,
 2001:     .decode_nlri = bgp_decode_nlri_ip6,
 2002:     .encode_next_hop = bgp_encode_next_hop_ip,
 2003:     .decode_next_hop = bgp_decode_next_hop_ip,
 2004:     .update_next_hop = bgp_update_next_hop_ip,
 2005:   },
 2006:   {
 2007:     .afi = BGP_AF_VPN4_MPLS,
 2008:     .net = NET_VPN4,
 2009:     .mpls = 1,
 2010:     .name = "vpn4-mpls",
 2011:     .encode_nlri = bgp_encode_nlri_vpn4,
 2012:     .decode_nlri = bgp_decode_nlri_vpn4,
 2013:     .encode_next_hop = bgp_encode_next_hop_vpn,
 2014:     .decode_next_hop = bgp_decode_next_hop_vpn,
 2015:     .update_next_hop = bgp_update_next_hop_ip,
 2016:   },
 2017:   {
 2018:     .afi = BGP_AF_VPN6_MPLS,
 2019:     .net = NET_VPN6,
 2020:     .mpls = 1,
 2021:     .name = "vpn6-mpls",
 2022:     .encode_nlri = bgp_encode_nlri_vpn6,
 2023:     .decode_nlri = bgp_decode_nlri_vpn6,
 2024:     .encode_next_hop = bgp_encode_next_hop_vpn,
 2025:     .decode_next_hop = bgp_decode_next_hop_vpn,
 2026:     .update_next_hop = bgp_update_next_hop_ip,
 2027:   },
 2028:   {
 2029:     .afi = BGP_AF_VPN4_MC,
 2030:     .net = NET_VPN4,
 2031:     .name = "vpn4-mc",
 2032:     .encode_nlri = bgp_encode_nlri_vpn4,
 2033:     .decode_nlri = bgp_decode_nlri_vpn4,
 2034:     .encode_next_hop = bgp_encode_next_hop_vpn,
 2035:     .decode_next_hop = bgp_decode_next_hop_vpn,
 2036:     .update_next_hop = bgp_update_next_hop_ip,
 2037:   },
 2038:   {
 2039:     .afi = BGP_AF_VPN6_MC,
 2040:     .net = NET_VPN6,
 2041:     .name = "vpn6-mc",
 2042:     .encode_nlri = bgp_encode_nlri_vpn6,
 2043:     .decode_nlri = bgp_decode_nlri_vpn6,
 2044:     .encode_next_hop = bgp_encode_next_hop_vpn,
 2045:     .decode_next_hop = bgp_decode_next_hop_vpn,
 2046:     .update_next_hop = bgp_update_next_hop_ip,
 2047:   },
 2048:   {
 2049:     .afi = BGP_AF_FLOW4,
 2050:     .net = NET_FLOW4,
 2051:     .no_igp = 1,
 2052:     .name = "flow4",
 2053:     .encode_nlri = bgp_encode_nlri_flow4,
 2054:     .decode_nlri = bgp_decode_nlri_flow4,
 2055:     .encode_next_hop = bgp_encode_next_hop_none,
 2056:     .decode_next_hop = bgp_decode_next_hop_none,
 2057:     .update_next_hop = bgp_update_next_hop_none,
 2058:   },
 2059:   {
 2060:     .afi = BGP_AF_FLOW6,
 2061:     .net = NET_FLOW6,
 2062:     .no_igp = 1,
 2063:     .name = "flow6",
 2064:     .encode_nlri = bgp_encode_nlri_flow6,
 2065:     .decode_nlri = bgp_decode_nlri_flow6,
 2066:     .encode_next_hop = bgp_encode_next_hop_none,
 2067:     .decode_next_hop = bgp_decode_next_hop_none,
 2068:     .update_next_hop = bgp_update_next_hop_none,
 2069:   },
 2070: };
 2071: 
 2072: const struct bgp_af_desc *
 2073: bgp_get_af_desc(u32 afi)
 2074: {
 2075:   uint i;
 2076:   for (i = 0; i < ARRAY_SIZE(bgp_af_table); i++)
 2077:     if (bgp_af_table[i].afi == afi)
 2078:       return &bgp_af_table[i];
 2079: 
 2080:   return NULL;
 2081: }
 2082: 
 2083: static inline uint
 2084: bgp_encode_nlri(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
 2085: {
 2086:   return s->channel->desc->encode_nlri(s, buck, buf, end - buf);
 2087: }
 2088: 
 2089: static inline uint
 2090: bgp_encode_next_hop(struct bgp_write_state *s, eattr *nh, byte *buf)
 2091: {
 2092:   return s->channel->desc->encode_next_hop(s, nh, buf, 255);
 2093: }
 2094: 
 2095: void
 2096: bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to)
 2097: {
 2098:   s->channel->desc->update_next_hop(s, a, to);
 2099: }
 2100: 
 2101: #define MAX_ATTRS_LENGTH (end-buf+BGP_HEADER_LENGTH - 1024)
 2102: 
 2103: static byte *
 2104: bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
 2105: {
 2106:   /*
 2107:    *	2 B	Withdrawn Routes Length (zero)
 2108:    *	---	IPv4 Withdrawn Routes NLRI (unused)
 2109:    *	2 B	Total Path Attribute Length
 2110:    *	var	Path Attributes
 2111:    *	var	IPv4 Network Layer Reachability Information
 2112:    */
 2113: 
 2114:   int lr, la;
 2115: 
 2116:   la = bgp_encode_attrs(s, buck->eattrs, buf+4, buf + MAX_ATTRS_LENGTH);
 2117:   if (la < 0)
 2118:   {
 2119:     /* Attribute list too long */
 2120:     bgp_withdraw_bucket(s->channel, buck);
 2121:     return NULL;
 2122:   }
 2123: 
 2124:   put_u16(buf+0, 0);
 2125:   put_u16(buf+2, la);
 2126: 
 2127:   lr = bgp_encode_nlri(s, buck, buf+4+la, end);
 2128: 
 2129:   return buf+4+la+lr;
 2130: }
 2131: 
 2132: static byte *
 2133: bgp_create_mp_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
 2134: {
 2135:   /*
 2136:    *	2 B	IPv4 Withdrawn Routes Length (zero)
 2137:    *	---	IPv4 Withdrawn Routes NLRI (unused)
 2138:    *	2 B	Total Path Attribute Length
 2139:    *	1 B	MP_REACH_NLRI hdr - Attribute Flags
 2140:    *	1 B	MP_REACH_NLRI hdr - Attribute Type Code
 2141:    *	2 B	MP_REACH_NLRI hdr - Length of Attribute Data
 2142:    *	2 B	MP_REACH_NLRI data - Address Family Identifier
 2143:    *	1 B	MP_REACH_NLRI data - Subsequent Address Family Identifier
 2144:    *	1 B	MP_REACH_NLRI data - Length of Next Hop Network Address
 2145:    *	var	MP_REACH_NLRI data - Network Address of Next Hop
 2146:    *	1 B	MP_REACH_NLRI data - Reserved (zero)
 2147:    *	var	MP_REACH_NLRI data - Network Layer Reachability Information
 2148:    *	var	Rest of Path Attributes
 2149:    *	---	IPv4 Network Layer Reachability Information (unused)
 2150:    */
 2151: 
 2152:   int lh, lr, la;	/* Lengths of next hop, NLRI and attributes */
 2153: 
 2154:   /* Begin of MP_REACH_NLRI atribute */
 2155:   buf[4] = BAF_OPTIONAL | BAF_EXT_LEN;
 2156:   buf[5] = BA_MP_REACH_NLRI;
 2157:   put_u16(buf+6, 0);		/* Will be fixed later */
 2158:   put_af3(buf+8, s->channel->afi);
 2159:   byte *pos = buf+11;
 2160: 
 2161:   /* Encode attributes to temporary buffer */
 2162:   byte *abuf = alloca(MAX_ATTRS_LENGTH);
 2163:   la = bgp_encode_attrs(s, buck->eattrs, abuf, abuf + MAX_ATTRS_LENGTH);
 2164:   if (la < 0)
 2165:   {
 2166:     /* Attribute list too long */
 2167:     bgp_withdraw_bucket(s->channel, buck);
 2168:     return NULL;
 2169:   }
 2170: 
 2171:   /* Encode the next hop */
 2172:   lh = bgp_encode_next_hop(s, s->mp_next_hop, pos+1);
 2173:   *pos = lh;
 2174:   pos += 1+lh;
 2175: 
 2176:   /* Reserved field */
 2177:   *pos++ = 0;
 2178: 
 2179:   /* Encode the NLRI */
 2180:   lr = bgp_encode_nlri(s, buck, pos, end - la);
 2181:   pos += lr;
 2182: 
 2183:   /* End of MP_REACH_NLRI atribute, update data length */
 2184:   put_u16(buf+6, pos-buf-8);
 2185: 
 2186:   /* Copy remaining attributes */
 2187:   memcpy(pos, abuf, la);
 2188:   pos += la;
 2189: 
 2190:   /* Initial UPDATE fields */
 2191:   put_u16(buf+0, 0);
 2192:   put_u16(buf+2, pos-buf-4);
 2193: 
 2194:   return pos;
 2195: }
 2196: 
 2197: #undef MAX_ATTRS_LENGTH
 2198: 
 2199: static byte *
 2200: bgp_create_ip_unreach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
 2201: {
 2202:   /*
 2203:    *	2 B	Withdrawn Routes Length
 2204:    *	var	IPv4 Withdrawn Routes NLRI
 2205:    *	2 B	Total Path Attribute Length (zero)
 2206:    *	---	Path Attributes (unused)
 2207:    *	---	IPv4 Network Layer Reachability Information (unused)
 2208:    */
 2209: 
 2210:   uint len = bgp_encode_nlri(s, buck, buf+2, end);
 2211: 
 2212:   put_u16(buf+0, len);
 2213:   put_u16(buf+2+len, 0);
 2214: 
 2215:   return buf+4+len;
 2216: }
 2217: 
 2218: static byte *
 2219: bgp_create_mp_unreach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
 2220: {
 2221:   /*
 2222:    *	2 B	Withdrawn Routes Length (zero)
 2223:    *	---	IPv4 Withdrawn Routes NLRI (unused)
 2224:    *	2 B	Total Path Attribute Length
 2225:    *	1 B	MP_UNREACH_NLRI hdr - Attribute Flags
 2226:    *	1 B	MP_UNREACH_NLRI hdr - Attribute Type Code
 2227:    *	2 B	MP_UNREACH_NLRI hdr - Length of Attribute Data
 2228:    *	2 B	MP_UNREACH_NLRI data - Address Family Identifier
 2229:    *	1 B	MP_UNREACH_NLRI data - Subsequent Address Family Identifier
 2230:    *	var	MP_UNREACH_NLRI data - Network Layer Reachability Information
 2231:    *	---	IPv4 Network Layer Reachability Information (unused)
 2232:    */
 2233: 
 2234:   uint len = bgp_encode_nlri(s, buck, buf+11, end);
 2235: 
 2236:   put_u16(buf+0, 0);
 2237:   put_u16(buf+2, 7+len);
 2238: 
 2239:   /* Begin of MP_UNREACH_NLRI atribute */
 2240:   buf[4] = BAF_OPTIONAL | BAF_EXT_LEN;
 2241:   buf[5] = BA_MP_UNREACH_NLRI;
 2242:   put_u16(buf+6, 3+len);
 2243:   put_af3(buf+8, s->channel->afi);
 2244: 
 2245:   return buf+11+len;
 2246: }
 2247: 
/*
 * Build the body of the next UPDATE message of channel @c at @buf.
 *
 * A non-empty withdraw bucket is served first; otherwise the head of the
 * channel's bucket queue is encoded. Returns the end of the written data,
 * or NULL when there is nothing left to send.
 */
static byte *
bgp_create_update(struct bgp_channel *c, byte *buf)
{
  struct bgp_proto *p = (void *) c->c.proto;
  struct bgp_bucket *buck;
  /* Limit for the body: maximum packet length minus the message header */
  byte *end = buf + (bgp_max_packet_length(p->conn) - BGP_HEADER_LENGTH);
  byte *res = NULL;

  /* Every retry re-enters here, so the write state below is set up anew */
again: ;

  /* Initialize write state */
  struct bgp_write_state s = {
    .proto = p,
    .channel = c,
    .pool = bgp_linpool,
    .mp_reach = (c->afi != BGP_AF_IPV4) || c->ext_next_hop,
    .as4_session = p->as4_session,
    .add_path = c->add_path_tx,
    .mpls = c->desc->mpls,
  };

  /* Try unreachable bucket */
  if ((buck = c->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
  {
    /* Same condition as .mp_reach above, negated: plain format only for IPv4 without extended next hop */
    res = (c->afi == BGP_AF_IPV4) && !c->ext_next_hop ?
      bgp_create_ip_unreach(&s, buck, buf, end):
      bgp_create_mp_unreach(&s, buck, buf, end);

    goto done;
  }

  /* Try reachable buckets */
  if (!EMPTY_LIST(c->bucket_queue))
  {
    buck = HEAD(c->bucket_queue);

    /* Cleanup empty buckets */
    if (EMPTY_LIST(buck->prefixes))
    {
      bgp_free_bucket(c, buck);
      goto again;
    }

    res = !s.mp_reach ?
      bgp_create_ip_reach(&s, buck, buf, end):
      bgp_create_mp_reach(&s, buck, buf, end);

    /* A bucket with no prefixes left is freed, otherwise it is handed to bgp_defer_bucket() */
    if (EMPTY_LIST(buck->prefixes))
      bgp_free_bucket(c, buck);
    else
      bgp_defer_bucket(c, buck);

    /* NULL means the attributes did not fit and the bucket was withdrawn; start over */
    if (!res)
      goto again;

    goto done;
  }

  /* No more prefixes to send */
  return NULL;

done:
  BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
  lp_flush(s.pool);

  return res;
}
 2315: 
 2316: static byte *
 2317: bgp_create_ip_end_mark(struct bgp_channel *c UNUSED, byte *buf)
 2318: {
 2319:   /* Empty update packet */
 2320:   put_u32(buf, 0);
 2321: 
 2322:   return buf+4;
 2323: }
 2324: 
 2325: static byte *
 2326: bgp_create_mp_end_mark(struct bgp_channel *c, byte *buf)
 2327: {
 2328:   put_u16(buf+0, 0);
 2329:   put_u16(buf+2, 6);		/* length 4--9 */
 2330: 
 2331:   /* Empty MP_UNREACH_NLRI atribute */
 2332:   buf[4] = BAF_OPTIONAL;
 2333:   buf[5] = BA_MP_UNREACH_NLRI;
 2334:   buf[6] = 3;			/* Length 7--9 */
 2335:   put_af3(buf+7, c->afi);
 2336: 
 2337:   return buf+10;
 2338: }
 2339: 
 2340: static byte *
 2341: bgp_create_end_mark(struct bgp_channel *c, byte *buf)
 2342: {
 2343:   struct bgp_proto *p = (void *) c->c.proto;
 2344: 
 2345:   BGP_TRACE(D_PACKETS, "Sending END-OF-RIB");
 2346: 
 2347:   return (c->afi == BGP_AF_IPV4) ?
 2348:     bgp_create_ip_end_mark(c, buf):
 2349:     bgp_create_mp_end_mark(c, buf);
 2350: }
 2351: 
 2352: static inline void
 2353: bgp_rx_end_mark(struct bgp_parse_state *s, u32 afi)
 2354: {
 2355:   struct bgp_proto *p = s->proto;
 2356:   struct bgp_channel *c = bgp_get_channel(p, afi);
 2357: 
 2358:   BGP_TRACE(D_PACKETS, "Got END-OF-RIB");
 2359: 
 2360:   if (!c)
 2361:     DISCARD(BAD_AFI, BGP_AFI(afi), BGP_SAFI(afi));
 2362: 
 2363:   if (c->load_state == BFS_LOADING)
 2364:     c->load_state = BFS_NONE;
 2365: 
 2366:   if (p->p.gr_recovery)
 2367:     channel_graceful_restart_unlock(&c->c);
 2368: 
 2369:   if (c->gr_active)
 2370:     bgp_graceful_restart_done(c);
 2371: }
 2372: 
 2373: static inline void
 2374: bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_list *ea, byte *nh, uint nh_len)
 2375: {
 2376:   struct bgp_channel *c = bgp_get_channel(s->proto, afi);
 2377:   rta *a = NULL;
 2378: 
 2379:   if (!c)
 2380:     DISCARD(BAD_AFI, BGP_AFI(afi), BGP_SAFI(afi));
 2381: 
 2382:   s->channel = c;
 2383:   s->add_path = c->add_path_rx;
 2384:   s->mpls = c->desc->mpls;
 2385: 
 2386:   s->last_id = 0;
 2387:   s->last_src = s->proto->p.main_source;
 2388: 
 2389:   /*
 2390:    * IPv4 BGP and MP-BGP may be used together in one update, therefore we do not
 2391:    * add BA_NEXT_HOP in bgp_decode_attrs(), but we add it here independently for
 2392:    * IPv4 BGP and MP-BGP. We undo the attribute (and possibly others attached by
 2393:    * decode_next_hop hooks) by restoring a->eattrs afterwards.
 2394:    */
 2395: 
 2396:   if (ea)
 2397:   {
 2398:     a = allocz(RTA_MAX_SIZE);
 2399: 
 2400:     a->source = RTS_BGP;
 2401:     a->scope = SCOPE_UNIVERSE;
 2402:     a->from = s->proto->remote_ip;
 2403:     a->eattrs = ea;
 2404: 
 2405:     c->desc->decode_next_hop(s, nh, nh_len, a);
 2406:     bgp_finish_attrs(s, a);
 2407: 
 2408:     /* Handle withdraw during next hop decoding */
 2409:     if (s->err_withdraw)
 2410:       a = NULL;
 2411:   }
 2412: 
 2413:   c->desc->decode_nlri(s, nlri, len, a);
 2414: 
 2415:   rta_free(s->cached_rta);
 2416:   s->cached_rta = NULL;
 2417: }
 2418: 
/*
 * Handle a received UPDATE message.
 *
 * @pkt points to the start of the message including its header and @len is
 * the total message length. The function splits the message into withdrawn
 * routes, path attributes and reachable routes, decodes the attributes,
 * recognizes End-of-RIB markers and passes each non-empty NLRI field to
 * bgp_decode_nlri(). Framing problems found here are reported through
 * bgp_parse_error(), which is expected to end up in the setjmp() handler
 * below (NOTE(review): bgp_parse_error() is not visible in this chunk).
 */
static void
bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len)
{
  struct bgp_proto *p = conn->bgp;
  ea_list *ea = NULL;

  BGP_TRACE_RL(&rl_rcv_update, D_PACKETS, "Got UPDATE");

  /* Workaround for some BGP implementations that skip initial KEEPALIVE */
  if (conn->state == BS_OPENCONFIRM)
    bgp_conn_enter_established_state(conn);

  /* In any other state an UPDATE is a finite state machine error (code 5) */
  if (conn->state != BS_ESTABLISHED)
  { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }

  bgp_start_timer(conn->hold_timer, conn->hold_time);

  /* Initialize parse state */
  struct bgp_parse_state s = {
    .proto = p,
    .pool = bgp_linpool,
    .as4_session = p->as4_session,
  };

  /* Parse error handler */
  if (setjmp(s.err_jmpbuf))
  {
    bgp_error(conn, 3, s.err_subcode, NULL, 0);
    goto done;
  }

  /* Check minimal length */
  /* 23 = 19 bytes skipped as header below + the two 2-byte length fields */
  if (len < 23)
  { bgp_error(conn, 1, 2, pkt+16, 2); return; }

  /* Skip fixed header */
  uint pos = 19;

  /*
   *	UPDATE message format
   *
   *	2 B	IPv4 Withdrawn Routes Length
   *	var	IPv4 Withdrawn Routes NLRI
   *	2 B	Total Path Attribute Length
   *	var	Path Attributes
   *	var	IPv4 Reachable Routes NLRI
   */

  s.ip_unreach_len = get_u16(pkt + pos);
  s.ip_unreach_nlri = pkt + pos + 2;
  pos += 2 + s.ip_unreach_len;

  /* The attribute length field must still lie inside the message */
  if (pos + 2 > len)
    bgp_parse_error(&s, 1);

  s.attr_len = get_u16(pkt + pos);
  s.attrs = pkt + pos + 2;
  pos += 2 + s.attr_len;

  /* The attributes must not run past the end of the message */
  if (pos > len)
    bgp_parse_error(&s, 1);

  /* Whatever follows the attributes is the reachable IPv4 NLRI */
  s.ip_reach_len = len - pos;
  s.ip_reach_nlri = pkt + pos;


  if (s.attr_len)
    ea = bgp_decode_attrs(&s, s.attrs, s.attr_len);
  else
    ea = NULL;

  /* Check for End-of-RIB marker */
  if (!s.attr_len && !s.ip_unreach_len && !s.ip_reach_len)
  { bgp_rx_end_mark(&s, BGP_AF_IPV4); goto done; }

  /* Check for MP End-of-RIB marker */
  if ((s.attr_len < 8) && !s.ip_unreach_len && !s.ip_reach_len &&
      !s.mp_reach_len && !s.mp_unreach_len && s.mp_unreach_af)
  { bgp_rx_end_mark(&s, s.mp_unreach_af); goto done; }

  /* Withdrawals are decoded without attributes and before the announcements */
  if (s.ip_unreach_len)
    bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_unreach_nlri, s.ip_unreach_len, NULL, NULL, 0);

  if (s.mp_unreach_len)
    bgp_decode_nlri(&s, s.mp_unreach_af, s.mp_unreach_nlri, s.mp_unreach_len, NULL, NULL, 0);

  /* Announcements share the attribute list but use their own next hop data */
  if (s.ip_reach_len)
    bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_reach_nlri, s.ip_reach_len,
		    ea, s.ip_next_hop_data, s.ip_next_hop_len);

  if (s.mp_reach_len)
    bgp_decode_nlri(&s, s.mp_reach_af, s.mp_reach_nlri, s.mp_reach_len,
		    ea, s.mp_next_hop_data, s.mp_next_hop_len);

  /* Common exit, also taken from the parse error handler above */
done:
  rta_free(s.cached_rta);
  lp_flush(s.pool);
  return;
}
 2518: 
 2519: static uint
 2520: bgp_find_update_afi(byte *pos, uint len)
 2521: {
 2522:   /*
 2523:    * This is stripped-down version of bgp_rx_update(), bgp_decode_attrs() and
 2524:    * bgp_decode_mp_[un]reach_nlri() used by MRT code in order to find out which
 2525:    * AFI/SAFI is associated with incoming UPDATE. Returns 0 for framing errors.
 2526:    */
 2527:   if (len < 23)
 2528:     return 0;
 2529: 
 2530:   /* Assume there is no withrawn NLRI, read lengths and move to attribute list */
 2531:   uint wlen = get_u16(pos + 19);
 2532:   uint alen = get_u16(pos + 21);
 2533:   ADVANCE(pos, len, 23);
 2534: 
 2535:   /* Either non-zero withdrawn NLRI, non-zero reachable NLRI, or IPv4 End-of-RIB */
 2536:   if ((wlen != 0) || (alen < len) || !alen)
 2537:     return BGP_AF_IPV4;
 2538: 
 2539:   if (alen > len)
 2540:     return 0;
 2541: 
 2542:   /* Process attribute list (alen == len) */
 2543:   while (len)
 2544:   {
 2545:     if (len < 2)
 2546:       return 0;
 2547: 
 2548:     uint flags = pos[0];
 2549:     uint code = pos[1];
 2550:     ADVANCE(pos, len, 2);
 2551: 
 2552:     uint ll = !(flags & BAF_EXT_LEN) ? 1 : 2;
 2553:     if (len < ll)
 2554:       return 0;
 2555: 
 2556:     /* Read attribute length and move to attribute body */
 2557:     alen = (ll == 1) ? get_u8(pos) : get_u16(pos);
 2558:     ADVANCE(pos, len, ll);
 2559: 
 2560:     if (len < alen)
 2561:       return 0;
 2562: 
 2563:     /* Found MP NLRI */
 2564:     if ((code == BA_MP_REACH_NLRI) || (code == BA_MP_UNREACH_NLRI))
 2565:     {
 2566:       if (alen < 3)
 2567: 	return 0;
 2568: 
 2569:       return BGP_AF(get_u16(pos), pos[2]);
 2570:     }
 2571: 
 2572:     /* Move to the next attribute */
 2573:     ADVANCE(pos, len, alen);
 2574:   }
 2575: 
 2576:   /* No basic or MP NLRI, but there are some attributes -> error */
 2577:   return 0;
 2578: }
 2579: 
 2580: 
 2581: /*
 2582:  *	ROUTE-REFRESH
 2583:  */
 2584: 
 2585: static inline byte *
 2586: bgp_create_route_refresh(struct bgp_channel *c, byte *buf)
 2587: {
 2588:   struct bgp_proto *p = (void *) c->c.proto;
 2589: 
 2590:   BGP_TRACE(D_PACKETS, "Sending ROUTE-REFRESH");
 2591: 
 2592:   /* Original route refresh request, RFC 2918 */
 2593:   put_af4(buf, c->afi);
 2594:   buf[2] = BGP_RR_REQUEST;
 2595: 
 2596:   return buf+4;
 2597: }
 2598: 
 2599: static inline byte *
 2600: bgp_create_begin_refresh(struct bgp_channel *c, byte *buf)
 2601: {
 2602:   struct bgp_proto *p = (void *) c->c.proto;
 2603: 
 2604:   BGP_TRACE(D_PACKETS, "Sending BEGIN-OF-RR");
 2605: 
 2606:   /* Demarcation of beginning of route refresh (BoRR), RFC 7313 */
 2607:   put_af4(buf, c->afi);
 2608:   buf[2] = BGP_RR_BEGIN;
 2609: 
 2610:   return buf+4;
 2611: }
 2612: 
 2613: static inline byte *
 2614: bgp_create_end_refresh(struct bgp_channel *c, byte *buf)
 2615: {
 2616:   struct bgp_proto *p = (void *) c->c.proto;
 2617: 
 2618:   BGP_TRACE(D_PACKETS, "Sending END-OF-RR");
 2619: 
 2620:   /* Demarcation of ending of route refresh (EoRR), RFC 7313 */
 2621:   put_af4(buf, c->afi);
 2622:   buf[2] = BGP_RR_END;
 2623: 
 2624:   return buf+4;
 2625: }
 2626: 
 2627: static void
 2628: bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len)
 2629: {
 2630:   struct bgp_proto *p = conn->bgp;
 2631: 
 2632:   if (conn->state != BS_ESTABLISHED)
 2633:   { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
 2634: 
 2635:   if (!conn->local_caps->route_refresh)
 2636:   { bgp_error(conn, 1, 3, pkt+18, 1); return; }
 2637: 
 2638:   if (len < (BGP_HEADER_LENGTH + 4))
 2639:   { bgp_error(conn, 1, 2, pkt+16, 2); return; }
 2640: 
 2641:   if (len > (BGP_HEADER_LENGTH + 4))
 2642:   { bgp_error(conn, 7, 1, pkt, MIN(len, 2048)); return; }
 2643: 
 2644:   struct bgp_channel *c = bgp_get_channel(p, get_af4(pkt+19));
 2645:   if (!c)
 2646:   {
 2647:     log(L_WARN "%s: Got ROUTE-REFRESH subtype %u for AF %u.%u, ignoring",
 2648: 	p->p.name, pkt[21], get_u16(pkt+19), pkt[22]);
 2649:     return;
 2650:   }
 2651: 
 2652:   /* RFC 7313 redefined reserved field as RR message subtype */
 2653:   uint subtype = p->enhanced_refresh ? pkt[21] : BGP_RR_REQUEST;
 2654: 
 2655:   switch (subtype)
 2656:   {
 2657:   case BGP_RR_REQUEST:
 2658:     BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH");
 2659:     channel_request_feeding(&c->c);
 2660:     break;
 2661: 
 2662:   case BGP_RR_BEGIN:
 2663:     BGP_TRACE(D_PACKETS, "Got BEGIN-OF-RR");
 2664:     bgp_refresh_begin(c);
 2665:     break;
 2666: 
 2667:   case BGP_RR_END:
 2668:     BGP_TRACE(D_PACKETS, "Got END-OF-RR");
 2669:     bgp_refresh_end(c);
 2670:     break;
 2671: 
 2672:   default:
 2673:     log(L_WARN "%s: Got ROUTE-REFRESH message with unknown subtype %u, ignoring",
 2674: 	p->p.name, subtype);
 2675:     break;
 2676:   }
 2677: }
 2678: 
 2679: static inline struct bgp_channel *
 2680: bgp_get_channel_to_send(struct bgp_proto *p, struct bgp_conn *conn)
 2681: {
 2682:   uint i = conn->last_channel;
 2683: 
 2684:   /* Try the last channel, but at most several times */
 2685:   if ((conn->channels_to_send & (1 << i)) &&
 2686:       (conn->last_channel_count < 16))
 2687:     goto found;
 2688: 
 2689:   /* Find channel with non-zero channels_to_send */
 2690:   do
 2691:   {
 2692:     i++;
 2693:     if (i >= p->channel_count)
 2694:       i = 0;
 2695:   }
 2696:   while (! (conn->channels_to_send & (1 << i)));
 2697: 
 2698:   /* Use that channel */
 2699:   conn->last_channel = i;
 2700:   conn->last_channel_count = 0;
 2701: 
 2702: found:
 2703:   conn->last_channel_count++;
 2704:   return p->channel_map[i];
 2705: }
 2706: 
 2707: static inline int
 2708: bgp_send(struct bgp_conn *conn, uint type, uint len)
 2709: {
 2710:   sock *sk = conn->sk;
 2711:   byte *buf = sk->tbuf;
 2712: 
 2713:   memset(buf, 0xff, 16);		/* Marker */
 2714:   put_u16(buf+16, len);
 2715:   buf[18] = type;
 2716: 
 2717:   return sk_send(sk, len);
 2718: }
 2719: 
/**
 * bgp_fire_tx - transmit packets
 * @conn: connection
 *
 * Whenever the transmit buffers of the underlying TCP connection
 * are free and we have any packets queued for sending, the socket functions
 * call bgp_fire_tx() which takes care of selecting the highest priority packet
 * queued, assembling its header and body and sending it to the connection.
 *
 * Connection-level requests are served first, in the order scheduled close >
 * Notification > Open > Keepalive. Only when none of them is pending are the
 * per-channel packets handled (Route-Refresh > Begin-of-RR > Update, the
 * latter possibly followed by End-of-RIB or End-of-RR).
 *
 * Returns the result of bgp_send() for the packet sent, or 0 when there is no
 * socket, when the connection was closed, or when nothing was left to send.
 */
static int
bgp_fire_tx(struct bgp_conn *conn)
{
  struct bgp_proto *p = conn->bgp;
  struct bgp_channel *c;
  byte *buf, *pkt, *end;
  uint s;

  if (!conn->sk)
    return 0;

  /* Packet bodies are built right behind the space reserved for the header */
  buf = conn->sk->tbuf;
  pkt = buf + BGP_HEADER_LENGTH;
  s = conn->packets_to_send;

  if (s & (1 << PKT_SCHEDULE_CLOSE))
  {
    /* We can finally close connection and enter idle state */
    bgp_conn_enter_idle_state(conn);
    return 0;
  }
  if (s & (1 << PKT_NOTIFICATION))
  {
    /* Everything else pending is dropped; only the close remains scheduled */
    conn->packets_to_send = 1 << PKT_SCHEDULE_CLOSE;
    end = bgp_create_notification(conn, pkt);
    return bgp_send(conn, PKT_NOTIFICATION, end - buf);
  }
  else if (s & (1 << PKT_OPEN))
  {
    conn->packets_to_send &= ~(1 << PKT_OPEN);
    end = bgp_create_open(conn, pkt);
    return bgp_send(conn, PKT_OPEN, end - buf);
  }
  else if (s & (1 << PKT_KEEPALIVE))
  {
    conn->packets_to_send &= ~(1 << PKT_KEEPALIVE);
    BGP_TRACE(D_PACKETS, "Sending KEEPALIVE");
    bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
    /* KEEPALIVE has no body, the message is just the header */
    return bgp_send(conn, PKT_KEEPALIVE, BGP_HEADER_LENGTH);
  }
  else while (conn->channels_to_send)
  {
    c = bgp_get_channel_to_send(p, conn);
    s = c->packets_to_send;

    if (s & (1 << PKT_ROUTE_REFRESH))
    {
      c->packets_to_send &= ~(1 << PKT_ROUTE_REFRESH);
      end = bgp_create_route_refresh(c, pkt);
      return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf);
    }
    else if (s & (1 << PKT_BEGIN_REFRESH))
    {
      /* BoRR is a subtype of RR, but uses separate bit in packets_to_send */
      c->packets_to_send &= ~(1 << PKT_BEGIN_REFRESH);
      end = bgp_create_begin_refresh(c, pkt);
      return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf);
    }
    else if (s & (1 << PKT_UPDATE))
    {
      /* The PKT_UPDATE bit stays set until bgp_create_update() has nothing more to offer */
      end = bgp_create_update(c, pkt);
      if (end)
	return bgp_send(conn, PKT_UPDATE, end - buf);

      /* No update to send, perhaps we need to send End-of-RIB or EoRR */
      c->packets_to_send = 0;
      conn->channels_to_send &= ~(1 << c->index);

      if (c->feed_state == BFS_LOADED)
      {
	c->feed_state = BFS_NONE;
	end = bgp_create_end_mark(c, pkt);
	return bgp_send(conn, PKT_UPDATE, end - buf);
      }

      else if (c->feed_state == BFS_REFRESHED)
      {
	c->feed_state = BFS_NONE;
	end = bgp_create_end_refresh(c, pkt);
	return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf);
      }
    }
    else if (s)
      bug("Channel packets_to_send: %x", s);

    /* Nothing was sent for this channel; mark it idle and try the next one */
    c->packets_to_send = 0;
    conn->channels_to_send &= ~(1 << c->index);
  }

  return 0;
}
 2821: 
 2822: /**
 2823:  * bgp_schedule_packet - schedule a packet for transmission
 2824:  * @conn: connection
 2825:  * @c: channel
 2826:  * @type: packet type
 2827:  *
 2828:  * Schedule a packet of type @type to be sent as soon as possible.
 2829:  */
 2830: void
 2831: bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type)
 2832: {
 2833:   ASSERT(conn->sk);
 2834: 
 2835:   DBG("BGP: Scheduling packet type %d\n", type);
 2836: 
 2837:   if (c)
 2838:   {
 2839:     if (! conn->channels_to_send)
 2840:     {
 2841:       conn->last_channel = c->index;
 2842:       conn->last_channel_count = 0;
 2843:     }
 2844: 
 2845:     c->packets_to_send |= 1 << type;
 2846:     conn->channels_to_send |= 1 << c->index;
 2847:   }
 2848:   else
 2849:     conn->packets_to_send |= 1 << type;
 2850: 
 2851:   if ((conn->sk->tpos == conn->sk->tbuf) && !ev_active(conn->tx_ev))
 2852:     ev_schedule(conn->tx_ev);
 2853: }
 2854: void
 2855: bgp_kick_tx(void *vconn)
 2856: {
 2857:   struct bgp_conn *conn = vconn;
 2858: 
 2859:   DBG("BGP: kicking TX\n");
 2860:   uint max = 1024;
 2861:   while (--max && (bgp_fire_tx(conn) > 0))
 2862:     ;
 2863: 
 2864:   if (!max && !ev_active(conn->tx_ev))
 2865:     ev_schedule(conn->tx_ev);
 2866: }
 2867: 
 2868: void
 2869: bgp_tx(sock *sk)
 2870: {
 2871:   struct bgp_conn *conn = sk->data;
 2872: 
 2873:   DBG("BGP: TX hook\n");
 2874:   uint max = 1024;
 2875:   while (--max && (bgp_fire_tx(conn) > 0))
 2876:     ;
 2877: 
 2878:   if (!max && !ev_active(conn->tx_ev))
 2879:     ev_schedule(conn->tx_ev);
 2880: }
 2881: 
 2882: 
 2883: static struct {
 2884:   byte major, minor;
 2885:   byte *msg;
 2886: } bgp_msg_table[] = {
 2887:   { 1, 0, "Invalid message header" },
 2888:   { 1, 1, "Connection not synchronized" },
 2889:   { 1, 2, "Bad message length" },
 2890:   { 1, 3, "Bad message type" },
 2891:   { 2, 0, "Invalid OPEN message" },
 2892:   { 2, 1, "Unsupported version number" },
 2893:   { 2, 2, "Bad peer AS" },
 2894:   { 2, 3, "Bad BGP identifier" },
 2895:   { 2, 4, "Unsupported optional parameter" },
 2896:   { 2, 5, "Authentication failure" },
 2897:   { 2, 6, "Unacceptable hold time" },
 2898:   { 2, 7, "Required capability missing" }, /* [RFC5492] */
 2899:   { 2, 8, "No supported AFI/SAFI" }, /* This error msg is nonstandard */
 2900:   { 3, 0, "Invalid UPDATE message" },
 2901:   { 3, 1, "Malformed attribute list" },
 2902:   { 3, 2, "Unrecognized well-known attribute" },
 2903:   { 3, 3, "Missing mandatory attribute" },
 2904:   { 3, 4, "Invalid attribute flags" },
 2905:   { 3, 5, "Invalid attribute length" },
 2906:   { 3, 6, "Invalid ORIGIN attribute" },
 2907:   { 3, 7, "AS routing loop" },		/* Deprecated */
 2908:   { 3, 8, "Invalid NEXT_HOP attribute" },
 2909:   { 3, 9, "Optional attribute error" },
 2910:   { 3, 10, "Invalid network field" },
 2911:   { 3, 11, "Malformed AS_PATH" },
 2912:   { 4, 0, "Hold timer expired" },
 2913:   { 5, 0, "Finite state machine error" }, /* Subcodes are according to [RFC6608] */
 2914:   { 5, 1, "Unexpected message in OpenSent state" },
 2915:   { 5, 2, "Unexpected message in OpenConfirm state" },
 2916:   { 5, 3, "Unexpected message in Established state" },
 2917:   { 6, 0, "Cease" }, /* Subcodes are according to [RFC4486] */
 2918:   { 6, 1, "Maximum number of prefixes reached" },
 2919:   { 6, 2, "Administrative shutdown" },
 2920:   { 6, 3, "Peer de-configured" },
 2921:   { 6, 4, "Administrative reset" },
 2922:   { 6, 5, "Connection rejected" },
 2923:   { 6, 6, "Other configuration change" },
 2924:   { 6, 7, "Connection collision resolution" },
 2925:   { 6, 8, "Out of Resources" },
 2926:   { 7, 0, "Invalid ROUTE-REFRESH message" }, /* [RFC7313] */
 2927:   { 7, 1, "Invalid ROUTE-REFRESH message length" } /* [RFC7313] */
 2928: };
 2929: 
 2930: /**
 2931:  * bgp_error_dsc - return BGP error description
 2932:  * @code: BGP error code
 2933:  * @subcode: BGP error subcode
 2934:  *
 2935:  * bgp_error_dsc() returns error description for BGP errors
 2936:  * which might be static string or given temporary buffer.
 2937:  */
 2938: const char *
 2939: bgp_error_dsc(uint code, uint subcode)
 2940: {
 2941:   static char buff[32];
 2942:   uint i;
 2943: 
 2944:   for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++)
 2945:     if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode)
 2946:       return bgp_msg_table[i].msg;
 2947: 
 2948:   bsprintf(buff, "Unknown error %u.%u", code, subcode);
 2949:   return buff;
 2950: }
 2951: 
 2952: /* RFC 8203 - shutdown communication message */
 2953: static int
 2954: bgp_handle_message(struct bgp_proto *p, byte *data, uint len, byte **bp)
 2955: {
 2956:   byte *msg = data + 1;
 2957:   uint msg_len = data[0];
 2958:   uint i;
 2959: 
 2960:   /* Handle zero length message */
 2961:   if (msg_len == 0)
 2962:     return 1;
 2963: 
 2964:   /* Handle proper message */
 2965:   if (msg_len + 1 > len)
 2966:     return 0;
 2967: 
 2968:   /* Some elementary cleanup */
 2969:   for (i = 0; i < msg_len; i++)
 2970:     if (msg[i] < ' ')
 2971:       msg[i] = ' ';
 2972: 
 2973:   proto_set_message(&p->p, msg, msg_len);
 2974:   *bp += bsprintf(*bp, ": \"%s\"", p->p.message);
 2975:   return 1;
 2976: }
 2977: 
 2978: void
 2979: bgp_log_error(struct bgp_proto *p, u8 class, char *msg, uint code, uint subcode, byte *data, uint len)
 2980: {
 2981:   byte argbuf[256+16], *t = argbuf;
 2982:   uint i;
 2983: 
 2984:   /* Don't report Cease messages generated by myself */
 2985:   if (code == 6 && class == BE_BGP_TX)
 2986:     return;
 2987: 
 2988:   /* Reset shutdown message */
 2989:   if ((code == 6) && ((subcode == 2) || (subcode == 4)))
 2990:     proto_set_message(&p->p, NULL, 0);
 2991: 
 2992:   if (len)
 2993:     {
 2994:       /* Bad peer AS - we would like to print the AS */
 2995:       if ((code == 2) && (subcode == 2) && ((len == 2) || (len == 4)))
 2996: 	{
 2997: 	  t += bsprintf(t, ": %u", (len == 2) ? get_u16(data) : get_u32(data));
 2998: 	  goto done;
 2999: 	}
 3000: 
 3001:       /* RFC 8203 - shutdown communication */
 3002:       if (((code == 6) && ((subcode == 2) || (subcode == 4))))
 3003: 	if (bgp_handle_message(p, data, len, &t))
 3004: 	  goto done;
 3005: 
 3006:       *t++ = ':';
 3007:       *t++ = ' ';
 3008:       if (len > 16)
 3009: 	len = 16;
 3010:       for (i=0; i<len; i++)
 3011: 	t += bsprintf(t, "%02x", data[i]);
 3012:     }
 3013: 
 3014: done:
 3015:   *t = 0;
 3016:   const byte *dsc = bgp_error_dsc(code, subcode);
 3017:   log(L_REMOTE "%s: %s: %s%s", p->p.name, msg, dsc, argbuf);
 3018: }
 3019: 
 3020: static void
 3021: bgp_rx_notification(struct bgp_conn *conn, byte *pkt, uint len)
 3022: {
 3023:   struct bgp_proto *p = conn->bgp;
 3024: 
 3025:   if (len < 21)
 3026:   { bgp_error(conn, 1, 2, pkt+16, 2); return; }
 3027: 
 3028:   uint code = pkt[19];
 3029:   uint subcode = pkt[20];
 3030:   int err = (code != 6);
 3031: 
 3032:   bgp_log_error(p, BE_BGP_RX, "Received", code, subcode, pkt+21, len-21);
 3033:   bgp_store_error(p, conn, BE_BGP_RX, (code << 16) | subcode);
 3034: 
 3035:   bgp_conn_enter_close_state(conn);
 3036:   bgp_schedule_packet(conn, NULL, PKT_SCHEDULE_CLOSE);
 3037: 
 3038:   if (err)
 3039:   {
 3040:     bgp_update_startup_delay(p);
 3041:     bgp_stop(p, 0, NULL, 0);
 3042:   }
 3043:   else
 3044:   {
 3045:     uint subcode_bit = 1 << ((subcode <= 8) ? subcode : 0);
 3046:     if (p->cf->disable_after_cease & subcode_bit)
 3047:     {
 3048:       log(L_INFO "%s: Disabled after Cease notification", p->p.name);
 3049:       p->startup_delay = 0;
 3050:       p->p.disabled = 1;
 3051:     }
 3052:   }
 3053: }
 3054: 
 3055: static void
 3056: bgp_rx_keepalive(struct bgp_conn *conn)
 3057: {
 3058:   struct bgp_proto *p = conn->bgp;
 3059: 
 3060:   BGP_TRACE(D_PACKETS, "Got KEEPALIVE");
 3061:   bgp_start_timer(conn->hold_timer, conn->hold_time);
 3062: 
 3063:   if (conn->state == BS_OPENCONFIRM)
 3064:   { bgp_conn_enter_established_state(conn); return; }
 3065: 
 3066:   if (conn->state != BS_ESTABLISHED)
 3067:     bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0);
 3068: }
 3069: 
 3070: 
 3071: /**
 3072:  * bgp_rx_packet - handle a received packet
 3073:  * @conn: BGP connection
 3074:  * @pkt: start of the packet
 3075:  * @len: packet size
 3076:  *
 3077:  * bgp_rx_packet() takes a newly received packet and calls the corresponding
 3078:  * packet handler according to the packet type.
 3079:  */
 3080: static void
 3081: bgp_rx_packet(struct bgp_conn *conn, byte *pkt, uint len)
 3082: {
 3083:   byte type = pkt[18];
 3084: 
 3085:   DBG("BGP: Got packet %02x (%d bytes)\n", type, len);
 3086: 
 3087:   if (conn->bgp->p.mrtdump & MD_MESSAGES)
 3088:     bgp_dump_message(conn, pkt, len);
 3089: 
 3090:   switch (type)
 3091:   {
 3092:   case PKT_OPEN:		return bgp_rx_open(conn, pkt, len);
 3093:   case PKT_UPDATE:		return bgp_rx_update(conn, pkt, len);
 3094:   case PKT_NOTIFICATION:	return bgp_rx_notification(conn, pkt, len);
 3095:   case PKT_KEEPALIVE:		return bgp_rx_keepalive(conn);
 3096:   case PKT_ROUTE_REFRESH:	return bgp_rx_route_refresh(conn, pkt, len);
 3097:   default:			bgp_error(conn, 1, 3, pkt+18, 1);
 3098:   }
 3099: }
 3100: 
 3101: /**
 3102:  * bgp_rx - handle received data
 3103:  * @sk: socket
 3104:  * @size: amount of data received
 3105:  *
 3106:  * bgp_rx() is called by the socket layer whenever new data arrive from
 3107:  * the underlying TCP connection. It assembles the data fragments to packets,
 3108:  * checks their headers and framing and passes complete packets to
 3109:  * bgp_rx_packet().
 3110:  */
 3111: int
 3112: bgp_rx(sock *sk, uint size)
 3113: {
 3114:   struct bgp_conn *conn = sk->data;
 3115:   byte *pkt_start = sk->rbuf;
 3116:   byte *end = pkt_start + size;
 3117:   uint i, len;
 3118: 
 3119:   DBG("BGP: RX hook: Got %d bytes\n", size);
 3120:   while (end >= pkt_start + BGP_HEADER_LENGTH)
 3121:     {
 3122:       if ((conn->state == BS_CLOSE) || (conn->sk != sk))
 3123: 	return 0;
 3124:       for(i=0; i<16; i++)
 3125: 	if (pkt_start[i] != 0xff)
 3126: 	  {
 3127: 	    bgp_error(conn, 1, 1, NULL, 0);
 3128: 	    break;
 3129: 	  }
 3130:       len = get_u16(pkt_start+16);
 3131:       if ((len < BGP_HEADER_LENGTH) || (len > bgp_max_packet_length(conn)))
 3132: 	{
 3133: 	  bgp_error(conn, 1, 2, pkt_start+16, 2);
 3134: 	  break;
 3135: 	}
 3136:       if (end < pkt_start + len)
 3137: 	break;
 3138:       bgp_rx_packet(conn, pkt_start, len);
 3139:       pkt_start += len;
 3140:     }
 3141:   if (pkt_start != sk->rbuf)
 3142:     {
 3143:       memmove(sk->rbuf, pkt_start, end - pkt_start);
 3144:       sk->rpos = sk->rbuf + (end - pkt_start);
 3145:     }
 3146:   return 0;
 3147: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>