File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / bird / proto / bgp / attrs.c
Revision 1.1.1.2 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Wed Mar 17 19:50:23 2021 UTC (4 years ago) by misho
Branches: bird, MAIN
CVS tags: v1_6_8p3, HEAD
bird 1.6.8

    1: /*
    2:  *	BIRD -- BGP Attributes
    3:  *
    4:  *	(c) 2000 Martin Mares <mj@ucw.cz>
    5:  *
    6:  *	Can be freely distributed and used under the terms of the GNU GPL.
    7:  */
    8: 
    9: #undef LOCAL_DEBUG
   10: 
   11: #include <stdlib.h>
   12: 
   13: #include "nest/bird.h"
   14: #include "nest/iface.h"
   15: #include "nest/protocol.h"
   16: #include "nest/route.h"
   17: #include "nest/attrs.h"
   18: #include "conf/conf.h"
   19: #include "lib/resource.h"
   20: #include "lib/string.h"
   21: #include "lib/unaligned.h"
   22: 
   23: #include "bgp.h"
   24: 
   25: /*
   26:  *   UPDATE message error handling
   27:  *
   28:  * All checks from RFC 4271 6.3 are done as specified with these exceptions:
   29:  *  - The semantic check of an IP address from NEXT_HOP attribute is missing.
   30:  *  - Checks of some optional attribute values are missing.
   31:  *  - Syntactic and semantic checks of NLRIs (done in DECODE_PREFIX())
   32:  *    are probably inadequate.
   33:  *
   34:  * Loop detection based on AS_PATH causes updates to be withdrawn. RFC
   35:  * 4271 does not explicitly specify the behavior in that case.
   36:  *
   37:  * Loop detection related to route reflection (based on ORIGINATOR_ID
   38:  * and CLUSTER_LIST) causes updates to be withdrawn. RFC 4456 8
   39:  * specifies that such updates should be ignored, but that is generally
   40:  * a bad idea.
   41:  *
   42:  * Error checking of optional transitive attributes is done according to
   43:  * draft-ietf-idr-optional-transitive-03, but errors are handled always
   44:  * as withdraws.
   45:  *
   46:  * Unexpected AS_CONFED_* segments in AS_PATH are logged and removed,
   47:  * but unknown segments cause a session drop with Malformed AS_PATH
   48:  * error (see validate_path()). The behavior in such case is not
   49:  * explicitly specified by RFC 4271. RFC 5065 specifies that
   50:  * inconsistent AS_CONFED_* segments should cause a session drop, but
   51:  * implementations that pass invalid AS_CONFED_* segments are
   52:  * widespread.
   53:  *
   54:  * Error handling of AS4_* attributes is done as specified by RFC 6793. There
   55:  * are several possible inconsistencies between AGGREGATOR and AS4_AGGREGATOR
   56:  * that are not handled by that RFC, these are logged and ignored (see
   57:  * bgp_reconstruct_4b_attrs()).
   58:  */
   59: 
   60: 
   61: static byte bgp_mandatory_attrs[] = { BA_ORIGIN, BA_AS_PATH	/* codes bgp_get_bucket() requires on every exported route */
   62: #ifndef IPV6
   63: ,BA_NEXT_HOP	/* additionally required in the non-IPV6 build */
   64: #endif
   65: };
   66: 
   67: struct attr_desc {	/* One row of bgp_attr_table[], which is indexed by BGP attribute code */
   68:   char *name;	/* Attribute name; NULL marks a code as unknown (see ATTR_KNOWN) */
   69:   int expected_length;	/* Fixed value length, or -1 when the length is variable */
   70:   int expected_flags;	/* BAF_* flags stamped onto the attribute by bgp_set_attr() / bgp_get_bucket() */
   71:   int type;	/* EAF_TYPE_* used for the corresponding eattr */
   72:   int allow_in_ebgp;	/* Zero: bgp_get_bucket() drops the attribute on non-internal sessions */
   73:   int (*validate)(struct bgp_proto *p, byte *attr, int len);	/* Optional value check, NULL if none */
   74:   void (*format)(eattr *ea, byte *buf, int buflen);	/* Optional text formatter, NULL if none */
   75: };
   76: 
   77: #define IGNORE -1	/* validate-hook result; returned here by the MP_(UN)REACH_NLRI checks and the IPV6 NEXT_HOP check */
   78: #define WITHDRAW -2	/* validate-hook result; returned here when an optional attribute has a bad length */
   79: 
   80: static int
   81: bgp_check_origin(struct bgp_proto *p UNUSED, byte *a, int len UNUSED)
   82: {
   83:   if (*a > 2)
   84:     return 6;
   85:   return 0;
   86: }
   87: 
   88: static void
   89: bgp_format_origin(eattr *a, byte *buf, int buflen UNUSED)
   90: {
   91:   static char *bgp_origin_names[] = { "IGP", "EGP", "Incomplete" };
   92: 
   93:   bsprintf(buf, bgp_origin_names[a->u.data]);
   94: }
   95: 
   96: static int
   97: path_segment_contains(byte *p, int bs, u32 asn)
   98: {
   99:   int i;
  100:   int len = p[1];
  101:   p += 2;
  102: 
  103:   for(i=0; i<len; i++)
  104:     {
  105:       u32 asn2 = (bs == 4) ? get_u32(p) : get_u16(p);
  106:       if (asn2 == asn)
  107: 	return 1;
  108:       p += bs;
  109:     }
  110: 
  111:   return 0;
  112: }
  113: 
  114: /* Validates path attribute, removes AS_CONFED_* segments, and also returns path length */
  115: static int
  116: validate_path(struct bgp_proto *p, int as_path, int bs, byte *idata, uint *ilength)
  117: {
  118:   int res = 0;
  119:   u8 *a, *dst;
  120:   int len, plen;
  121: 
  122:   dst = a = idata;
  123:   len = *ilength;
  124: 
  125:   while (len)
  126:     {
  127:       if (len < 2)
  128: 	return -1;
  129: 
  130:       plen = 2 + bs * a[1];
  131:       if (len < plen)
  132: 	return -1;
  133: 
  134:       if (a[1] == 0)
  135:         {
  136: 	  log(L_WARN "%s: %s_PATH attribute contains empty segment, skipping it",
  137: 	      p->p.name, as_path ? "AS" : "AS4");
  138: 	  goto skip;
  139: 	}
  140: 
  141:       switch (a[0])
  142: 	{
  143: 	case AS_PATH_SET:
  144: 	  res++;
  145: 	  break;
  146: 
  147: 	case AS_PATH_SEQUENCE:
  148: 	  res += a[1];
  149: 	  break;
  150: 
  151: 	case AS_PATH_CONFED_SEQUENCE:
  152: 	case AS_PATH_CONFED_SET:
  153: 	  if (as_path && path_segment_contains(a, bs, p->remote_as))
  154: 	    {
  155: 	      log(L_WARN "%s: AS_CONFED_* segment with peer ASN found, misconfigured confederation?", p->p.name);
  156: 	      return -1;
  157: 	    }
  158: 
  159: 	  log(L_WARN "%s: %s_PATH attribute contains AS_CONFED_* segment, skipping segment",
  160: 	      p->p.name, as_path ? "AS" : "AS4");
  161: 	  goto skip;
  162: 
  163: 	default:
  164: 	  return -1;
  165: 	}
  166: 
  167:       if (dst != a)
  168: 	memmove(dst, a, plen);
  169:       dst += plen;
  170: 
  171:     skip:
  172:       len -= plen;
  173:       a += plen;
  174:     }
  175: 
  176:   *ilength = dst - idata;
  177:   return res;
  178: }
  179: 
  180: static inline int
  181: validate_as_path(struct bgp_proto *p, byte *a, int *len)
  182: {
  183:   return validate_path(p, 1, p->as4_session ? 4 : 2, a, len);
  184: }
  185: 
  186: static inline int
  187: validate_as4_path(struct bgp_proto *p, struct adata *path)
  188: {
  189:   return validate_path(p, 0, 4, path->data, &path->length);
  190: }
  191: 
  192: static int
  193: bgp_check_next_hop(struct bgp_proto *p UNUSED, byte *a UNUSED6, int len UNUSED6)
  194: {
  195: #ifdef IPV6
  196:   return IGNORE;
  197: #else
  198:   ip_addr addr;
  199: 
  200:   memcpy(&addr, a, len);
  201:   ipa_ntoh(addr);
  202:   if (ipa_classify(addr) & IADDR_HOST)
  203:     return 0;
  204:   else
  205:     return 8;
  206: #endif
  207: }
  208: 
  209: static void
  210: bgp_format_next_hop(eattr *a, byte *buf, int buflen UNUSED)
  211: {
  212:   ip_addr *ipp = (ip_addr *) a->u.ptr->data;
  213: #ifdef IPV6
  214:   /* in IPv6, we might have two addresses in NEXT HOP */
  215:   if ((a->u.ptr->length == NEXT_HOP_LENGTH) && ipa_nonzero(ipp[1]))
  216:     {
  217:       bsprintf(buf, "%I %I", ipp[0], ipp[1]);
  218:       return;
  219:     }
  220: #endif
  221: 
  222:   bsprintf(buf, "%I", ipp[0]);
  223: }
  224: 
  225: static int
  226: bgp_check_aggregator(struct bgp_proto *p, byte *a UNUSED, int len)
  227: {
  228:   int exp_len = p->as4_session ? 8 : 6;
  229:   
  230:   return (len == exp_len) ? 0 : WITHDRAW;
  231: }
  232: 
  233: static void
  234: bgp_format_aggregator(eattr *a, byte *buf, int buflen UNUSED)
  235: {
  236:   struct adata *ad =  a->u.ptr;
  237:   byte *data = ad->data;
  238:   u32 as;
  239: 
  240:   as = get_u32(data);
  241:   data += 4;
  242: 
  243:   bsprintf(buf, "%d.%d.%d.%d AS%u", data[0], data[1], data[2], data[3], as);
  244: }
  245: 
  246: static int
  247: bgp_check_community(struct bgp_proto *p UNUSED, byte *a UNUSED, int len)
  248: {
  249:   return ((len % 4) == 0) ? 0 : WITHDRAW;
  250: }
  251: 
  252: static int
  253: bgp_check_cluster_list(struct bgp_proto *p UNUSED, byte *a UNUSED, int len)
  254: {
  255:   return ((len % 4) == 0) ? 0 : 5;
  256: }
  257: 
  258: static void
  259: bgp_format_cluster_list(eattr *a, byte *buf, int buflen)
  260: {
  261:   /* Truncates cluster lists larger than buflen, probably not a problem */
  262:   int_set_format(a->u.ptr, 0, -1, buf, buflen);
  263: }
  264: 
  265: static int
  266: bgp_check_reach_nlri(struct bgp_proto *p UNUSED, byte *a UNUSED, int len UNUSED)
  267: {
  268: #ifdef IPV6
  269:   p->mp_reach_start = a;
  270:   p->mp_reach_len = len;
  271: #endif
  272:   return IGNORE;
  273: }
  274: 
  275: static int
  276: bgp_check_unreach_nlri(struct bgp_proto *p UNUSED, byte *a UNUSED, int len UNUSED)
  277: {
  278: #ifdef IPV6
  279:   p->mp_unreach_start = a;
  280:   p->mp_unreach_len = len;
  281: #endif
  282:   return IGNORE;
  283: }
  284: 
  285: static int
  286: bgp_check_ext_community(struct bgp_proto *p UNUSED, byte *a UNUSED, int len)
  287: {
  288:   return ((len % 8) == 0) ? 0 : WITHDRAW;
  289: }
  290: 
  291: static int
  292: bgp_check_large_community(struct bgp_proto *p UNUSED, byte *a UNUSED, int len)
  293: {
  294:   return ((len % 12) == 0) ? 0 : WITHDRAW;
  295: }
  296: 
  297: 
  298: static struct attr_desc bgp_attr_table[] = {	/* Positional: the array index is the BGP attribute code */
  299:   { NULL, -1, 0, 0, 0,								/* Undefined */
  300:     NULL, NULL },
  301:   { "origin", 1, BAF_TRANSITIVE, EAF_TYPE_INT, 1,				/* BA_ORIGIN */
  302:     bgp_check_origin, bgp_format_origin },
  303:   { "as_path", -1, BAF_TRANSITIVE, EAF_TYPE_AS_PATH, 1,				/* BA_AS_PATH */
  304:     NULL, NULL }, /* is checked by validate_as_path() as a special case */
  305:   { "next_hop", 4, BAF_TRANSITIVE, EAF_TYPE_IP_ADDRESS, 1,			/* BA_NEXT_HOP */
  306:     bgp_check_next_hop, bgp_format_next_hop },
  307:   { "med", 4, BAF_OPTIONAL, EAF_TYPE_INT, 1,					/* BA_MULTI_EXIT_DISC */
  308:     NULL, NULL },
  309:   { "local_pref", 4, BAF_TRANSITIVE, EAF_TYPE_INT, 1,				/* BA_LOCAL_PREF */
  310:     NULL, NULL },
  311:   { "atomic_aggr", 0, BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1,			/* BA_ATOMIC_AGGR */
  312:     NULL, NULL },
  313:   { "aggregator", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1,	/* BA_AGGREGATOR */
  314:     bgp_check_aggregator, bgp_format_aggregator },
  315:   { "community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_INT_SET, 1,	/* BA_COMMUNITY */
  316:     bgp_check_community, NULL },
  317:   { "originator_id", 4, BAF_OPTIONAL, EAF_TYPE_ROUTER_ID, 0,			/* BA_ORIGINATOR_ID */
  318:     NULL, NULL },
  319:   { "cluster_list", -1, BAF_OPTIONAL, EAF_TYPE_INT_SET, 0,			/* BA_CLUSTER_LIST */
  320:     bgp_check_cluster_list, bgp_format_cluster_list }, 
  321:   { .name = NULL },								/* BA_DPA */
  322:   { .name = NULL },								/* BA_ADVERTISER */
  323:   { .name = NULL },								/* BA_RCID_PATH */
  324:   { "mp_reach_nlri", -1, BAF_OPTIONAL, EAF_TYPE_OPAQUE, 1,			/* BA_MP_REACH_NLRI */
  325:     bgp_check_reach_nlri, NULL },
  326:   { "mp_unreach_nlri", -1, BAF_OPTIONAL, EAF_TYPE_OPAQUE, 1,			/* BA_MP_UNREACH_NLRI */
  327:     bgp_check_unreach_nlri, NULL },
  328:   { "ext_community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_EC_SET, 1,	/* BA_EXT_COMMUNITY */
  329:     bgp_check_ext_community, NULL },
  330:   { "as4_path", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1,		/* BA_AS4_PATH */
  331:     NULL, NULL },
  332:   { "as4_aggregator", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1,	/* BA_AS4_AGGREGATOR */
  333:     NULL, NULL },
  334:   [BA_LARGE_COMMUNITY] =	/* explicit index; any codes between the previous row and this one are zero-filled (name == NULL) */
  335:   { "large_community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_LC_SET, 1,
  336:     bgp_check_large_community, NULL }
  337: };
  338: 
  339: /* BA_AS4_PATH is type EAF_TYPE_OPAQUE and not type EAF_TYPE_AS_PATH.
  340:  * It does not matter as this attribute does not appear on routes in the routing table.
  341:  */
  342: 
  343: #define ATTR_KNOWN(code) ((code) < ARRAY_SIZE(bgp_attr_table) && bgp_attr_table[code].name)	/* true when code lies inside the table and its row has a name */
  344: 
  345: static inline struct adata *
  346: bgp_alloc_adata(struct linpool *pool, unsigned len)
  347: {
  348:   struct adata *ad = lp_alloc(pool, sizeof(struct adata) + len);
  349:   ad->length = len;
  350:   return ad;
  351: }
  352: 
  353: static void
  354: bgp_set_attr(eattr *e, unsigned attr, uintptr_t val)
  355: {
  356:   ASSERT(ATTR_KNOWN(attr));
  357:   e->id = EA_CODE(EAP_BGP, attr);
  358:   e->type = bgp_attr_table[attr].type;
  359:   e->flags = bgp_attr_table[attr].expected_flags;
  360:   if (e->type & EAF_EMBEDDED)
  361:     e->u.data = val;
  362:   else
  363:     e->u.ptr = (struct adata *) val;
  364: }
  365: 
  366: static byte *
  367: bgp_set_attr_wa(eattr *e, struct linpool *pool, unsigned attr, unsigned len)
  368: {
  369:   struct adata *ad = bgp_alloc_adata(pool, len);
  370:   bgp_set_attr(e, attr, (uintptr_t) ad);
  371:   return ad->data;
  372: }
  373: 
  374: void
  375: bgp_attach_attr(ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val)
  376: {
  377:   ea_list *a = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr));
  378:   a->next = *to;
  379:   *to = a;
  380:   a->flags = EALF_SORTED;
  381:   a->count = 1;
  382:   bgp_set_attr(a->attrs, attr, val);
  383: }
  384: 
  385: byte *
  386: bgp_attach_attr_wa(ea_list **to, struct linpool *pool, unsigned attr, unsigned len)
  387: {
  388:   struct adata *ad = bgp_alloc_adata(pool, len);
  389:   bgp_attach_attr(to, pool, attr, (uintptr_t) ad);
  390:   return ad->data;
  391: }
  392: 
  393: static int
  394: bgp_encode_attr_hdr(byte *dst, uint flags, unsigned code, int len)
  395: {
  396:   int wlen;
  397: 
  398:   DBG("\tAttribute %02x (%d bytes, flags %02x)\n", code, len, flags);
  399: 
  400:   if (len < 256)
  401:     {
  402:       *dst++ = flags;
  403:       *dst++ = code;
  404:       *dst++ = len;
  405:       wlen = 3;
  406:     }
  407:   else
  408:     {
  409:       *dst++ = flags | BAF_EXT_LEN;
  410:       *dst++ = code;
  411:       put_u16(dst, len);
  412:       wlen = 4;
  413:     }
  414: 
  415:   return wlen;
  416: }
  417: 
  418: static void
  419: aggregator_convert_to_old(struct adata *aggr, byte *dst, int *new_used)
  420: {
  421:   byte *src = aggr->data;
  422:   *new_used = 0;
  423: 
  424:   u32 as = get_u32(src);
  425:   if (as > 0xFFFF) 
  426:     {
  427:       as = AS_TRANS;
  428:       *new_used = 1;
  429:     }
  430:   put_u16(dst, as);
  431: 
  432:   /* Copy IPv4 address */
  433:   memcpy(dst + 2, src + 4, 4);
  434: }
  435: 
  436: static void
  437: aggregator_convert_to_new(struct adata *aggr, byte *dst)
  438: {
  439:   byte *src = aggr->data;
  440: 
  441:   u32 as   = get_u16(src);
  442:   put_u32(dst, as);
  443: 
  444:   /* Copy IPv4 address */
  445:   memcpy(dst + 4, src + 2, 4);
  446: }
  447: 
  448: static int
  449: bgp_get_attr_len(eattr *a)
  450: {
  451:   int len;
  452:   if (ATTR_KNOWN(EA_ID(a->id)))
  453:     {
  454:       int code = EA_ID(a->id);
  455:       struct attr_desc *desc = &bgp_attr_table[code];
  456:       len = desc->expected_length;
  457:       if (len < 0)
  458: 	{
  459: 	  ASSERT(!(a->type & EAF_EMBEDDED));
  460: 	  len = a->u.ptr->length;
  461: 	}
  462:     }
  463:   else
  464:     {
  465:       ASSERT((a->type & EAF_TYPE_MASK) == EAF_TYPE_OPAQUE);
  466:       len = a->u.ptr->length;
  467:     }
  468:   
  469:   return len;
  470: }
  471: 
  472: /**
  473:  * bgp_encode_attrs - encode BGP attributes
  474:  * @p: BGP instance (or NULL)
  475:  * @w: buffer
  476:  * @attrs: a list of extended attributes
  477:  * @remains: remaining space in the buffer
  478:  *
  479:  * The bgp_encode_attrs() function takes a list of extended attributes
  480:  * and converts it to its BGP representation (a part of an Update message).
  481:  *
  482:  * Result: Length of the attribute block generated or -1 if not enough space.
  483:  */
  484: uint
  485: bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains)
  486: {
  487:   uint i, code, type, flags;
  488:   int as4_session = p ? p->as4_session : 1;
  489:   byte *start = w;
  490:   int len, rv;
  491: 
  492:   for(i=0; i<attrs->count; i++)
  493:     {
  494:       eattr *a = &attrs->attrs[i];
  495:       ASSERT(EA_PROTO(a->id) == EAP_BGP);
  496:       code = EA_ID(a->id);
  497: 
  498: #ifdef IPV6
  499:       /* When talking multiprotocol BGP, the NEXT_HOP attributes are used only temporarily. */
  500:       if (code == BA_NEXT_HOP)
  501: 	continue;
  502: #endif
  503: 
  504:       /* When AS4-aware BGP speaker is talking to non-AS4-aware BGP speaker,
  505:        * we have to convert our 4B AS_PATH to 2B AS_PATH and send our AS_PATH 
  506:        * as optional AS4_PATH attribute.
  507:        */
  508:       if ((code == BA_AS_PATH) && !as4_session)
  509: 	{
  510: 	  len = a->u.ptr->length;
  511: 
  512: 	  if (remains < (len + 4))
  513: 	    goto err_no_buffer;
  514: 
  515: 	  /* Using temporary buffer because don't know a length of created attr
  516: 	   * and therefore a length of a header. Perhaps i should better always
  517: 	   * use BAF_EXT_LEN. */
  518: 	  
  519: 	  byte buf[len];
  520: 	  int new_used;
  521: 	  int nl = as_path_convert_to_old(a->u.ptr, buf, &new_used);
  522: 
  523: 	  DBG("BGP: Encoding old AS_PATH\n");
  524: 	  rv = bgp_encode_attr_hdr(w, BAF_TRANSITIVE, BA_AS_PATH, nl);
  525: 	  ADVANCE(w, remains, rv);
  526: 	  memcpy(w, buf, nl);
  527: 	  ADVANCE(w, remains, nl);
  528: 
  529: 	  if (! new_used)
  530: 	    continue;
  531: 
  532: 	  if (remains < (len + 4))
  533: 	    goto err_no_buffer;
  534: 
  535: 	  /* We should discard AS_CONFED_SEQUENCE or AS_CONFED_SET path segments 
  536: 	   * here but we don't support confederations and such paths we already
  537: 	   * discarded in bgp_check_as_path().
  538: 	   */
  539: 
  540: 	  DBG("BGP: Encoding AS4_PATH\n");
  541: 	  rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AS4_PATH, len);
  542: 	  ADVANCE(w, remains, rv);
  543: 	  memcpy(w, a->u.ptr->data, len);
  544: 	  ADVANCE(w, remains, len);
  545: 
  546: 	  continue;
  547: 	}
  548: 
  549:       /* The same issue with AGGREGATOR attribute */
  550:       if ((code == BA_AGGREGATOR) && !as4_session)
  551: 	{
  552: 	  int new_used;
  553: 
  554: 	  len = 6;
  555: 	  if (remains < (len + 3))
  556: 	    goto err_no_buffer;
  557: 
  558: 	  rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AGGREGATOR, len);
  559: 	  ADVANCE(w, remains, rv);
  560: 	  aggregator_convert_to_old(a->u.ptr, w, &new_used);
  561: 	  ADVANCE(w, remains, len);
  562: 
  563: 	  if (! new_used)
  564: 	    continue;
  565: 
  566: 	  len = 8;
  567: 	  if (remains < (len + 3))
  568: 	    goto err_no_buffer;
  569: 
  570: 	  rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AS4_AGGREGATOR, len);
  571: 	  ADVANCE(w, remains, rv);
  572: 	  memcpy(w, a->u.ptr->data, len);
  573: 	  ADVANCE(w, remains, len);
  574: 
  575: 	  continue;
  576: 	}
  577: 
  578:       /* Standard path continues here ... */
  579: 
  580:       type = a->type & EAF_TYPE_MASK;
  581:       flags = a->flags & (BAF_OPTIONAL | BAF_TRANSITIVE | BAF_PARTIAL);
  582:       len = bgp_get_attr_len(a);
  583: 
  584:       /* Skip empty sets */ 
  585:       if (((type == EAF_TYPE_INT_SET) || (type == EAF_TYPE_EC_SET) || (type == EAF_TYPE_LC_SET)) && (len == 0))
  586: 	continue; 
  587: 
  588:       if (remains < len + 4)
  589: 	goto err_no_buffer;
  590: 
  591:       rv = bgp_encode_attr_hdr(w, flags, code, len);
  592:       ADVANCE(w, remains, rv);
  593: 
  594:       switch (type)
  595: 	{
  596: 	case EAF_TYPE_INT:
  597: 	case EAF_TYPE_ROUTER_ID:
  598: 	  if (len == 4)
  599: 	    put_u32(w, a->u.data);
  600: 	  else
  601: 	    *w = a->u.data;
  602: 	  break;
  603: 	case EAF_TYPE_IP_ADDRESS:
  604: 	  {
  605: 	    ip_addr ip = *(ip_addr *)a->u.ptr->data;
  606: 	    ipa_hton(ip);
  607: 	    memcpy(w, &ip, len);
  608: 	    break;
  609: 	  }
  610: 	case EAF_TYPE_INT_SET:
  611: 	case EAF_TYPE_LC_SET:
  612: 	case EAF_TYPE_EC_SET:
  613: 	  {
  614: 	    u32 *z = int_set_get_data(a->u.ptr);
  615: 	    int i;
  616: 	    for(i=0; i<len; i+=4)
  617: 	      put_u32(w+i, *z++);
  618: 	    break;
  619: 	  }
  620: 	case EAF_TYPE_OPAQUE:
  621: 	case EAF_TYPE_AS_PATH:
  622: 	  memcpy(w, a->u.ptr->data, len);
  623: 	  break;
  624: 	default:
  625: 	  bug("bgp_encode_attrs: unknown attribute type %02x", a->type);
  626: 	}
  627:       ADVANCE(w, remains, len);
  628:     }
  629:   return w - start;
  630: 
  631:  err_no_buffer:
  632:   return -1;
  633: }
  634: 
  635: /*
  636: static void
  637: bgp_init_prefix(struct fib_node *N)
  638: {
  639:   struct bgp_prefix *p = (struct bgp_prefix *) N;
  640:   p->bucket_node.next = NULL;
  641: }
  642: */
  643: 
  644: static int
  645: bgp_compare_u32(const u32 *x, const u32 *y)
  646: {
  647:   return (*x < *y) ? -1 : (*x > *y) ? 1 : 0;
  648: }
  649: 
  650: static inline void
  651: bgp_normalize_int_set(u32 *dest, u32 *src, unsigned cnt)
  652: {
  653:   memcpy(dest, src, sizeof(u32) * cnt);
  654:   qsort(dest, cnt, sizeof(u32), (int(*)(const void *, const void *)) bgp_compare_u32);
  655: }
  656: 
  657: static int
  658: bgp_compare_ec(const u32 *xp, const u32 *yp)
  659: {
  660:   u64 x = ec_get(xp, 0);
  661:   u64 y = ec_get(yp, 0);
  662:   return (x < y) ? -1 : (x > y) ? 1 : 0;
  663: }
  664: 
  665: static inline void
  666: bgp_normalize_ec_set(struct adata *ad, u32 *src, int internal)
  667: {
  668:   u32 *dst = int_set_get_data(ad);
  669: 
  670:   /* Remove non-transitive communities (EC_TBIT active) on external sessions */
  671:   if (! internal)
  672:     {
  673:       int len = int_set_get_size(ad);
  674:       u32 *t = dst;
  675:       int i;
  676: 
  677:       for (i=0; i < len; i += 2)
  678: 	{
  679: 	  if (src[i] & EC_TBIT)
  680: 	    continue;
  681: 	  
  682: 	  *t++ = src[i];
  683: 	  *t++ = src[i+1];
  684: 	}
  685: 
  686:       ad->length = (t - dst) * 4;
  687:     }
  688:   else
  689:     memcpy(dst, src, ad->length);
  690: 
  691:   qsort(dst, ad->length / 8, 8, (int(*)(const void *, const void *)) bgp_compare_ec);
  692: }
  693: 
  694: static int
  695: bgp_compare_lc(const u32 *x, const u32 *y)
  696: {
  697:   if (x[0] != y[0])
  698:     return (x[0] > y[0]) ? 1 : -1;
  699:   if (x[1] != y[1])
  700:     return (x[1] > y[1]) ? 1 : -1;
  701:   if (x[2] != y[2])
  702:     return (x[2] > y[2]) ? 1 : -1;
  703:   return 0;
  704: }
  705: 
  706: static inline void
  707: bgp_normalize_lc_set(u32 *dest, u32 *src, unsigned cnt)
  708: {
  709:   memcpy(dest, src, LCOMM_LENGTH * cnt);
  710:   qsort(dest, cnt, LCOMM_LENGTH, (int(*)(const void *, const void *)) bgp_compare_lc);
  711: }
  712: 
  713: static void
  714: bgp_rehash_buckets(struct bgp_proto *p)
  715: {
  716:   struct bgp_bucket **old = p->bucket_hash;
  717:   struct bgp_bucket **new;
  718:   unsigned oldn = p->hash_size;
  719:   unsigned i, e, mask;
  720:   struct bgp_bucket *b;
  721: 
  722:   p->hash_size = p->hash_limit;
  723:   DBG("BGP: Rehashing bucket table from %d to %d\n", oldn, p->hash_size);
  724:   p->hash_limit *= 4;
  725:   if (p->hash_limit >= 65536)
  726:     p->hash_limit = ~0;
  727:   new = p->bucket_hash = mb_allocz(p->p.pool, p->hash_size * sizeof(struct bgp_bucket *));
  728:   mask = p->hash_size - 1;
  729:   for (i=0; i<oldn; i++)
  730:     while (b = old[i])
  731:       {
  732: 	old[i] = b->hash_next;
  733: 	e = b->hash & mask;
  734: 	b->hash_next = new[e];
  735: 	if (b->hash_next)
  736: 	  b->hash_next->hash_prev = b;
  737: 	b->hash_prev = NULL;
  738: 	new[e] = b;
  739:       }
  740:   mb_free(old);
  741: }
  742: 
  743: static struct bgp_bucket *
  744: bgp_new_bucket(struct bgp_proto *p, ea_list *new, unsigned hash)
  745: {
  746:   struct bgp_bucket *b;
  747:   unsigned ea_size = sizeof(ea_list) + new->count * sizeof(eattr);
  748:   unsigned ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN);
  749:   unsigned size = sizeof(struct bgp_bucket) + ea_size_aligned;
  750:   unsigned i;
  751:   byte *dest;
  752:   unsigned index = hash & (p->hash_size - 1);
  753: 
  754:   /* Gather total size of non-inline attributes */
  755:   for (i=0; i<new->count; i++)
  756:     {
  757:       eattr *a = &new->attrs[i];
  758:       if (!(a->type & EAF_EMBEDDED))
  759: 	size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN);
  760:     }
  761: 
  762:   /* Create the bucket and hash it */
  763:   b = mb_alloc(p->p.pool, size);
  764:   b->hash_next = p->bucket_hash[index];
  765:   if (b->hash_next)
  766:     b->hash_next->hash_prev = b;
  767:   p->bucket_hash[index] = b;
  768:   b->hash_prev = NULL;
  769:   b->hash = hash;
  770:   add_tail(&p->bucket_queue, &b->send_node);
  771:   init_list(&b->prefixes);
  772:   memcpy(b->eattrs, new, ea_size);
  773:   dest = ((byte *)b->eattrs) + ea_size_aligned;
  774: 
  775:   /* Copy values of non-inline attributes */
  776:   for (i=0; i<new->count; i++)
  777:     {
  778:       eattr *a = &b->eattrs->attrs[i];
  779:       if (!(a->type & EAF_EMBEDDED))
  780: 	{
  781: 	  struct adata *oa = a->u.ptr;
  782: 	  struct adata *na = (struct adata *) dest;
  783: 	  memcpy(na, oa, sizeof(struct adata) + oa->length);
  784: 	  a->u.ptr = na;
  785: 	  dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN);
  786: 	}
  787:     }
  788: 
  789:   /* If needed, rehash */
  790:   p->hash_count++;
  791:   if (p->hash_count > p->hash_limit)
  792:     bgp_rehash_buckets(p);
  793: 
  794:   return b;
  795: }
  796: 
  797: static struct bgp_bucket *
  798: bgp_get_bucket(struct bgp_proto *p, net *n, ea_list *attrs, int originate)
  799: {
  800:   ea_list *new;
  801:   unsigned i, cnt, hash, code;
  802:   eattr *a, *d;
  803:   u32 seen = 0;
  804:   struct bgp_bucket *b;
  805: 
  806:   /* Merge the attribute list */
  807:   new = alloca(ea_scan(attrs));
  808:   ea_merge(attrs, new);
  809:   ea_sort(new);
  810: 
  811:   /* Normalize attributes */
  812:   d = new->attrs;
  813:   cnt = new->count;
  814:   new->count = 0;
  815:   for(i=0; i<cnt; i++)
  816:     {
  817:       a = &new->attrs[i];
  818:       if (EA_PROTO(a->id) != EAP_BGP)
  819: 	continue;
  820:       code = EA_ID(a->id);
  821:       if (ATTR_KNOWN(code))
  822: 	{
  823: 	  if (!p->is_internal)
  824: 	    {
  825: 	      if (!bgp_attr_table[code].allow_in_ebgp)
  826: 		continue;
  827: 	      if ((code == BA_LOCAL_PREF) && !p->cf->allow_local_pref)
  828: 		continue;
  829: 	    }
  830: 	  /* The flags might have been zero if the attr was added by filters */
  831: 	  a->flags = (a->flags & BAF_PARTIAL) | bgp_attr_table[code].expected_flags;
  832: 	  if (code < 32)
  833: 	    seen |= 1 << code;
  834: 	}
  835:       else
  836: 	{
  837: 	  /* Don't re-export unknown non-transitive attributes */
  838: 	  if (!(a->flags & BAF_TRANSITIVE))
  839: 	    continue;
  840: 	}
  841:       *d = *a;
  842:       if ((d->type & EAF_ORIGINATED) && !originate && (d->flags & BAF_TRANSITIVE) && (d->flags & BAF_OPTIONAL))
  843: 	d->flags |= BAF_PARTIAL;
  844:       switch (d->type & EAF_TYPE_MASK)
  845: 	{
  846: 	case EAF_TYPE_INT_SET:
  847: 	  {
  848: 	    struct adata *z = alloca(sizeof(struct adata) + d->u.ptr->length);
  849: 	    z->length = d->u.ptr->length;
  850: 	    bgp_normalize_int_set((u32 *) z->data, (u32 *) d->u.ptr->data, z->length / 4);
  851: 	    d->u.ptr = z;
  852: 	    break;
  853: 	  }
  854: 	case EAF_TYPE_EC_SET:
  855: 	  {
  856: 	    struct adata *z = alloca(sizeof(struct adata) + d->u.ptr->length);
  857: 	    z->length = d->u.ptr->length;
  858: 	    bgp_normalize_ec_set(z, (u32 *) d->u.ptr->data, p->is_internal);
  859: 	    d->u.ptr = z;
  860: 	    break;
  861: 	  }
  862: 	case EAF_TYPE_LC_SET:
  863: 	  {
  864: 	    struct adata *z = alloca(sizeof(struct adata) + d->u.ptr->length);
  865: 	    z->length = d->u.ptr->length;
  866: 	    bgp_normalize_lc_set((u32 *) z->data, (u32 *) d->u.ptr->data, z->length / LCOMM_LENGTH);
  867: 	    d->u.ptr = z;
  868: 	    break;
  869: 	  }
  870: 	default: ;
  871: 	}
  872:       d++;
  873:       new->count++;
  874:     }
  875: 
  876:   /* Hash */
  877:   hash = ea_hash(new);
  878:   for(b=p->bucket_hash[hash & (p->hash_size - 1)]; b; b=b->hash_next)
  879:     if (b->hash == hash && ea_same(b->eattrs, new))
  880:       {
  881: 	DBG("Found bucket.\n");
  882: 	return b;
  883:       }
  884: 
  885:   /* Ensure that there are all mandatory attributes */
  886:   for(i=0; i<ARRAY_SIZE(bgp_mandatory_attrs); i++)
  887:     if (!(seen & (1 << bgp_mandatory_attrs[i])))
  888:       {
  889: 	log(L_ERR "%s: Mandatory attribute %s missing in route %I/%d", p->p.name, bgp_attr_table[bgp_mandatory_attrs[i]].name, n->n.prefix, n->n.pxlen);
  890: 	return NULL;
  891:       }
  892: 
  893:   /* Check if next hop is valid */
  894:   a = ea_find(new, EA_CODE(EAP_BGP, BA_NEXT_HOP));
  895:   if (!a || ipa_equal(p->cf->remote_ip, *(ip_addr *)a->u.ptr->data))
  896:     {
  897:       log(L_ERR "%s: Invalid NEXT_HOP attribute in route %I/%d", p->p.name, n->n.prefix, n->n.pxlen);
  898:       return NULL;
  899:     }
  900: 
  901:   /* Create new bucket */
  902:   DBG("Creating bucket.\n");
  903:   return bgp_new_bucket(p, new, hash);
  904: }
  905: 
  906: void
  907: bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck)
  908: {
  909:   if (buck->hash_next)
  910:     buck->hash_next->hash_prev = buck->hash_prev;
  911:   if (buck->hash_prev)
  912:     buck->hash_prev->hash_next = buck->hash_next;
  913:   else
  914:     p->bucket_hash[buck->hash & (p->hash_size-1)] = buck->hash_next;
  915:   mb_free(buck);
  916: }
  917: 
  918: 
  919: /* Prefix hash table */
  920: 
  921: #define PXH_KEY(n1)		n1->n.prefix, n1->n.pxlen, n1->path_id	/* key is the triple (prefix, prefix length, path id) */
  922: #define PXH_NEXT(n)		n->next	/* link to the next node in a chain */
  923: #define PXH_EQ(p1,l1,i1,p2,l2,i2) ipa_equal(p1, p2) && l1 == l2 && i1 == i2	/* keys are equal when all three parts match */
  924: #define PXH_FN(p,l,i)		ipa_hash32(p) ^ u32_hash((l << 16) ^ i)	/* NOTE(review): expansion is unparenthesized; presumably the HASH_* macros wrap it -- confirm */
  925: 
  926: #define PXH_REHASH		bgp_pxh_rehash
  927: #define PXH_PARAMS		/8, *2, 2, 2, 8, 24	/* presumably rehash thresholds and steps consumed by the HASH_* macros -- TODO confirm */
  928: 
  929: 
  930: HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix)
  931: 
  932: void
  933: bgp_init_prefix_table(struct bgp_proto *p, u32 order)
  934: {
  935:   HASH_INIT(p->prefix_hash, p->p.pool, order);
  936: 
  937:   p->prefix_slab = sl_new(p->p.pool, sizeof(struct bgp_prefix));
  938: }
  939: 
  940: void
  941: bgp_free_prefix_table(struct bgp_proto *p)
  942: {
  943:   HASH_FREE(p->prefix_hash);
  944: 
  945:   rfree(p->prefix_slab);
  946:   p->prefix_slab = NULL;
  947: }
  948: 
  949: static struct bgp_prefix *
  950: bgp_get_prefix(struct bgp_proto *p, ip_addr prefix, int pxlen, u32 path_id)
  951: {
  952:   struct bgp_prefix *bp = HASH_FIND(p->prefix_hash, PXH, prefix, pxlen, path_id);
  953: 
  954:   if (bp)
  955:     return bp;
  956: 
  957:   bp = sl_alloc(p->prefix_slab);
  958:   bp->n.prefix = prefix;
  959:   bp->n.pxlen = pxlen;
  960:   bp->path_id = path_id;
  961:   bp->bucket_node.next = NULL;
  962: 
  963:   HASH_INSERT2(p->prefix_hash, PXH, p->p.pool, bp);
  964: 
  965:   return bp;
  966: }
  967: 
  968: void
  969: bgp_free_prefix(struct bgp_proto *p, struct bgp_prefix *bp)
  970: {
  971:   HASH_REMOVE2(p->prefix_hash, PXH, p->p.pool, bp);
  972:   sl_free(p->prefix_slab, bp);
  973: }
  974: 
  975: 
  976: void
  977: bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs)
  978: {
  979:   struct bgp_proto *p = (struct bgp_proto *) P;
  980:   struct bgp_bucket *buck;
  981:   struct bgp_prefix *px;
  982:   rte *key;
  983:   u32 path_id;
  984: 
  985:   DBG("BGP: Got route %I/%d %s\n", n->n.prefix, n->n.pxlen, new ? "up" : "down");
  986: 
  987:   if (new)
  988:     {
  989:       key = new;
  990:       buck = bgp_get_bucket(p, n, attrs, new->attrs->source != RTS_BGP);
  991:       if (!buck)			/* Inconsistent attribute list */
  992: 	return;
  993:     }
  994:   else
  995:     {
  996:       key = old;
  997:       if (!(buck = p->withdraw_bucket))
  998: 	{
  999: 	  buck = p->withdraw_bucket = mb_alloc(P->pool, sizeof(struct bgp_bucket));
 1000: 	  init_list(&buck->prefixes);
 1001: 	}
 1002:     }
 1003:   path_id = p->add_path_tx ? key->attrs->src->global_id : 0;
 1004:   px = bgp_get_prefix(p, n->n.prefix, n->n.pxlen, path_id);
 1005:   if (px->bucket_node.next)
 1006:     {
 1007:       DBG("\tRemoving old entry.\n");
 1008:       rem_node(&px->bucket_node);
 1009:     }
 1010:   add_tail(&buck->prefixes, &px->bucket_node);
 1011:   bgp_schedule_packet(p->conn, PKT_UPDATE);
 1012: }
 1013: 
 1014: static int
 1015: bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool)
 1016: {
 1017:   ea_list *ea = lp_alloc(pool, sizeof(ea_list) + 4*sizeof(eattr));
 1018:   rta *rta = e->attrs;
 1019:   byte *z;
 1020: 
 1021:   ea->next = *attrs;
 1022:   *attrs = ea;
 1023:   ea->flags = EALF_SORTED;
 1024:   ea->count = 4;
 1025: 
 1026:   bgp_set_attr(ea->attrs, BA_ORIGIN,
 1027:        ((rta->source == RTS_OSPF_EXT1) || (rta->source == RTS_OSPF_EXT2)) ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
 1028: 
 1029:   if (p->is_internal)
 1030:     bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, 0);
 1031:   else
 1032:     {
 1033:       z = bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, 6);
 1034:       z[0] = AS_PATH_SEQUENCE;
 1035:       z[1] = 1;				/* 1 AS */
 1036:       put_u32(z+2, p->local_as);
 1037:     }
 1038: 
 1039:   /* iBGP -> use gw, eBGP multi-hop -> use source_addr,
 1040:      eBGP single-hop -> use gw if on the same iface */
 1041:   z = bgp_set_attr_wa(ea->attrs+2, pool, BA_NEXT_HOP, NEXT_HOP_LENGTH);
 1042:   if (p->cf->next_hop_self ||
 1043:       rta->dest != RTD_ROUTER ||
 1044:       ipa_equal(rta->gw, IPA_NONE) ||
 1045:       ipa_is_link_local(rta->gw) ||
 1046:       (!p->is_internal && !p->cf->next_hop_keep &&
 1047:        (!p->neigh || (rta->iface != p->neigh->iface))))
 1048:     set_next_hop(z, p->source_addr);
 1049:   else
 1050:     set_next_hop(z, rta->gw);
 1051: 
 1052:   bgp_set_attr(ea->attrs+3, BA_LOCAL_PREF, p->cf->default_local_pref);
 1053: 
 1054:   return 0;				/* Leave decision to the filters */
 1055: }
 1056: 
 1057: 
 1058: static inline int
 1059: bgp_as_path_loopy(struct bgp_proto *p, rta *a)
 1060: {
 1061:   int num = p->cf->allow_local_as + 1;
 1062:   eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
 1063:   return (e && (num > 0) && as_path_contains(e->u.ptr, p->local_as, num));
 1064: }
 1065: 
 1066: static inline int
 1067: bgp_originator_id_loopy(struct bgp_proto *p, rta *a)
 1068: {
 1069:   eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
 1070:   return (e && (e->u.data == p->local_id));
 1071: }
 1072: 
 1073: static inline int
 1074: bgp_cluster_list_loopy(struct bgp_proto *p, rta *a)
 1075: {
 1076:   eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
 1077:   return (e && p->rr_client && int_set_contains(e->u.ptr, p->rr_cluster_id));
 1078: }
 1079: 
 1080: 
 1081: static inline void
 1082: bgp_path_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 as)
 1083: {
 1084:   eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
 1085:   bgp_attach_attr(attrs, pool, BA_AS_PATH, (uintptr_t) as_path_prepend(pool, a->u.ptr, as));
 1086: }
 1087: 
 1088: static inline void
 1089: bgp_cluster_list_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 cid)
 1090: {
 1091:   eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
 1092:   bgp_attach_attr(attrs, pool, BA_CLUSTER_LIST, (uintptr_t) int_set_prepend(pool, a ? a->u.ptr : NULL, cid));
 1093: }
 1094: 
 1095: static int
 1096: bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool, int rr)
 1097: {
 1098:   eattr *a;
 1099: 
 1100:   if (!p->is_internal && !p->rs_client)
 1101:     {
 1102:       bgp_path_prepend(e, attrs, pool, p->local_as);
 1103: 
 1104:       /* The MULTI_EXIT_DISC attribute received from a neighboring AS MUST NOT be
 1105:        * propagated to other neighboring ASes.
 1106:        * Perhaps it would be better to undefine it.
 1107:        */
 1108:       a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
 1109:       if (a)
 1110: 	bgp_attach_attr(attrs, pool, BA_MULTI_EXIT_DISC, 0);
 1111:     }
 1112: 
 1113:   /* iBGP -> keep next_hop, eBGP multi-hop -> use source_addr,
 1114:    * eBGP single-hop -> keep next_hop if on the same iface.
 1115:    * If the next_hop is zero (i.e. link-local), keep only if on the same iface.
 1116:    *
 1117:    * Note that same-iface-check uses iface from route, which is based on gw.
 1118:    */
 1119:   a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
 1120:   if (a && !p->cf->next_hop_self && 
 1121:       (p->cf->next_hop_keep ||
 1122:        (p->is_internal && ipa_nonzero(*((ip_addr *) a->u.ptr->data))) ||
 1123:        (p->neigh && (e->attrs->iface == p->neigh->iface))))
 1124:     {
 1125:       /* Leave the original next hop attribute, will check later where does it point */
 1126:     }
 1127:   else
 1128:     {
 1129:       /* Need to create new one */
 1130:       byte *b = bgp_attach_attr_wa(attrs, pool, BA_NEXT_HOP, NEXT_HOP_LENGTH);
 1131:       set_next_hop(b, p->source_addr);
 1132:     }
 1133: 
 1134:   if (rr)
 1135:     {
 1136:       /* Handling route reflection, RFC 4456 */
 1137:       struct bgp_proto *src = (struct bgp_proto *) e->attrs->src->proto;
 1138: 
 1139:       a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
 1140:       if (!a)
 1141: 	bgp_attach_attr(attrs, pool, BA_ORIGINATOR_ID, src->remote_id);
 1142: 
 1143:       /* We attach proper cluster ID according to whether the route is entering or leaving the cluster */
 1144:       bgp_cluster_list_prepend(e, attrs, pool, src->rr_client ? src->rr_cluster_id : p->rr_cluster_id);
 1145: 
 1146:       /* Two RR clients with different cluster ID, hmmm */
 1147:       if (src->rr_client && p->rr_client && (src->rr_cluster_id != p->rr_cluster_id))
 1148: 	bgp_cluster_list_prepend(e, attrs, pool, p->rr_cluster_id);
 1149:     }
 1150: 
 1151:   return 0;				/* Leave decision to the filters */
 1152: }
 1153: 
 1154: static int
 1155: bgp_community_filter(struct bgp_proto *p, rte *e)
 1156: {
 1157:   eattr *a;
 1158:   struct adata *d;
 1159: 
 1160:   /* Check if we aren't forbidden to export the route by communities */
 1161:   a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_COMMUNITY));
 1162:   if (a)
 1163:     {
 1164:       d = a->u.ptr;
 1165:       if (int_set_contains(d, BGP_COMM_NO_ADVERTISE))
 1166: 	{
 1167: 	  DBG("\tNO_ADVERTISE\n");
 1168: 	  return 1;
 1169: 	}
 1170:       if (!p->is_internal &&
 1171: 	  (int_set_contains(d, BGP_COMM_NO_EXPORT) ||
 1172: 	   int_set_contains(d, BGP_COMM_NO_EXPORT_SUBCONFED)))
 1173: 	{
 1174: 	  DBG("\tNO_EXPORT\n");
 1175: 	  return 1;
 1176: 	}
 1177: 
 1178:       if (!p->conn->peer_llgr_aware && int_set_contains(d, BGP_COMM_LLGR_STALE))
 1179: 	return 1;
 1180:     }
 1181: 
 1182:   return 0;
 1183: }
 1184: 
 1185: int
 1186: bgp_import_control(struct proto *P, rte **new, ea_list **attrs, struct linpool *pool)
 1187: {
 1188:   rte *e = *new;
 1189:   struct bgp_proto *p = (struct bgp_proto *) P;
 1190:   struct bgp_proto *new_bgp = (e->attrs->src->proto->proto == &proto_bgp) ?
 1191:     (struct bgp_proto *) e->attrs->src->proto : NULL;
 1192: 
 1193:   if (p == new_bgp)			/* Poison reverse updates */
 1194:     return -1;
 1195:   if (new_bgp)
 1196:     {
 1197:       /* We should check here for cluster list loop, because the receiving BGP instance
 1198: 	 might have different cluster ID  */
 1199:       if (bgp_cluster_list_loopy(p, e->attrs))
 1200: 	return -1;
 1201: 
 1202:       if (p->cf->interpret_communities && bgp_community_filter(p, e))
 1203: 	return -1;
 1204: 
 1205:       if (p->local_as == new_bgp->local_as && p->is_internal && new_bgp->is_internal)
 1206: 	{
 1207: 	  /* Redistribution of internal routes with IBGP */
 1208: 	  if (p->rr_client || new_bgp->rr_client)
 1209: 	    /* Route reflection, RFC 4456 */
 1210: 	    return bgp_update_attrs(p, e, attrs, pool, 1);
 1211: 	  else
 1212: 	    return -1;
 1213: 	}
 1214:       else
 1215: 	return bgp_update_attrs(p, e, attrs, pool, 0);
 1216:     }
 1217:   else
 1218:     return bgp_create_attrs(p, e, attrs, pool);
 1219: }
 1220: 
 1221: static inline u32
 1222: bgp_get_neighbor(rte *r)
 1223: {
 1224:   eattr *e = ea_find(r->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
 1225:   u32 as;
 1226: 
 1227:   if (e && as_path_get_first(e->u.ptr, &as))
 1228:     return as;
 1229:   else
 1230:     return ((struct bgp_proto *) r->attrs->src->proto)->remote_as;
 1231: }
 1232: 
 1233: static inline int
 1234: rte_resolvable(rte *rt)
 1235: {
 1236:   int rd = rt->attrs->dest;  
 1237:   return (rd == RTD_ROUTER) || (rd == RTD_DEVICE) || (rd == RTD_MULTIPATH);
 1238: }
 1239: 
 1240: static inline int
 1241: rte_stale(rte *r)
 1242: {
 1243:   if (r->u.bgp.stale < 0)
 1244:   {
 1245:     /* If staleness is unknown, compute and cache it */
 1246:     eattr *a = ea_find(r->attrs->eattrs, EA_CODE(EAP_BGP, BA_COMMUNITY));
 1247:     r->u.bgp.stale = a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE);
 1248:   }
 1249: 
 1250:   return r->u.bgp.stale;
 1251: }
 1252: 
 1253: int
 1254: bgp_rte_better(rte *new, rte *old)
 1255: {
 1256:   struct bgp_proto *new_bgp = (struct bgp_proto *) new->attrs->src->proto;
 1257:   struct bgp_proto *old_bgp = (struct bgp_proto *) old->attrs->src->proto;
 1258:   eattr *x, *y;
 1259:   u32 n, o;
 1260: 
 1261:   /* Skip suppressed routes (see bgp_rte_recalculate()) */
 1262:   n = new->u.bgp.suppressed;
 1263:   o = old->u.bgp.suppressed;
 1264:   if (n > o)
 1265:     return 0;
 1266:   if (n < o)
 1267:     return 1;
 1268: 
 1269:   /* RFC 4271 9.1.2.1. Route resolvability test */
 1270:   n = rte_resolvable(new);
 1271:   o = rte_resolvable(old);
 1272:   if (n > o)
 1273:     return 1;
 1274:   if (n < o)
 1275:     return 0;
 1276: 
 1277:   /* LLGR draft - depreference stale routes */
 1278:   n = rte_stale(new);
 1279:   o = rte_stale(old);
 1280:   if (n > o)
 1281:     return 0;
 1282:   if (n < o)
 1283:     return 1;
 1284: 
 1285:   /* Start with local preferences */
 1286:   x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
 1287:   y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
 1288:   n = x ? x->u.data : new_bgp->cf->default_local_pref;
 1289:   o = y ? y->u.data : old_bgp->cf->default_local_pref;
 1290:   if (n > o)
 1291:     return 1;
 1292:   if (n < o)
 1293:     return 0;
 1294: 
 1295:   /* RFC 4271 9.1.2.2. a)  Use AS path lengths */
 1296:   if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths)
 1297:     {
 1298:       x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
 1299:       y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
 1300:       n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
 1301:       o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
 1302:       if (n < o)
 1303: 	return 1;
 1304:       if (n > o)
 1305: 	return 0;
 1306:     }
 1307: 
 1308:   /* RFC 4271 9.1.2.2. b) Use origins */
 1309:   x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
 1310:   y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
 1311:   n = x ? x->u.data : ORIGIN_INCOMPLETE;
 1312:   o = y ? y->u.data : ORIGIN_INCOMPLETE;
 1313:   if (n < o)
 1314:     return 1;
 1315:   if (n > o)
 1316:     return 0;
 1317: 
 1318:   /* RFC 4271 9.1.2.2. c) Compare MED's */
 1319:   /* Proper RFC 4271 path selection cannot be interpreted as finding
 1320:    * the best path in some ordering. It is implemented partially in
 1321:    * bgp_rte_recalculate() when deterministic_med option is
 1322:    * active. Without that option, the behavior is just an
 1323:    * approximation, which in specific situations may lead to
 1324:    * persistent routing loops, because it is nondeterministic - it
 1325:    * depends on the order in which routes appeared. But it is also the
 1326:    * same behavior as used by default in Cisco routers, so it is
 1327:    * probably not a big issue.
 1328:    */
 1329:   if (new_bgp->cf->med_metric || old_bgp->cf->med_metric ||
 1330:       (bgp_get_neighbor(new) == bgp_get_neighbor(old)))
 1331:     {
 1332:       x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
 1333:       y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
 1334:       n = x ? x->u.data : new_bgp->cf->default_med;
 1335:       o = y ? y->u.data : old_bgp->cf->default_med;
 1336:       if (n < o)
 1337: 	return 1;
 1338:       if (n > o)
 1339: 	return 0;
 1340:     }
 1341: 
 1342:   /* RFC 4271 9.1.2.2. d) Prefer external peers */
 1343:   if (new_bgp->is_internal > old_bgp->is_internal)
 1344:     return 0;
 1345:   if (new_bgp->is_internal < old_bgp->is_internal)
 1346:     return 1;
 1347: 
 1348:   /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
 1349:   n = new_bgp->cf->igp_metric ? new->attrs->igp_metric : 0;
 1350:   o = old_bgp->cf->igp_metric ? old->attrs->igp_metric : 0;
 1351:   if (n < o)
 1352:     return 1;
 1353:   if (n > o)
 1354:     return 0;
 1355: 
 1356:   /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */
 1357:   /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighor ID */
 1358:   x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
 1359:   y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
 1360:   n = x ? x->u.data : new_bgp->remote_id;
 1361:   o = y ? y->u.data : old_bgp->remote_id;
 1362: 
 1363:   /* RFC 5004 - prefer older routes */
 1364:   /* (if both are external and from different peer) */
 1365:   if ((new_bgp->cf->prefer_older || old_bgp->cf->prefer_older) &&
 1366:       !new_bgp->is_internal && n != o)
 1367:     return 0;
 1368: 
 1369:   /* rest of RFC 4271 9.1.2.2. f) */
 1370:   if (n < o)
 1371:     return 1;
 1372:   if (n > o)
 1373:     return 0;
 1374: 
 1375:   /* RFC 4456 9. b) Compare cluster list lengths */
 1376:   x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
 1377:   y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
 1378:   n = x ? int_set_get_size(x->u.ptr) : 0;
 1379:   o = y ? int_set_get_size(y->u.ptr) : 0;
 1380:   if (n < o)
 1381:     return 1;
 1382:   if (n > o)
 1383:     return 0;
 1384: 
 1385:   /* RFC 4271 9.1.2.2. g) Compare peer IP adresses */
 1386:   return (ipa_compare(new_bgp->cf->remote_ip, old_bgp->cf->remote_ip) < 0);
 1387: }
 1388: 
 1389: 
 1390: int
 1391: bgp_rte_mergable(rte *pri, rte *sec)
 1392: {
 1393:   struct bgp_proto *pri_bgp = (struct bgp_proto *) pri->attrs->src->proto;
 1394:   struct bgp_proto *sec_bgp = (struct bgp_proto *) sec->attrs->src->proto;
 1395:   eattr *x, *y;
 1396:   u32 p, s;
 1397: 
 1398:   /* Skip suppressed routes (see bgp_rte_recalculate()) */
 1399:   if (pri->u.bgp.suppressed != sec->u.bgp.suppressed)
 1400:     return 0;
 1401: 
 1402:   /* RFC 4271 9.1.2.1. Route resolvability test */
 1403:   if (!rte_resolvable(sec))
 1404:     return 0;
 1405: 
 1406:   /* LLGR draft - depreference stale routes */
 1407:   if (rte_stale(pri) != rte_stale(sec))
 1408:     return 0;
 1409: 
 1410:   /* Start with local preferences */
 1411:   x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
 1412:   y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
 1413:   p = x ? x->u.data : pri_bgp->cf->default_local_pref;
 1414:   s = y ? y->u.data : sec_bgp->cf->default_local_pref;
 1415:   if (p != s)
 1416:     return 0;
 1417: 
 1418:   /* RFC 4271 9.1.2.2. a)  Use AS path lengths */
 1419:   if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths)
 1420:     {
 1421:       x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
 1422:       y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
 1423:       p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
 1424:       s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
 1425: 
 1426:       if (p != s)
 1427: 	return 0;
 1428: 
 1429: //      if (DELTA(p, s) > pri_bgp->cf->relax_multipath)
 1430: //	return 0;
 1431:     }
 1432: 
 1433:   /* RFC 4271 9.1.2.2. b) Use origins */
 1434:   x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
 1435:   y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
 1436:   p = x ? x->u.data : ORIGIN_INCOMPLETE;
 1437:   s = y ? y->u.data : ORIGIN_INCOMPLETE;
 1438:   if (p != s)
 1439:     return 0;
 1440: 
 1441:   /* RFC 4271 9.1.2.2. c) Compare MED's */
 1442:   if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric ||
 1443:       (bgp_get_neighbor(pri) == bgp_get_neighbor(sec)))
 1444:     {
 1445:       x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
 1446:       y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
 1447:       p = x ? x->u.data : pri_bgp->cf->default_med;
 1448:       s = y ? y->u.data : sec_bgp->cf->default_med;
 1449:       if (p != s)
 1450: 	return 0;
 1451:     }
 1452: 
 1453:   /* RFC 4271 9.1.2.2. d) Prefer external peers */
 1454:   if (pri_bgp->is_internal != sec_bgp->is_internal)
 1455:     return 0;
 1456: 
 1457:   /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
 1458:   p = pri_bgp->cf->igp_metric ? pri->attrs->igp_metric : 0;
 1459:   s = sec_bgp->cf->igp_metric ? sec->attrs->igp_metric : 0;
 1460:   if (p != s)
 1461:     return 0;
 1462: 
 1463:   /* Remaining criteria are ignored */
 1464: 
 1465:   return 1;
 1466: }
 1467: 
 1468: 
 1469: 
 1470: static inline int
 1471: same_group(rte *r, u32 lpref, u32 lasn)
 1472: {
 1473:   return (r->pref == lpref) && (bgp_get_neighbor(r) == lasn);
 1474: }
 1475: 
 1476: static inline int
 1477: use_deterministic_med(rte *r)
 1478: {
 1479:   struct proto *P = r->attrs->src->proto;
 1480:   return (P->proto == &proto_bgp) && ((struct bgp_proto *) P)->cf->deterministic_med;
 1481: }
 1482: 
/*
 * Lower level of the deterministic-MED route selection.
 *
 * Called with the route being added (@new, may be NULL), the route being
 * replaced or removed (@old, may be NULL) and the previous overall best
 * route of @net (@old_best, may be NULL). It maintains the u.bgp.suppressed
 * flag of the routes in the affected group so that exactly the
 * best-in-group route is left unsuppressed.
 *
 * Returns non-zero when the caller has to force a full recalculation of
 * the best route, zero when it does not. @table is only passed through to
 * the recursive calls.
 */
int
bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best)
{
  rte *r, *s;
  rte *key = new ? new : old;
  u32 lpref = key->pref;
  u32 lasn = bgp_get_neighbor(key);
  int old_is_group_best = 0;

  /*
   * Proper RFC 4271 path selection is a bit complicated, it cannot be
   * implemented just by rte_better(), because it is not a linear
   * ordering. But it can be split into two levels, where the lower
   * level chooses the best routes in each group of routes from the
   * same neighboring AS and higher level chooses the best route (with
   * a slightly different ordering) between the best-in-group routes.
   *
   * When deterministic_med is disabled, we just ignore this issue and
   * choose the best route by bgp_rte_better() alone. If enabled, the
   * lower level of the route selection is done here (for the group
   * to which the changed route belongs), all routes in group are
   * marked as suppressed, just chosen best-in-group is not.
   *
   * Global best route selection then implements higher level by
   * choosing between non-suppressed routes (as they are always
   * preferred over suppressed routes). Routes from BGP protocols
   * that do not set deterministic_med are just never suppressed. As
   * they do not participate in the lower level selection, it is OK
   * that this fn is not called for them.
   *
   * The idea is simple, the implementation is more problematic,
   * mostly because of optimizations in rte_recalculate() that
   * avoid full recalculation in most cases.
   *
   * We can assume that at least one of new, old is non-NULL and both
   * are from the same protocol with enabled deterministic_med. We
   * group routes by both neighbor AS (lasn) and preference (lpref),
   * because bgp_rte_better() does not handle preference itself.
   */

  /* If new and old are from different groups, we just process that
     as two independent events */
  if (new && old && !same_group(old, lpref, lasn))
    {
      int i1, i2;
      i1 = bgp_rte_recalculate(table, net, NULL, old, old_best);
      i2 = bgp_rte_recalculate(table, net, new, NULL, old_best);
      return i1 || i2;
    }

  /*
   * We could find the best-in-group and then make some shortcuts like
   * in rte_recalculate, but as we would have to walk through all
   * net->routes just to find it, it is probably not worth it. So we
   * just have two simpler fast cases that use just the old route.
   * We also set suppressed flag to avoid using it in bgp_rte_better().
   */

  if (new)
    new->u.bgp.suppressed = 1;

  if (old)
    {
      /* Remember whether old was best-in-group before the flag is overwritten */
      old_is_group_best = !old->u.bgp.suppressed;
      old->u.bgp.suppressed = 1;
      int new_is_better = new && bgp_rte_better(new, old);

      /* The first case - replace not best with worse (or remove not best) */
      if (!old_is_group_best && !new_is_better)
	return 0;

      /* The second case - replace the best with better */
      if (old_is_group_best && new_is_better)
	{
	  /* new is best-in-group, see the discussion below - this is
	     a special variant of NBG && OBG. From OBG we can deduce
	     that same_group(old_best) iff (old == old_best)  */
	  new->u.bgp.suppressed = 0;
	  return (old == old_best);
	}
    }

  /* The default case - find a new best-in-group route */
  r = new; /* new may not be in the list */
  for (s=net->routes; rte_is_valid(s); s=s->next)
    if (use_deterministic_med(s) && same_group(s, lpref, lasn))
      {
	s->u.bgp.suppressed = 1;
	if (!r || bgp_rte_better(s, r))
	  r = s;
      }

  /* Simple case - the last route in group disappears */
  if (!r)
    return 0;

  /* Found best-in-group */
  r->u.bgp.suppressed = 0;

  /*
   * There are generally two reasons why we have to force
   * recalculation (return 1): First, the new route may be wrongfully
   * chosen to be the best in the first case check in
   * rte_recalculate(), this may happen only if old_best is from the
   * same group. Second, another (different than new route)
   * best-in-group is chosen and that may be the proper best (although
   * rte_recalculate() would ignore that possibility).
   *
   * There are three possible cases according to whether the old route
   * was the best in group (OBG, stored in old_is_group_best) and
   * whether the new route is the best in group (NBG, tested by r == new).
   * These cases work even if old or new is NULL.
   *
   * NBG -> new is a possible candidate for the best route, so we just
   *        check for the first reason using same_group().
   *
   * !NBG && OBG -> Second reason applies, return 1
   *
   * !NBG && !OBG -> Best in group does not change, old != old_best,
   *                 rte_better(new, old_best) is false and therefore
   *                 the first reason does not apply, return 0
   */

  if (r == new)
    return old_best && same_group(old_best, lpref, lasn);
  else
    return old_is_group_best;
}
 1611: 
 1612: struct rte *
 1613: bgp_rte_modify_stale(struct rte *r, struct linpool *pool)
 1614: {
 1615:   eattr *a = ea_find(r->attrs->eattrs, EA_CODE(EAP_BGP, BA_COMMUNITY));
 1616:   struct adata *ad = a ? a->u.ptr : NULL;
 1617: 
 1618:   if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR))
 1619:     return NULL;
 1620: 
 1621:   if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE))
 1622:     return r;
 1623: 
 1624:   r = rte_cow_rta(r, pool);
 1625:   bgp_attach_attr(&(r->attrs->eattrs), pool, BA_COMMUNITY,
 1626: 		  (uintptr_t) int_set_add(pool, ad, BGP_COMM_LLGR_STALE));
 1627:   r->u.bgp.stale = 1;
 1628: 
 1629:   return r;
 1630: }
 1631: 
 1632: 
 1633: static struct adata *
 1634: bgp_aggregator_convert_to_new(struct adata *old, struct linpool *pool)
 1635: {
 1636:   struct adata *newa = lp_alloc(pool, sizeof(struct adata) + 8);
 1637:   newa->length = 8;
 1638:   aggregator_convert_to_new(old, newa->data);
 1639:   return newa;
 1640: }
 1641: 
 1642: /* Take last req_as ASNs from path old2 (in 2B format), convert to 4B format
 1643:  * and append path old4 (in 4B format).
 1644:  */
 1645: static struct adata *
 1646: bgp_merge_as_paths(struct adata *old2, struct adata *old4, int req_as, struct linpool *pool)
 1647: {
 1648:   byte buf[old2->length * 2];
 1649: 
 1650:   int ol = as_path_convert_to_new(old2, buf, req_as);
 1651:   int nl = ol + (old4 ? old4->length : 0);
 1652: 
 1653:   struct adata *newa = lp_alloc(pool, sizeof(struct adata) + nl);
 1654:   newa->length = nl;
 1655:   memcpy(newa->data, buf, ol);
 1656:   if (old4) memcpy(newa->data + ol, old4->data, old4->length);
 1657: 
 1658:   return newa;
 1659: }
 1660: 
 1661: static int
 1662: as4_aggregator_valid(struct adata *aggr)
 1663: {
 1664:   return aggr->length == 8;
 1665: }
 1666: 
 1667: 
 1668: /* Reconstruct 4B AS_PATH and AGGREGATOR according to RFC 4893 4.2.3 */
 1669: static void
 1670: bgp_reconstruct_4b_atts(struct bgp_proto *p, rta *a, struct linpool *pool)
 1671: {
 1672:   eattr *p2 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
 1673:   eattr *p4 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS4_PATH));
 1674:   eattr *a2 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AGGREGATOR));
 1675:   eattr *a4 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS4_AGGREGATOR));
 1676:   int a4_removed = 0;
 1677: 
 1678:   if (a4 && !as4_aggregator_valid(a4->u.ptr))
 1679:     {
 1680:       log(L_WARN "%s: AS4_AGGREGATOR attribute is invalid, skipping attribute", p->p.name);
 1681:       a4 = NULL;
 1682:       a4_removed = 1;
 1683:     }
 1684: 
 1685:   if (a2)
 1686:     {
 1687:       u32 a2_as = get_u16(a2->u.ptr->data);
 1688: 
 1689:       if (a4)
 1690: 	{
 1691: 	  if (a2_as != AS_TRANS)
 1692: 	    {
 1693: 	      /* Routes were aggregated by old router and therefore AS4_PATH
 1694: 	       * and AS4_AGGREGATOR is invalid
 1695: 	       *
 1696: 	       * Convert AS_PATH and AGGREGATOR to 4B format and finish.
 1697: 	       */
 1698: 
 1699: 	      a2->u.ptr = bgp_aggregator_convert_to_new(a2->u.ptr, pool);
 1700: 	      p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, NULL, AS_PATH_MAXLEN, pool);
 1701: 
 1702: 	      return;
 1703: 	    }
 1704: 	  else
 1705: 	    {
 1706: 	      /* Common case, use AS4_AGGREGATOR attribute */
 1707: 	      a2->u.ptr = a4->u.ptr;
 1708: 	    }
 1709: 	}
 1710:       else
 1711: 	{
 1712: 	  /* Common case, use old AGGREGATOR attribute */
 1713: 	  a2->u.ptr = bgp_aggregator_convert_to_new(a2->u.ptr, pool);
 1714: 
 1715: 	  if ((a2_as == AS_TRANS) && !a4_removed)
 1716: 	    log(L_WARN "%s: AGGREGATOR attribute contain AS_TRANS, but AS4_AGGREGATOR is missing", p->p.name);
 1717: 	}
 1718:     }
 1719:   else
 1720:     if (a4)
 1721:       log(L_WARN "%s: AS4_AGGREGATOR attribute received, but AGGREGATOR attribute is missing", p->p.name);
 1722: 
 1723:   int p2_len = as_path_getlen_int(p2->u.ptr, 2);
 1724:   int p4_len = p4 ? validate_as4_path(p, p4->u.ptr) : -1;
 1725: 
 1726:   if (p4 && (p4_len < 0))
 1727:     log(L_WARN "%s: AS4_PATH attribute is malformed, skipping attribute", p->p.name);
 1728: 
 1729:   if ((p4_len <= 0) || (p2_len < p4_len))
 1730:     p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, NULL, AS_PATH_MAXLEN, pool);
 1731:   else
 1732:     p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, p4->u.ptr, p2_len - p4_len, pool);
 1733: }
 1734: 
 1735: static void
 1736: bgp_remove_as4_attrs(struct bgp_proto *p, rta *a)
 1737: {
 1738:   unsigned id1 = EA_CODE(EAP_BGP, BA_AS4_PATH);
 1739:   unsigned id2 = EA_CODE(EAP_BGP, BA_AS4_AGGREGATOR);
 1740:   ea_list **el = &(a->eattrs);
 1741: 
 1742:   /* We know that ea_lists constructed in bgp_decode attrs have one attribute per ea_list struct */
 1743:   while (*el != NULL)
 1744:     {
 1745:       unsigned fid = (*el)->attrs[0].id;
 1746: 
 1747:       if ((fid == id1) || (fid == id2))
 1748: 	{
 1749: 	  *el = (*el)->next;
 1750: 	  if (p->as4_session)
 1751: 	    log(L_WARN "%s: Unexpected AS4_* attributes received", p->p.name);
 1752: 	}
 1753:       else
 1754: 	el = &((*el)->next);
 1755:     }
 1756: }
 1757: 
 1758: /**
 1759:  * bgp_decode_attrs - check and decode BGP attributes
 1760:  * @conn: connection
 1761:  * @attr: start of attribute block
 1762:  * @len: length of attribute block
 1763:  * @pool: linear pool to make all the allocations in
 1764:  * @mandatory: 1 iff presence of mandatory attributes has to be checked
 1765:  *
 1766:  * This function takes a BGP attribute block (a part of an Update message), checks
 1767:  * its consistency and converts it to a list of BIRD route attributes represented
 1768:  * by a &rta.
 1769:  */
 1770: struct rta *
 1771: bgp_decode_attrs(struct bgp_conn *conn, byte *attr, uint len, struct linpool *pool, int mandatory)
 1772: {
 1773:   struct bgp_proto *bgp = conn->bgp;
 1774:   rta *a = lp_alloc(pool, sizeof(struct rta));
 1775:   uint flags, code, l, i, type;
 1776:   int errcode;
 1777:   byte *z, *attr_start;
 1778:   byte seen[256/8];
 1779:   ea_list *ea;
 1780:   struct adata *ad;
 1781:   int withdraw = 0;
 1782: 
 1783:   bzero(a, sizeof(rta));
 1784:   a->source = RTS_BGP;
 1785:   a->scope = SCOPE_UNIVERSE;
 1786:   a->cast = RTC_UNICAST;
 1787:   /* a->dest = RTD_ROUTER;  -- set in bgp_set_next_hop() */
 1788:   a->from = bgp->cf->remote_ip;
 1789: 
 1790:   /* Parse the attributes */
 1791:   bzero(seen, sizeof(seen));
 1792:   DBG("BGP: Parsing attributes\n");
 1793:   while (len)
 1794:     {
 1795:       if (len < 2)
 1796: 	goto malformed;
 1797:       attr_start = attr;
 1798:       flags = *attr++;
 1799:       code = *attr++;
 1800:       len -= 2;
 1801:       if (flags & BAF_EXT_LEN)
 1802: 	{
 1803: 	  if (len < 2)
 1804: 	    goto malformed;
 1805: 	  l = get_u16(attr);
 1806: 	  attr += 2;
 1807: 	  len -= 2;
 1808: 	}
 1809:       else
 1810: 	{
 1811: 	  if (len < 1)
 1812: 	    goto malformed;
 1813: 	  l = *attr++;
 1814: 	  len--;
 1815: 	}
 1816:       if (l > len)
 1817: 	goto malformed;
 1818:       len -= l;
 1819:       z = attr;
 1820:       attr += l;
 1821:       DBG("Attr %02x %02x %d\n", code, flags, l);
 1822:       if (seen[code/8] & (1 << (code%8)))
 1823: 	goto malformed;
 1824:       if (ATTR_KNOWN(code))
 1825: 	{
 1826: 	  struct attr_desc *desc = &bgp_attr_table[code];
 1827: 	  if (desc->expected_length >= 0 && desc->expected_length != (int) l)
 1828: 	    { errcode = 5; goto err; }
 1829: 	  if ((desc->expected_flags ^ flags) & (BAF_OPTIONAL | BAF_TRANSITIVE))
 1830: 	    { errcode = 4; goto err; }
 1831: 	  if (!bgp->is_internal)
 1832: 	    {
 1833: 	      if (!desc->allow_in_ebgp)
 1834: 		continue;
 1835: 	      if ((code == BA_LOCAL_PREF) && !bgp->cf->allow_local_pref)
 1836: 		continue;
 1837: 	    }
 1838: 	  if (desc->validate)
 1839: 	    {
 1840: 	      errcode = desc->validate(bgp, z, l);
 1841: 	      if (errcode > 0)
 1842: 		goto err;
 1843: 	      if (errcode == IGNORE)
 1844: 		continue;
 1845: 	      if (errcode <= WITHDRAW)
 1846: 		{
 1847: 		  log(L_WARN "%s: Attribute %s is malformed, withdrawing update",
 1848: 		      bgp->p.name, desc->name);
 1849: 		  withdraw = 1;
 1850: 		}
 1851: 	    }
 1852: 	  else if (code == BA_AS_PATH)
 1853: 	    {
 1854: 	      /* Special case as it might also trim the attribute */
 1855: 	      if (validate_as_path(bgp, z, &l) < 0)
 1856: 		{ errcode = 11; goto err; }
 1857: 	    }
 1858: 	  type = desc->type;
 1859: 	}
 1860:       else				/* Unknown attribute */
 1861: 	{
 1862: 	  if (!(flags & BAF_OPTIONAL))
 1863: 	    { errcode = 2; goto err; }
 1864: 	  type = EAF_TYPE_OPAQUE;
 1865: 	}
 1866:       
 1867:       // Only OPTIONAL and TRANSITIVE attributes may have non-zero PARTIAL flag
 1868:       // if (!((flags & BAF_OPTIONAL) && (flags & BAF_TRANSITIVE)) && (flags & BAF_PARTIAL))
 1869:       //   { errcode = 4; goto err; }
 1870: 
 1871:       seen[code/8] |= (1 << (code%8));
 1872:       ea = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr));
 1873:       ea->next = a->eattrs;
 1874:       a->eattrs = ea;
 1875:       ea->flags = 0;
 1876:       ea->count = 1;
 1877:       ea->attrs[0].id = EA_CODE(EAP_BGP, code);
 1878:       ea->attrs[0].flags = flags;
 1879:       ea->attrs[0].type = type;
 1880:       if (type & EAF_EMBEDDED)
 1881: 	ad = NULL;
 1882:       else
 1883: 	{
 1884: 	  ad = lp_alloc(pool, sizeof(struct adata) + l);
 1885: 	  ea->attrs[0].u.ptr = ad;
 1886: 	  ad->length = l;
 1887: 	  memcpy(ad->data, z, l);
 1888: 	}
 1889:       switch (type)
 1890: 	{
 1891: 	case EAF_TYPE_ROUTER_ID:
 1892: 	case EAF_TYPE_INT:
 1893: 	  if (l == 1)
 1894: 	    ea->attrs[0].u.data = *z;
 1895: 	  else
 1896: 	    ea->attrs[0].u.data = get_u32(z);
 1897: 	  break;
 1898: 	case EAF_TYPE_IP_ADDRESS:
 1899: 	  ipa_ntoh(*(ip_addr *)ad->data);
 1900: 	  break;
 1901: 	case EAF_TYPE_INT_SET:
 1902: 	case EAF_TYPE_LC_SET:
 1903: 	case EAF_TYPE_EC_SET:
 1904: 	  {
 1905: 	    u32 *z = (u32 *) ad->data;
 1906: 	    for(i=0; i<ad->length/4; i++)
 1907: 	      z[i] = ntohl(z[i]);
 1908: 	    break;
 1909: 	  }
 1910: 	}
 1911:     }
 1912: 
 1913:   if (withdraw)
 1914:     goto withdraw;
 1915: 
 1916: #ifdef IPV6
 1917:   /* If we received MP_REACH_NLRI we should check mandatory attributes */
 1918:   if (bgp->mp_reach_len != 0)
 1919:     mandatory = 1;
 1920: #endif
 1921: 
 1922:   /* If there is no (reachability) NLRI, we should exit now */
 1923:   if (! mandatory)
 1924:     return a;
 1925: 
 1926:   /* Check if all mandatory attributes are present */
 1927:   for(i=0; i < ARRAY_SIZE(bgp_mandatory_attrs); i++)
 1928:     {
 1929:       code = bgp_mandatory_attrs[i];
 1930:       if (!(seen[code/8] & (1 << (code%8))))
 1931: 	{
 1932: 	  bgp_error(conn, 3, 3, &bgp_mandatory_attrs[i], 1);
 1933: 	  return NULL;
 1934: 	}
 1935:     }
 1936: 
 1937:   /* When receiving attributes from non-AS4-aware BGP speaker,
 1938:    * we have to reconstruct 4B AS_PATH and AGGREGATOR attributes
 1939:    */
 1940:   if (! bgp->as4_session)
 1941:     bgp_reconstruct_4b_atts(bgp, a, pool);
 1942: 
 1943:   bgp_remove_as4_attrs(bgp, a);
 1944: 
 1945:   /* If the AS path attribute contains our AS, reject the routes */
 1946:   if (bgp_as_path_loopy(bgp, a))
 1947:     goto withdraw;
 1948: 
 1949:   /* Two checks for IBGP loops caused by route reflection, RFC 4456 */ 
 1950:   if (bgp_originator_id_loopy(bgp, a) ||
 1951:       bgp_cluster_list_loopy(bgp, a))
 1952:     goto withdraw;
 1953: 
 1954:   /* If there's no local preference, define one */
 1955:   if (!(seen[0] & (1 << BA_LOCAL_PREF)))
 1956:     bgp_attach_attr(&a->eattrs, pool, BA_LOCAL_PREF, bgp->cf->default_local_pref);
 1957: 
 1958:   return a;
 1959: 
 1960: withdraw:
 1961:   return NULL;
 1962: 
 1963: malformed:
 1964:   bgp_error(conn, 3, 1, NULL, 0);
 1965:   return NULL;
 1966: 
 1967: err:
 1968:   bgp_error(conn, 3, errcode, attr_start, z+l-attr_start);
 1969:   return NULL;
 1970: }
 1971: 
 1972: int
 1973: bgp_get_attr(eattr *a, byte *buf, int buflen)
 1974: {
 1975:   uint i = EA_ID(a->id);
 1976:   struct attr_desc *d;
 1977:   int len;
 1978: 
 1979:   if (ATTR_KNOWN(i))
 1980:     {
 1981:       d = &bgp_attr_table[i];
 1982:       len = bsprintf(buf, "%s", d->name);
 1983:       buf += len;
 1984:       if (d->format)
 1985: 	{
 1986: 	  *buf++ = ':';
 1987: 	  *buf++ = ' ';
 1988: 	  d->format(a, buf, buflen - len - 2);
 1989: 	  return GA_FULL;
 1990: 	}
 1991:       return GA_NAME;
 1992:     }
 1993:   bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? " [t]" : "");
 1994:   return GA_NAME;
 1995: }
 1996: 
 1997: void
 1998: bgp_init_bucket_table(struct bgp_proto *p)
 1999: {
 2000:   p->hash_size = 256;
 2001:   p->hash_limit = p->hash_size * 4;
 2002:   p->bucket_hash = mb_allocz(p->p.pool, p->hash_size * sizeof(struct bgp_bucket *));
 2003:   init_list(&p->bucket_queue);
 2004:   p->withdraw_bucket = NULL;
 2005:   // fib_init(&p->prefix_fib, p->p.pool, sizeof(struct bgp_prefix), 0, bgp_init_prefix);
 2006: }
 2007: 
 2008: void
 2009: bgp_free_bucket_table(struct bgp_proto *p)
 2010: {
 2011:   mb_free(p->bucket_hash);
 2012:   p->bucket_hash = NULL;
 2013: 
 2014:   struct bgp_bucket *b;
 2015:   WALK_LIST_FIRST(b, p->bucket_queue)
 2016:   {
 2017:     rem_node(&b->send_node);
 2018:     mb_free(b);
 2019:   }
 2020: 
 2021:   mb_free(p->withdraw_bucket);
 2022:   p->withdraw_bucket = NULL;
 2023: }
 2024: 
 2025: void
 2026: bgp_get_route_info(rte *e, byte *buf, ea_list *attrs)
 2027: {
 2028:   eattr *p = ea_find(attrs, EA_CODE(EAP_BGP, BA_AS_PATH));
 2029:   eattr *o = ea_find(attrs, EA_CODE(EAP_BGP, BA_ORIGIN));
 2030:   u32 origas;
 2031: 
 2032:   buf += bsprintf(buf, " (%d", e->pref);
 2033: 
 2034:   if (e->u.bgp.suppressed)
 2035:     buf += bsprintf(buf, "-");
 2036: 
 2037:   if (rte_stale(e))
 2038:     buf += bsprintf(buf, "s");
 2039: 
 2040:   if (e->attrs->hostentry)
 2041:     {
 2042:       if (!rte_resolvable(e))
 2043: 	buf += bsprintf(buf, "/-");
 2044:       else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN)
 2045: 	buf += bsprintf(buf, "/?");
 2046:       else
 2047: 	buf += bsprintf(buf, "/%d", e->attrs->igp_metric);
 2048:     }
 2049:   buf += bsprintf(buf, ") [");
 2050: 
 2051:   if (p && as_path_get_last(p->u.ptr, &origas))
 2052:     buf += bsprintf(buf, "AS%u", origas);
 2053:   if (o)
 2054:     buf += bsprintf(buf, "%c", "ie?"[o->u.data]);
 2055:   strcpy(buf, "]");
 2056: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>