Annotation of embedaddon/bird/proto/bgp/attrs.c, revision 1.1.1.2

1.1       misho       1: /*
                      2:  *     BIRD -- BGP Attributes
                      3:  *
                      4:  *     (c) 2000 Martin Mares <mj@ucw.cz>
                      5:  *
                      6:  *     Can be freely distributed and used under the terms of the GNU GPL.
                      7:  */
                      8: 
                      9: #undef LOCAL_DEBUG
                     10: 
                     11: #include <stdlib.h>
                     12: 
                     13: #include "nest/bird.h"
                     14: #include "nest/iface.h"
                     15: #include "nest/protocol.h"
                     16: #include "nest/route.h"
                     17: #include "nest/attrs.h"
                     18: #include "conf/conf.h"
                     19: #include "lib/resource.h"
                     20: #include "lib/string.h"
                     21: #include "lib/unaligned.h"
                     22: 
                     23: #include "bgp.h"
                     24: 
                     25: /*
                     26:  *   UPDATE message error handling
                     27:  *
                     28:  * All checks from RFC 4271 6.3 are done as specified with these exceptions:
                     29:  *  - The semantic check of an IP address from NEXT_HOP attribute is missing.
                     30:  *  - Checks of some optional attribute values are missing.
                     31:  *  - Syntactic and semantic checks of NLRIs (done in DECODE_PREFIX())
                     32:  *    are probably inadequate.
                     33:  *
                     34:  * Loop detection based on AS_PATH causes updates to be withdrawn. RFC
                     35:  * 4271 does not explicitly specify the behavior in that case.
                     36:  *
                     37:  * Loop detection related to route reflection (based on ORIGINATOR_ID
                     38:  * and CLUSTER_LIST) causes updates to be withdrawn. RFC 4456 8
                     39:  * specifies that such updates should be ignored, but that is generally
                     40:  * a bad idea.
                     41:  *
                     42:  * Error checking of optional transitive attributes is done according to
                     43:  * draft-ietf-idr-optional-transitive-03, but errors are handled always
                     44:  * as withdraws.
                     45:  *
                     46:  * Unexpected AS_CONFED_* segments in AS_PATH are logged and removed,
                     47:  * but unknown segments cause a session drop with Malformed AS_PATH
                     48:  * error (see validate_path()). The behavior in such case is not
                     49:  * explicitly specified by RFC 4271. RFC 5065 specifies that
                     50:  * inconsistent AS_CONFED_* segments should cause a session drop, but
                     51:  * implementations that pass invalid AS_CONFED_* segments are
                     52:  * widespread.
                     53:  *
1.1.1.2 ! misho      54:  * Error handling of AS4_* attributes is done as specified by RFC 6793. There
        !            55:  * are several possible inconsistencies between AGGREGATOR and AS4_AGGREGATOR
        !            56:  * that are not handled by that RFC, these are logged and ignored (see
1.1       misho      57:  * bgp_reconstruct_4b_attrs()).
                     58:  */
                     59: 
                     60: 
                     61: static byte bgp_mandatory_attrs[] = { BA_ORIGIN, BA_AS_PATH
                     62: #ifndef IPV6
                     63: ,BA_NEXT_HOP
                     64: #endif
                     65: };
                     66: 
                     67: struct attr_desc {
                     68:   char *name;
                     69:   int expected_length;
                     70:   int expected_flags;
                     71:   int type;
                     72:   int allow_in_ebgp;
                     73:   int (*validate)(struct bgp_proto *p, byte *attr, int len);
                     74:   void (*format)(eattr *ea, byte *buf, int buflen);
                     75: };
                     76: 
                     77: #define IGNORE -1
                     78: #define WITHDRAW -2
                     79: 
                     80: static int
                     81: bgp_check_origin(struct bgp_proto *p UNUSED, byte *a, int len UNUSED)
                     82: {
                     83:   if (*a > 2)
                     84:     return 6;
                     85:   return 0;
                     86: }
                     87: 
                     88: static void
                     89: bgp_format_origin(eattr *a, byte *buf, int buflen UNUSED)
                     90: {
                     91:   static char *bgp_origin_names[] = { "IGP", "EGP", "Incomplete" };
                     92: 
                     93:   bsprintf(buf, bgp_origin_names[a->u.data]);
                     94: }
                     95: 
                     96: static int
                     97: path_segment_contains(byte *p, int bs, u32 asn)
                     98: {
                     99:   int i;
                    100:   int len = p[1];
                    101:   p += 2;
                    102: 
                    103:   for(i=0; i<len; i++)
                    104:     {
                    105:       u32 asn2 = (bs == 4) ? get_u32(p) : get_u16(p);
                    106:       if (asn2 == asn)
                    107:        return 1;
                    108:       p += bs;
                    109:     }
                    110: 
                    111:   return 0;
                    112: }
                    113: 
                    114: /* Validates path attribute, removes AS_CONFED_* segments, and also returns path length */
                    115: static int
                    116: validate_path(struct bgp_proto *p, int as_path, int bs, byte *idata, uint *ilength)
                    117: {
                    118:   int res = 0;
                    119:   u8 *a, *dst;
                    120:   int len, plen;
                    121: 
                    122:   dst = a = idata;
                    123:   len = *ilength;
                    124: 
                    125:   while (len)
                    126:     {
                    127:       if (len < 2)
                    128:        return -1;
                    129: 
                    130:       plen = 2 + bs * a[1];
                    131:       if (len < plen)
                    132:        return -1;
                    133: 
                    134:       if (a[1] == 0)
                    135:         {
                    136:          log(L_WARN "%s: %s_PATH attribute contains empty segment, skipping it",
                    137:              p->p.name, as_path ? "AS" : "AS4");
                    138:          goto skip;
                    139:        }
                    140: 
                    141:       switch (a[0])
                    142:        {
                    143:        case AS_PATH_SET:
                    144:          res++;
                    145:          break;
                    146: 
                    147:        case AS_PATH_SEQUENCE:
                    148:          res += a[1];
                    149:          break;
                    150: 
                    151:        case AS_PATH_CONFED_SEQUENCE:
                    152:        case AS_PATH_CONFED_SET:
                    153:          if (as_path && path_segment_contains(a, bs, p->remote_as))
                    154:            {
                    155:              log(L_WARN "%s: AS_CONFED_* segment with peer ASN found, misconfigured confederation?", p->p.name);
                    156:              return -1;
                    157:            }
                    158: 
                    159:          log(L_WARN "%s: %s_PATH attribute contains AS_CONFED_* segment, skipping segment",
                    160:              p->p.name, as_path ? "AS" : "AS4");
                    161:          goto skip;
                    162: 
                    163:        default:
                    164:          return -1;
                    165:        }
                    166: 
                    167:       if (dst != a)
                    168:        memmove(dst, a, plen);
                    169:       dst += plen;
                    170: 
                    171:     skip:
                    172:       len -= plen;
                    173:       a += plen;
                    174:     }
                    175: 
                    176:   *ilength = dst - idata;
                    177:   return res;
                    178: }
                    179: 
                    180: static inline int
                    181: validate_as_path(struct bgp_proto *p, byte *a, int *len)
                    182: {
                    183:   return validate_path(p, 1, p->as4_session ? 4 : 2, a, len);
                    184: }
                    185: 
                    186: static inline int
                    187: validate_as4_path(struct bgp_proto *p, struct adata *path)
                    188: {
                    189:   return validate_path(p, 0, 4, path->data, &path->length);
                    190: }
                    191: 
                    192: static int
                    193: bgp_check_next_hop(struct bgp_proto *p UNUSED, byte *a UNUSED6, int len UNUSED6)
                    194: {
                    195: #ifdef IPV6
                    196:   return IGNORE;
                    197: #else
                    198:   ip_addr addr;
                    199: 
                    200:   memcpy(&addr, a, len);
                    201:   ipa_ntoh(addr);
                    202:   if (ipa_classify(addr) & IADDR_HOST)
                    203:     return 0;
                    204:   else
                    205:     return 8;
                    206: #endif
                    207: }
                    208: 
                    209: static void
                    210: bgp_format_next_hop(eattr *a, byte *buf, int buflen UNUSED)
                    211: {
                    212:   ip_addr *ipp = (ip_addr *) a->u.ptr->data;
                    213: #ifdef IPV6
                    214:   /* in IPv6, we might have two addresses in NEXT HOP */
                    215:   if ((a->u.ptr->length == NEXT_HOP_LENGTH) && ipa_nonzero(ipp[1]))
                    216:     {
                    217:       bsprintf(buf, "%I %I", ipp[0], ipp[1]);
                    218:       return;
                    219:     }
                    220: #endif
                    221: 
                    222:   bsprintf(buf, "%I", ipp[0]);
                    223: }
                    224: 
                    225: static int
                    226: bgp_check_aggregator(struct bgp_proto *p, byte *a UNUSED, int len)
                    227: {
                    228:   int exp_len = p->as4_session ? 8 : 6;
                    229:   
                    230:   return (len == exp_len) ? 0 : WITHDRAW;
                    231: }
                    232: 
                    233: static void
                    234: bgp_format_aggregator(eattr *a, byte *buf, int buflen UNUSED)
                    235: {
                    236:   struct adata *ad =  a->u.ptr;
                    237:   byte *data = ad->data;
                    238:   u32 as;
                    239: 
                    240:   as = get_u32(data);
                    241:   data += 4;
                    242: 
                    243:   bsprintf(buf, "%d.%d.%d.%d AS%u", data[0], data[1], data[2], data[3], as);
                    244: }
                    245: 
                    246: static int
                    247: bgp_check_community(struct bgp_proto *p UNUSED, byte *a UNUSED, int len)
                    248: {
                    249:   return ((len % 4) == 0) ? 0 : WITHDRAW;
                    250: }
                    251: 
                    252: static int
                    253: bgp_check_cluster_list(struct bgp_proto *p UNUSED, byte *a UNUSED, int len)
                    254: {
                    255:   return ((len % 4) == 0) ? 0 : 5;
                    256: }
                    257: 
                    258: static void
                    259: bgp_format_cluster_list(eattr *a, byte *buf, int buflen)
                    260: {
                    261:   /* Truncates cluster lists larger than buflen, probably not a problem */
                    262:   int_set_format(a->u.ptr, 0, -1, buf, buflen);
                    263: }
                    264: 
                    265: static int
                    266: bgp_check_reach_nlri(struct bgp_proto *p UNUSED, byte *a UNUSED, int len UNUSED)
                    267: {
                    268: #ifdef IPV6
                    269:   p->mp_reach_start = a;
                    270:   p->mp_reach_len = len;
                    271: #endif
                    272:   return IGNORE;
                    273: }
                    274: 
                    275: static int
                    276: bgp_check_unreach_nlri(struct bgp_proto *p UNUSED, byte *a UNUSED, int len UNUSED)
                    277: {
                    278: #ifdef IPV6
                    279:   p->mp_unreach_start = a;
                    280:   p->mp_unreach_len = len;
                    281: #endif
                    282:   return IGNORE;
                    283: }
                    284: 
                    285: static int
                    286: bgp_check_ext_community(struct bgp_proto *p UNUSED, byte *a UNUSED, int len)
                    287: {
                    288:   return ((len % 8) == 0) ? 0 : WITHDRAW;
                    289: }
                    290: 
                    291: static int
                    292: bgp_check_large_community(struct bgp_proto *p UNUSED, byte *a UNUSED, int len)
                    293: {
                    294:   return ((len % 12) == 0) ? 0 : WITHDRAW;
                    295: }
                    296: 
                    297: 
                    298: static struct attr_desc bgp_attr_table[] = {
                    299:   { NULL, -1, 0, 0, 0,                                                         /* Undefined */
                    300:     NULL, NULL },
                    301:   { "origin", 1, BAF_TRANSITIVE, EAF_TYPE_INT, 1,                              /* BA_ORIGIN */
                    302:     bgp_check_origin, bgp_format_origin },
                    303:   { "as_path", -1, BAF_TRANSITIVE, EAF_TYPE_AS_PATH, 1,                                /* BA_AS_PATH */
                    304:     NULL, NULL }, /* is checked by validate_as_path() as a special case */
                    305:   { "next_hop", 4, BAF_TRANSITIVE, EAF_TYPE_IP_ADDRESS, 1,                     /* BA_NEXT_HOP */
                    306:     bgp_check_next_hop, bgp_format_next_hop },
                    307:   { "med", 4, BAF_OPTIONAL, EAF_TYPE_INT, 1,                                   /* BA_MULTI_EXIT_DISC */
                    308:     NULL, NULL },
1.1.1.2 ! misho     309:   { "local_pref", 4, BAF_TRANSITIVE, EAF_TYPE_INT, 1,                          /* BA_LOCAL_PREF */
1.1       misho     310:     NULL, NULL },
                    311:   { "atomic_aggr", 0, BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1,                      /* BA_ATOMIC_AGGR */
                    312:     NULL, NULL },
                    313:   { "aggregator", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1,       /* BA_AGGREGATOR */
                    314:     bgp_check_aggregator, bgp_format_aggregator },
                    315:   { "community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_INT_SET, 1,       /* BA_COMMUNITY */
                    316:     bgp_check_community, NULL },
                    317:   { "originator_id", 4, BAF_OPTIONAL, EAF_TYPE_ROUTER_ID, 0,                   /* BA_ORIGINATOR_ID */
                    318:     NULL, NULL },
                    319:   { "cluster_list", -1, BAF_OPTIONAL, EAF_TYPE_INT_SET, 0,                     /* BA_CLUSTER_LIST */
                    320:     bgp_check_cluster_list, bgp_format_cluster_list }, 
                    321:   { .name = NULL },                                                            /* BA_DPA */
                    322:   { .name = NULL },                                                            /* BA_ADVERTISER */
                    323:   { .name = NULL },                                                            /* BA_RCID_PATH */
                    324:   { "mp_reach_nlri", -1, BAF_OPTIONAL, EAF_TYPE_OPAQUE, 1,                     /* BA_MP_REACH_NLRI */
                    325:     bgp_check_reach_nlri, NULL },
                    326:   { "mp_unreach_nlri", -1, BAF_OPTIONAL, EAF_TYPE_OPAQUE, 1,                   /* BA_MP_UNREACH_NLRI */
                    327:     bgp_check_unreach_nlri, NULL },
                    328:   { "ext_community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_EC_SET, 1,    /* BA_EXT_COMMUNITY */
                    329:     bgp_check_ext_community, NULL },
                    330:   { "as4_path", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1,         /* BA_AS4_PATH */
                    331:     NULL, NULL },
                    332:   { "as4_aggregator", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_OPAQUE, 1,   /* BA_AS4_PATH */
                    333:     NULL, NULL },
                    334:   [BA_LARGE_COMMUNITY] =
                    335:   { "large_community", -1, BAF_OPTIONAL | BAF_TRANSITIVE, EAF_TYPE_LC_SET, 1,
                    336:     bgp_check_large_community, NULL }
                    337: };
                    338: 
                    339: /* BA_AS4_PATH is type EAF_TYPE_OPAQUE and not type EAF_TYPE_AS_PATH.
                    340:  * It does not matter as this attribute does not appear on routes in the routing table.
                    341:  */
                    342: 
                    343: #define ATTR_KNOWN(code) ((code) < ARRAY_SIZE(bgp_attr_table) && bgp_attr_table[code].name)
                    344: 
                    345: static inline struct adata *
                    346: bgp_alloc_adata(struct linpool *pool, unsigned len)
                    347: {
                    348:   struct adata *ad = lp_alloc(pool, sizeof(struct adata) + len);
                    349:   ad->length = len;
                    350:   return ad;
                    351: }
                    352: 
                    353: static void
                    354: bgp_set_attr(eattr *e, unsigned attr, uintptr_t val)
                    355: {
                    356:   ASSERT(ATTR_KNOWN(attr));
                    357:   e->id = EA_CODE(EAP_BGP, attr);
                    358:   e->type = bgp_attr_table[attr].type;
                    359:   e->flags = bgp_attr_table[attr].expected_flags;
                    360:   if (e->type & EAF_EMBEDDED)
                    361:     e->u.data = val;
                    362:   else
                    363:     e->u.ptr = (struct adata *) val;
                    364: }
                    365: 
                    366: static byte *
                    367: bgp_set_attr_wa(eattr *e, struct linpool *pool, unsigned attr, unsigned len)
                    368: {
                    369:   struct adata *ad = bgp_alloc_adata(pool, len);
                    370:   bgp_set_attr(e, attr, (uintptr_t) ad);
                    371:   return ad->data;
                    372: }
                    373: 
                    374: void
                    375: bgp_attach_attr(ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val)
                    376: {
                    377:   ea_list *a = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr));
                    378:   a->next = *to;
                    379:   *to = a;
                    380:   a->flags = EALF_SORTED;
                    381:   a->count = 1;
                    382:   bgp_set_attr(a->attrs, attr, val);
                    383: }
                    384: 
                    385: byte *
                    386: bgp_attach_attr_wa(ea_list **to, struct linpool *pool, unsigned attr, unsigned len)
                    387: {
                    388:   struct adata *ad = bgp_alloc_adata(pool, len);
                    389:   bgp_attach_attr(to, pool, attr, (uintptr_t) ad);
                    390:   return ad->data;
                    391: }
                    392: 
                    393: static int
                    394: bgp_encode_attr_hdr(byte *dst, uint flags, unsigned code, int len)
                    395: {
                    396:   int wlen;
                    397: 
                    398:   DBG("\tAttribute %02x (%d bytes, flags %02x)\n", code, len, flags);
                    399: 
                    400:   if (len < 256)
                    401:     {
                    402:       *dst++ = flags;
                    403:       *dst++ = code;
                    404:       *dst++ = len;
                    405:       wlen = 3;
                    406:     }
                    407:   else
                    408:     {
                    409:       *dst++ = flags | BAF_EXT_LEN;
                    410:       *dst++ = code;
                    411:       put_u16(dst, len);
                    412:       wlen = 4;
                    413:     }
                    414: 
                    415:   return wlen;
                    416: }
                    417: 
                    418: static void
                    419: aggregator_convert_to_old(struct adata *aggr, byte *dst, int *new_used)
                    420: {
                    421:   byte *src = aggr->data;
                    422:   *new_used = 0;
                    423: 
                    424:   u32 as = get_u32(src);
                    425:   if (as > 0xFFFF) 
                    426:     {
                    427:       as = AS_TRANS;
                    428:       *new_used = 1;
                    429:     }
                    430:   put_u16(dst, as);
                    431: 
                    432:   /* Copy IPv4 address */
                    433:   memcpy(dst + 2, src + 4, 4);
                    434: }
                    435: 
                    436: static void
                    437: aggregator_convert_to_new(struct adata *aggr, byte *dst)
                    438: {
                    439:   byte *src = aggr->data;
                    440: 
                    441:   u32 as   = get_u16(src);
                    442:   put_u32(dst, as);
                    443: 
                    444:   /* Copy IPv4 address */
                    445:   memcpy(dst + 4, src + 2, 4);
                    446: }
                    447: 
                    448: static int
                    449: bgp_get_attr_len(eattr *a)
                    450: {
                    451:   int len;
                    452:   if (ATTR_KNOWN(EA_ID(a->id)))
                    453:     {
                    454:       int code = EA_ID(a->id);
                    455:       struct attr_desc *desc = &bgp_attr_table[code];
                    456:       len = desc->expected_length;
                    457:       if (len < 0)
                    458:        {
                    459:          ASSERT(!(a->type & EAF_EMBEDDED));
                    460:          len = a->u.ptr->length;
                    461:        }
                    462:     }
                    463:   else
                    464:     {
                    465:       ASSERT((a->type & EAF_TYPE_MASK) == EAF_TYPE_OPAQUE);
                    466:       len = a->u.ptr->length;
                    467:     }
                    468:   
                    469:   return len;
                    470: }
                    471: 
                    472: /**
                    473:  * bgp_encode_attrs - encode BGP attributes
1.1.1.2 ! misho     474:  * @p: BGP instance (or NULL)
1.1       misho     475:  * @w: buffer
                    476:  * @attrs: a list of extended attributes
                    477:  * @remains: remaining space in the buffer
                    478:  *
                    479:  * The bgp_encode_attrs() function takes a list of extended attributes
                    480:  * and converts it to its BGP representation (a part of an Update message).
                    481:  *
                    482:  * Result: Length of the attribute block generated or -1 if not enough space.
                    483:  */
                    484: uint
                    485: bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains)
                    486: {
                    487:   uint i, code, type, flags;
1.1.1.2 ! misho     488:   int as4_session = p ? p->as4_session : 1;
1.1       misho     489:   byte *start = w;
                    490:   int len, rv;
                    491: 
                    492:   for(i=0; i<attrs->count; i++)
                    493:     {
                    494:       eattr *a = &attrs->attrs[i];
                    495:       ASSERT(EA_PROTO(a->id) == EAP_BGP);
                    496:       code = EA_ID(a->id);
                    497: 
                    498: #ifdef IPV6
                    499:       /* When talking multiprotocol BGP, the NEXT_HOP attributes are used only temporarily. */
                    500:       if (code == BA_NEXT_HOP)
                    501:        continue;
                    502: #endif
                    503: 
                    504:       /* When AS4-aware BGP speaker is talking to non-AS4-aware BGP speaker,
                    505:        * we have to convert our 4B AS_PATH to 2B AS_PATH and send our AS_PATH 
                    506:        * as optional AS4_PATH attribute.
                    507:        */
1.1.1.2 ! misho     508:       if ((code == BA_AS_PATH) && !as4_session)
1.1       misho     509:        {
                    510:          len = a->u.ptr->length;
                    511: 
                    512:          if (remains < (len + 4))
                    513:            goto err_no_buffer;
                    514: 
                    515:          /* Using temporary buffer because don't know a length of created attr
                    516:           * and therefore a length of a header. Perhaps i should better always
                    517:           * use BAF_EXT_LEN. */
                    518:          
                    519:          byte buf[len];
                    520:          int new_used;
                    521:          int nl = as_path_convert_to_old(a->u.ptr, buf, &new_used);
                    522: 
                    523:          DBG("BGP: Encoding old AS_PATH\n");
                    524:          rv = bgp_encode_attr_hdr(w, BAF_TRANSITIVE, BA_AS_PATH, nl);
                    525:          ADVANCE(w, remains, rv);
                    526:          memcpy(w, buf, nl);
                    527:          ADVANCE(w, remains, nl);
                    528: 
                    529:          if (! new_used)
                    530:            continue;
                    531: 
                    532:          if (remains < (len + 4))
                    533:            goto err_no_buffer;
                    534: 
                    535:          /* We should discard AS_CONFED_SEQUENCE or AS_CONFED_SET path segments 
                    536:           * here but we don't support confederations and such paths we already
                    537:           * discarded in bgp_check_as_path().
                    538:           */
                    539: 
                    540:          DBG("BGP: Encoding AS4_PATH\n");
                    541:          rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AS4_PATH, len);
                    542:          ADVANCE(w, remains, rv);
                    543:          memcpy(w, a->u.ptr->data, len);
                    544:          ADVANCE(w, remains, len);
                    545: 
                    546:          continue;
                    547:        }
                    548: 
                    549:       /* The same issue with AGGREGATOR attribute */
1.1.1.2 ! misho     550:       if ((code == BA_AGGREGATOR) && !as4_session)
1.1       misho     551:        {
                    552:          int new_used;
                    553: 
                    554:          len = 6;
                    555:          if (remains < (len + 3))
                    556:            goto err_no_buffer;
                    557: 
                    558:          rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AGGREGATOR, len);
                    559:          ADVANCE(w, remains, rv);
                    560:          aggregator_convert_to_old(a->u.ptr, w, &new_used);
                    561:          ADVANCE(w, remains, len);
                    562: 
                    563:          if (! new_used)
                    564:            continue;
                    565: 
                    566:          len = 8;
                    567:          if (remains < (len + 3))
                    568:            goto err_no_buffer;
                    569: 
                    570:          rv = bgp_encode_attr_hdr(w, BAF_OPTIONAL | BAF_TRANSITIVE, BA_AS4_AGGREGATOR, len);
                    571:          ADVANCE(w, remains, rv);
                    572:          memcpy(w, a->u.ptr->data, len);
                    573:          ADVANCE(w, remains, len);
                    574: 
                    575:          continue;
                    576:        }
                    577: 
                    578:       /* Standard path continues here ... */
                    579: 
                    580:       type = a->type & EAF_TYPE_MASK;
                    581:       flags = a->flags & (BAF_OPTIONAL | BAF_TRANSITIVE | BAF_PARTIAL);
                    582:       len = bgp_get_attr_len(a);
                    583: 
                    584:       /* Skip empty sets */ 
                    585:       if (((type == EAF_TYPE_INT_SET) || (type == EAF_TYPE_EC_SET) || (type == EAF_TYPE_LC_SET)) && (len == 0))
                    586:        continue; 
                    587: 
                    588:       if (remains < len + 4)
                    589:        goto err_no_buffer;
                    590: 
                    591:       rv = bgp_encode_attr_hdr(w, flags, code, len);
                    592:       ADVANCE(w, remains, rv);
                    593: 
                    594:       switch (type)
                    595:        {
                    596:        case EAF_TYPE_INT:
                    597:        case EAF_TYPE_ROUTER_ID:
                    598:          if (len == 4)
                    599:            put_u32(w, a->u.data);
                    600:          else
                    601:            *w = a->u.data;
                    602:          break;
                    603:        case EAF_TYPE_IP_ADDRESS:
                    604:          {
                    605:            ip_addr ip = *(ip_addr *)a->u.ptr->data;
                    606:            ipa_hton(ip);
                    607:            memcpy(w, &ip, len);
                    608:            break;
                    609:          }
                    610:        case EAF_TYPE_INT_SET:
                    611:        case EAF_TYPE_LC_SET:
                    612:        case EAF_TYPE_EC_SET:
                    613:          {
                    614:            u32 *z = int_set_get_data(a->u.ptr);
                    615:            int i;
                    616:            for(i=0; i<len; i+=4)
                    617:              put_u32(w+i, *z++);
                    618:            break;
                    619:          }
                    620:        case EAF_TYPE_OPAQUE:
                    621:        case EAF_TYPE_AS_PATH:
                    622:          memcpy(w, a->u.ptr->data, len);
                    623:          break;
                    624:        default:
                    625:          bug("bgp_encode_attrs: unknown attribute type %02x", a->type);
                    626:        }
                    627:       ADVANCE(w, remains, len);
                    628:     }
                    629:   return w - start;
                    630: 
                    631:  err_no_buffer:
                    632:   return -1;
                    633: }
                    634: 
                    635: /*
                    636: static void
                    637: bgp_init_prefix(struct fib_node *N)
                    638: {
                    639:   struct bgp_prefix *p = (struct bgp_prefix *) N;
                    640:   p->bucket_node.next = NULL;
                    641: }
                    642: */
                    643: 
                    644: static int
                    645: bgp_compare_u32(const u32 *x, const u32 *y)
                    646: {
                    647:   return (*x < *y) ? -1 : (*x > *y) ? 1 : 0;
                    648: }
                    649: 
                    650: static inline void
                    651: bgp_normalize_int_set(u32 *dest, u32 *src, unsigned cnt)
                    652: {
                    653:   memcpy(dest, src, sizeof(u32) * cnt);
                    654:   qsort(dest, cnt, sizeof(u32), (int(*)(const void *, const void *)) bgp_compare_u32);
                    655: }
                    656: 
                    657: static int
                    658: bgp_compare_ec(const u32 *xp, const u32 *yp)
                    659: {
                    660:   u64 x = ec_get(xp, 0);
                    661:   u64 y = ec_get(yp, 0);
                    662:   return (x < y) ? -1 : (x > y) ? 1 : 0;
                    663: }
                    664: 
                    665: static inline void
                    666: bgp_normalize_ec_set(struct adata *ad, u32 *src, int internal)
                    667: {
                    668:   u32 *dst = int_set_get_data(ad);
                    669: 
                    670:   /* Remove non-transitive communities (EC_TBIT active) on external sessions */
                    671:   if (! internal)
                    672:     {
                    673:       int len = int_set_get_size(ad);
                    674:       u32 *t = dst;
                    675:       int i;
                    676: 
                    677:       for (i=0; i < len; i += 2)
                    678:        {
                    679:          if (src[i] & EC_TBIT)
                    680:            continue;
                    681:          
                    682:          *t++ = src[i];
                    683:          *t++ = src[i+1];
                    684:        }
                    685: 
                    686:       ad->length = (t - dst) * 4;
                    687:     }
                    688:   else
                    689:     memcpy(dst, src, ad->length);
                    690: 
                    691:   qsort(dst, ad->length / 8, 8, (int(*)(const void *, const void *)) bgp_compare_ec);
                    692: }
                    693: 
                    694: static int
                    695: bgp_compare_lc(const u32 *x, const u32 *y)
                    696: {
                    697:   if (x[0] != y[0])
                    698:     return (x[0] > y[0]) ? 1 : -1;
                    699:   if (x[1] != y[1])
                    700:     return (x[1] > y[1]) ? 1 : -1;
                    701:   if (x[2] != y[2])
                    702:     return (x[2] > y[2]) ? 1 : -1;
                    703:   return 0;
                    704: }
                    705: 
                    706: static inline void
                    707: bgp_normalize_lc_set(u32 *dest, u32 *src, unsigned cnt)
                    708: {
                    709:   memcpy(dest, src, LCOMM_LENGTH * cnt);
                    710:   qsort(dest, cnt, LCOMM_LENGTH, (int(*)(const void *, const void *)) bgp_compare_lc);
                    711: }
                    712: 
                    713: static void
                    714: bgp_rehash_buckets(struct bgp_proto *p)
                    715: {
                    716:   struct bgp_bucket **old = p->bucket_hash;
                    717:   struct bgp_bucket **new;
                    718:   unsigned oldn = p->hash_size;
                    719:   unsigned i, e, mask;
                    720:   struct bgp_bucket *b;
                    721: 
                    722:   p->hash_size = p->hash_limit;
                    723:   DBG("BGP: Rehashing bucket table from %d to %d\n", oldn, p->hash_size);
                    724:   p->hash_limit *= 4;
                    725:   if (p->hash_limit >= 65536)
                    726:     p->hash_limit = ~0;
                    727:   new = p->bucket_hash = mb_allocz(p->p.pool, p->hash_size * sizeof(struct bgp_bucket *));
                    728:   mask = p->hash_size - 1;
                    729:   for (i=0; i<oldn; i++)
                    730:     while (b = old[i])
                    731:       {
                    732:        old[i] = b->hash_next;
                    733:        e = b->hash & mask;
                    734:        b->hash_next = new[e];
                    735:        if (b->hash_next)
                    736:          b->hash_next->hash_prev = b;
                    737:        b->hash_prev = NULL;
                    738:        new[e] = b;
                    739:       }
                    740:   mb_free(old);
                    741: }
                    742: 
                    743: static struct bgp_bucket *
                    744: bgp_new_bucket(struct bgp_proto *p, ea_list *new, unsigned hash)
                    745: {
                    746:   struct bgp_bucket *b;
                    747:   unsigned ea_size = sizeof(ea_list) + new->count * sizeof(eattr);
                    748:   unsigned ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN);
                    749:   unsigned size = sizeof(struct bgp_bucket) + ea_size_aligned;
                    750:   unsigned i;
                    751:   byte *dest;
                    752:   unsigned index = hash & (p->hash_size - 1);
                    753: 
                    754:   /* Gather total size of non-inline attributes */
                    755:   for (i=0; i<new->count; i++)
                    756:     {
                    757:       eattr *a = &new->attrs[i];
                    758:       if (!(a->type & EAF_EMBEDDED))
                    759:        size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN);
                    760:     }
                    761: 
                    762:   /* Create the bucket and hash it */
                    763:   b = mb_alloc(p->p.pool, size);
                    764:   b->hash_next = p->bucket_hash[index];
                    765:   if (b->hash_next)
                    766:     b->hash_next->hash_prev = b;
                    767:   p->bucket_hash[index] = b;
                    768:   b->hash_prev = NULL;
                    769:   b->hash = hash;
                    770:   add_tail(&p->bucket_queue, &b->send_node);
                    771:   init_list(&b->prefixes);
                    772:   memcpy(b->eattrs, new, ea_size);
                    773:   dest = ((byte *)b->eattrs) + ea_size_aligned;
                    774: 
                    775:   /* Copy values of non-inline attributes */
                    776:   for (i=0; i<new->count; i++)
                    777:     {
                    778:       eattr *a = &b->eattrs->attrs[i];
                    779:       if (!(a->type & EAF_EMBEDDED))
                    780:        {
                    781:          struct adata *oa = a->u.ptr;
                    782:          struct adata *na = (struct adata *) dest;
                    783:          memcpy(na, oa, sizeof(struct adata) + oa->length);
                    784:          a->u.ptr = na;
                    785:          dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN);
                    786:        }
                    787:     }
                    788: 
                    789:   /* If needed, rehash */
                    790:   p->hash_count++;
                    791:   if (p->hash_count > p->hash_limit)
                    792:     bgp_rehash_buckets(p);
                    793: 
                    794:   return b;
                    795: }
                    796: 
                    797: static struct bgp_bucket *
                    798: bgp_get_bucket(struct bgp_proto *p, net *n, ea_list *attrs, int originate)
                    799: {
                    800:   ea_list *new;
                    801:   unsigned i, cnt, hash, code;
                    802:   eattr *a, *d;
                    803:   u32 seen = 0;
                    804:   struct bgp_bucket *b;
                    805: 
                    806:   /* Merge the attribute list */
                    807:   new = alloca(ea_scan(attrs));
                    808:   ea_merge(attrs, new);
                    809:   ea_sort(new);
                    810: 
                    811:   /* Normalize attributes */
                    812:   d = new->attrs;
                    813:   cnt = new->count;
                    814:   new->count = 0;
                    815:   for(i=0; i<cnt; i++)
                    816:     {
                    817:       a = &new->attrs[i];
                    818:       if (EA_PROTO(a->id) != EAP_BGP)
                    819:        continue;
                    820:       code = EA_ID(a->id);
                    821:       if (ATTR_KNOWN(code))
                    822:        {
1.1.1.2 ! misho     823:          if (!p->is_internal)
        !           824:            {
        !           825:              if (!bgp_attr_table[code].allow_in_ebgp)
        !           826:                continue;
        !           827:              if ((code == BA_LOCAL_PREF) && !p->cf->allow_local_pref)
        !           828:                continue;
        !           829:            }
1.1       misho     830:          /* The flags might have been zero if the attr was added by filters */
                    831:          a->flags = (a->flags & BAF_PARTIAL) | bgp_attr_table[code].expected_flags;
                    832:          if (code < 32)
                    833:            seen |= 1 << code;
                    834:        }
                    835:       else
                    836:        {
                    837:          /* Don't re-export unknown non-transitive attributes */
                    838:          if (!(a->flags & BAF_TRANSITIVE))
                    839:            continue;
                    840:        }
                    841:       *d = *a;
                    842:       if ((d->type & EAF_ORIGINATED) && !originate && (d->flags & BAF_TRANSITIVE) && (d->flags & BAF_OPTIONAL))
                    843:        d->flags |= BAF_PARTIAL;
                    844:       switch (d->type & EAF_TYPE_MASK)
                    845:        {
                    846:        case EAF_TYPE_INT_SET:
                    847:          {
                    848:            struct adata *z = alloca(sizeof(struct adata) + d->u.ptr->length);
                    849:            z->length = d->u.ptr->length;
                    850:            bgp_normalize_int_set((u32 *) z->data, (u32 *) d->u.ptr->data, z->length / 4);
                    851:            d->u.ptr = z;
                    852:            break;
                    853:          }
                    854:        case EAF_TYPE_EC_SET:
                    855:          {
                    856:            struct adata *z = alloca(sizeof(struct adata) + d->u.ptr->length);
                    857:            z->length = d->u.ptr->length;
                    858:            bgp_normalize_ec_set(z, (u32 *) d->u.ptr->data, p->is_internal);
                    859:            d->u.ptr = z;
                    860:            break;
                    861:          }
                    862:        case EAF_TYPE_LC_SET:
                    863:          {
                    864:            struct adata *z = alloca(sizeof(struct adata) + d->u.ptr->length);
                    865:            z->length = d->u.ptr->length;
                    866:            bgp_normalize_lc_set((u32 *) z->data, (u32 *) d->u.ptr->data, z->length / LCOMM_LENGTH);
                    867:            d->u.ptr = z;
                    868:            break;
                    869:          }
                    870:        default: ;
                    871:        }
                    872:       d++;
                    873:       new->count++;
                    874:     }
                    875: 
                    876:   /* Hash */
                    877:   hash = ea_hash(new);
                    878:   for(b=p->bucket_hash[hash & (p->hash_size - 1)]; b; b=b->hash_next)
                    879:     if (b->hash == hash && ea_same(b->eattrs, new))
                    880:       {
                    881:        DBG("Found bucket.\n");
                    882:        return b;
                    883:       }
                    884: 
                    885:   /* Ensure that there are all mandatory attributes */
                    886:   for(i=0; i<ARRAY_SIZE(bgp_mandatory_attrs); i++)
                    887:     if (!(seen & (1 << bgp_mandatory_attrs[i])))
                    888:       {
                    889:        log(L_ERR "%s: Mandatory attribute %s missing in route %I/%d", p->p.name, bgp_attr_table[bgp_mandatory_attrs[i]].name, n->n.prefix, n->n.pxlen);
                    890:        return NULL;
                    891:       }
                    892: 
                    893:   /* Check if next hop is valid */
                    894:   a = ea_find(new, EA_CODE(EAP_BGP, BA_NEXT_HOP));
                    895:   if (!a || ipa_equal(p->cf->remote_ip, *(ip_addr *)a->u.ptr->data))
                    896:     {
                    897:       log(L_ERR "%s: Invalid NEXT_HOP attribute in route %I/%d", p->p.name, n->n.prefix, n->n.pxlen);
                    898:       return NULL;
                    899:     }
                    900: 
                    901:   /* Create new bucket */
                    902:   DBG("Creating bucket.\n");
                    903:   return bgp_new_bucket(p, new, hash);
                    904: }
                    905: 
                    906: void
                    907: bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck)
                    908: {
                    909:   if (buck->hash_next)
                    910:     buck->hash_next->hash_prev = buck->hash_prev;
                    911:   if (buck->hash_prev)
                    912:     buck->hash_prev->hash_next = buck->hash_next;
                    913:   else
                    914:     p->bucket_hash[buck->hash & (p->hash_size-1)] = buck->hash_next;
                    915:   mb_free(buck);
                    916: }
                    917: 
                    918: 
                    919: /* Prefix hash table */
                    920: 
                    /*
                     * Parameters of the prefix hash table (prefix PXH), consumed by the
                     * generic HASH_*() macros used below in this file.
                     */

                    /* Key of an entry: prefix address, prefix length and path ID */
                    921: #define PXH_KEY(n1)            n1->n.prefix, n1->n.pxlen, n1->path_id
                    /* Field linking an entry to the next one */
                    922: #define PXH_NEXT(n)            n->next
                    /* Two keys are equal when address, length and path ID all match */
                    923: #define PXH_EQ(p1,l1,i1,p2,l2,i2) ipa_equal(p1, p2) && l1 == l2 && i1 == i2
                    /* Hash of a key: address hash combined with a hash of length and path ID */
                    924: #define PXH_FN(p,l,i)          ipa_hash32(p) ^ u32_hash((l << 16) ^ i)
                    925: 
                    /* Name of the rehash function; presumably the one generated by HASH_DEFINE_REHASH_FN() below */
                    926: #define PXH_REHASH             bgp_pxh_rehash
                    /* NOTE(review): tuning parameters for the generic hash code; their meaning is defined by the hash macros, not visible here */
1.1.1.2 ! misho     927: #define PXH_PARAMS             /8, *2, 2, 2, 8, 24
1.1       misho     928: 
                    929: 
                    930: HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix)
                    931: 
                    /*
                     * Set up the prefix table of @p: initialize p->prefix_hash with
                     * HASH_INIT() using @order and create a slab for struct bgp_prefix
                     * entries. Both use the protocol pool p->p.pool.
                     * NOTE(review): @order is presumably the log2 of the initial table
                     * size -- confirm against the hash macros.
                     */
                    932: void
                    933: bgp_init_prefix_table(struct bgp_proto *p, u32 order)
                    934: {
                    935:   HASH_INIT(p->prefix_hash, p->p.pool, order);
                    936: 
                    937:   p->prefix_slab = sl_new(p->p.pool, sizeof(struct bgp_prefix));
                    938: }
                    939: 
                    /*
                     * Release the prefix table of @p: free the hash table, free the slab
                     * the entries were allocated from and clear the slab pointer.
                     */
                    940: void
                    941: bgp_free_prefix_table(struct bgp_proto *p)
                    942: {
                    943:   HASH_FREE(p->prefix_hash);
                    944: 
                    945:   rfree(p->prefix_slab);
                    /* Clear the pointer so that the freed slab cannot be reused by mistake */
                    946:   p->prefix_slab = NULL;
                    947: }
                    948: 
                    949: static struct bgp_prefix *
                    950: bgp_get_prefix(struct bgp_proto *p, ip_addr prefix, int pxlen, u32 path_id)
                    951: {
                    952:   struct bgp_prefix *bp = HASH_FIND(p->prefix_hash, PXH, prefix, pxlen, path_id);
                    953: 
                    954:   if (bp)
                    955:     return bp;
                    956: 
                    957:   bp = sl_alloc(p->prefix_slab);
                    958:   bp->n.prefix = prefix;
                    959:   bp->n.pxlen = pxlen;
                    960:   bp->path_id = path_id;
                    961:   bp->bucket_node.next = NULL;
                    962: 
                    963:   HASH_INSERT2(p->prefix_hash, PXH, p->p.pool, bp);
                    964: 
                    965:   return bp;
                    966: }
                    967: 
                    /*
                     * Remove the prefix entry @bp from the prefix hash of @p and return
                     * its memory to the prefix slab. @bp must not be used afterwards.
                     */
                    968: void
                    969: bgp_free_prefix(struct bgp_proto *p, struct bgp_prefix *bp)
                    970: {
                    971:   HASH_REMOVE2(p->prefix_hash, PXH, p->p.pool, bp);
                    972:   sl_free(p->prefix_slab, bp);
                    973: }
                    974: 
                    975: 
                    976: void
                    977: bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs)
                    978: {
                    979:   struct bgp_proto *p = (struct bgp_proto *) P;
                    980:   struct bgp_bucket *buck;
                    981:   struct bgp_prefix *px;
                    982:   rte *key;
                    983:   u32 path_id;
                    984: 
                    985:   DBG("BGP: Got route %I/%d %s\n", n->n.prefix, n->n.pxlen, new ? "up" : "down");
                    986: 
                    987:   if (new)
                    988:     {
                    989:       key = new;
                    990:       buck = bgp_get_bucket(p, n, attrs, new->attrs->source != RTS_BGP);
                    991:       if (!buck)                       /* Inconsistent attribute list */
                    992:        return;
                    993:     }
                    994:   else
                    995:     {
                    996:       key = old;
                    997:       if (!(buck = p->withdraw_bucket))
                    998:        {
                    999:          buck = p->withdraw_bucket = mb_alloc(P->pool, sizeof(struct bgp_bucket));
                   1000:          init_list(&buck->prefixes);
                   1001:        }
                   1002:     }
                   1003:   path_id = p->add_path_tx ? key->attrs->src->global_id : 0;
                   1004:   px = bgp_get_prefix(p, n->n.prefix, n->n.pxlen, path_id);
                   1005:   if (px->bucket_node.next)
                   1006:     {
                   1007:       DBG("\tRemoving old entry.\n");
                   1008:       rem_node(&px->bucket_node);
                   1009:     }
                   1010:   add_tail(&buck->prefixes, &px->bucket_node);
                   1011:   bgp_schedule_packet(p->conn, PKT_UPDATE);
                   1012: }
                   1013: 
                   1014: static int
                   1015: bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool)
                   1016: {
                   1017:   ea_list *ea = lp_alloc(pool, sizeof(ea_list) + 4*sizeof(eattr));
                   1018:   rta *rta = e->attrs;
                   1019:   byte *z;
                   1020: 
                   1021:   ea->next = *attrs;
                   1022:   *attrs = ea;
                   1023:   ea->flags = EALF_SORTED;
                   1024:   ea->count = 4;
                   1025: 
                   1026:   bgp_set_attr(ea->attrs, BA_ORIGIN,
                   1027:        ((rta->source == RTS_OSPF_EXT1) || (rta->source == RTS_OSPF_EXT2)) ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
                   1028: 
                   1029:   if (p->is_internal)
                   1030:     bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, 0);
                   1031:   else
                   1032:     {
                   1033:       z = bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, 6);
                   1034:       z[0] = AS_PATH_SEQUENCE;
                   1035:       z[1] = 1;                                /* 1 AS */
                   1036:       put_u32(z+2, p->local_as);
                   1037:     }
                   1038: 
                   1039:   /* iBGP -> use gw, eBGP multi-hop -> use source_addr,
                   1040:      eBGP single-hop -> use gw if on the same iface */
                   1041:   z = bgp_set_attr_wa(ea->attrs+2, pool, BA_NEXT_HOP, NEXT_HOP_LENGTH);
                   1042:   if (p->cf->next_hop_self ||
                   1043:       rta->dest != RTD_ROUTER ||
                   1044:       ipa_equal(rta->gw, IPA_NONE) ||
                   1045:       ipa_is_link_local(rta->gw) ||
                   1046:       (!p->is_internal && !p->cf->next_hop_keep &&
                   1047:        (!p->neigh || (rta->iface != p->neigh->iface))))
                   1048:     set_next_hop(z, p->source_addr);
                   1049:   else
                   1050:     set_next_hop(z, rta->gw);
                   1051: 
                   1052:   bgp_set_attr(ea->attrs+3, BA_LOCAL_PREF, p->cf->default_local_pref);
                   1053: 
                   1054:   return 0;                            /* Leave decision to the filters */
                   1055: }
                   1056: 
                   1057: 
                   1058: static inline int
                   1059: bgp_as_path_loopy(struct bgp_proto *p, rta *a)
                   1060: {
                   1061:   int num = p->cf->allow_local_as + 1;
                   1062:   eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
                   1063:   return (e && (num > 0) && as_path_contains(e->u.ptr, p->local_as, num));
                   1064: }
                   1065: 
                   1066: static inline int
                   1067: bgp_originator_id_loopy(struct bgp_proto *p, rta *a)
                   1068: {
                   1069:   eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
                   1070:   return (e && (e->u.data == p->local_id));
                   1071: }
                   1072: 
                   1073: static inline int
                   1074: bgp_cluster_list_loopy(struct bgp_proto *p, rta *a)
                   1075: {
                   1076:   eattr *e = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
                   1077:   return (e && p->rr_client && int_set_contains(e->u.ptr, p->rr_cluster_id));
                   1078: }
                   1079: 
                   1080: 
                   1081: static inline void
                   1082: bgp_path_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 as)
                   1083: {
                   1084:   eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
                   1085:   bgp_attach_attr(attrs, pool, BA_AS_PATH, (uintptr_t) as_path_prepend(pool, a->u.ptr, as));
                   1086: }
                   1087: 
                   1088: static inline void
                   1089: bgp_cluster_list_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 cid)
                   1090: {
                   1091:   eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
                   1092:   bgp_attach_attr(attrs, pool, BA_CLUSTER_LIST, (uintptr_t) int_set_prepend(pool, a ? a->u.ptr : NULL, cid));
                   1093: }
                   1094: 
                   1095: static int
                   1096: bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool, int rr)
                   1097: {
                   1098:   eattr *a;
                   1099: 
                   1100:   if (!p->is_internal && !p->rs_client)
                   1101:     {
                   1102:       bgp_path_prepend(e, attrs, pool, p->local_as);
                   1103: 
                   1104:       /* The MULTI_EXIT_DISC attribute received from a neighboring AS MUST NOT be
                   1105:        * propagated to other neighboring ASes.
                   1106:        * Perhaps it would be better to undefine it.
                   1107:        */
                   1108:       a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
                   1109:       if (a)
                   1110:        bgp_attach_attr(attrs, pool, BA_MULTI_EXIT_DISC, 0);
                   1111:     }
                   1112: 
                   1113:   /* iBGP -> keep next_hop, eBGP multi-hop -> use source_addr,
                   1114:    * eBGP single-hop -> keep next_hop if on the same iface.
                   1115:    * If the next_hop is zero (i.e. link-local), keep only if on the same iface.
                   1116:    *
                   1117:    * Note that same-iface-check uses iface from route, which is based on gw.
                   1118:    */
                   1119:   a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
                   1120:   if (a && !p->cf->next_hop_self && 
                   1121:       (p->cf->next_hop_keep ||
                   1122:        (p->is_internal && ipa_nonzero(*((ip_addr *) a->u.ptr->data))) ||
                   1123:        (p->neigh && (e->attrs->iface == p->neigh->iface))))
                   1124:     {
                   1125:       /* Leave the original next hop attribute, will check later where does it point */
                   1126:     }
                   1127:   else
                   1128:     {
                   1129:       /* Need to create new one */
                   1130:       byte *b = bgp_attach_attr_wa(attrs, pool, BA_NEXT_HOP, NEXT_HOP_LENGTH);
                   1131:       set_next_hop(b, p->source_addr);
                   1132:     }
                   1133: 
                   1134:   if (rr)
                   1135:     {
                   1136:       /* Handling route reflection, RFC 4456 */
                   1137:       struct bgp_proto *src = (struct bgp_proto *) e->attrs->src->proto;
                   1138: 
                   1139:       a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
                   1140:       if (!a)
                   1141:        bgp_attach_attr(attrs, pool, BA_ORIGINATOR_ID, src->remote_id);
                   1142: 
                   1143:       /* We attach proper cluster ID according to whether the route is entering or leaving the cluster */
                   1144:       bgp_cluster_list_prepend(e, attrs, pool, src->rr_client ? src->rr_cluster_id : p->rr_cluster_id);
                   1145: 
                   1146:       /* Two RR clients with different cluster ID, hmmm */
                   1147:       if (src->rr_client && p->rr_client && (src->rr_cluster_id != p->rr_cluster_id))
                   1148:        bgp_cluster_list_prepend(e, attrs, pool, p->rr_cluster_id);
                   1149:     }
                   1150: 
                   1151:   return 0;                            /* Leave decision to the filters */
                   1152: }
                   1153: 
                   1154: static int
                   1155: bgp_community_filter(struct bgp_proto *p, rte *e)
                   1156: {
                   1157:   eattr *a;
                   1158:   struct adata *d;
                   1159: 
                   1160:   /* Check if we aren't forbidden to export the route by communities */
                   1161:   a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_COMMUNITY));
                   1162:   if (a)
                   1163:     {
                   1164:       d = a->u.ptr;
                   1165:       if (int_set_contains(d, BGP_COMM_NO_ADVERTISE))
                   1166:        {
                   1167:          DBG("\tNO_ADVERTISE\n");
                   1168:          return 1;
                   1169:        }
                   1170:       if (!p->is_internal &&
                   1171:          (int_set_contains(d, BGP_COMM_NO_EXPORT) ||
                   1172:           int_set_contains(d, BGP_COMM_NO_EXPORT_SUBCONFED)))
                   1173:        {
                   1174:          DBG("\tNO_EXPORT\n");
                   1175:          return 1;
                   1176:        }
1.1.1.2 ! misho    1177: 
        !          1178:       if (!p->conn->peer_llgr_aware && int_set_contains(d, BGP_COMM_LLGR_STALE))
        !          1179:        return 1;
1.1       misho    1180:     }
                   1181: 
                   1182:   return 0;
                   1183: }
                   1184: 
                   1185: int
                   1186: bgp_import_control(struct proto *P, rte **new, ea_list **attrs, struct linpool *pool)
                   1187: {
                   1188:   rte *e = *new;
                   1189:   struct bgp_proto *p = (struct bgp_proto *) P;
                   1190:   struct bgp_proto *new_bgp = (e->attrs->src->proto->proto == &proto_bgp) ?
                   1191:     (struct bgp_proto *) e->attrs->src->proto : NULL;
                   1192: 
                   1193:   if (p == new_bgp)                    /* Poison reverse updates */
                   1194:     return -1;
                   1195:   if (new_bgp)
                   1196:     {
                   1197:       /* We should check here for cluster list loop, because the receiving BGP instance
                   1198:         might have different cluster ID  */
                   1199:       if (bgp_cluster_list_loopy(p, e->attrs))
                   1200:        return -1;
                   1201: 
                   1202:       if (p->cf->interpret_communities && bgp_community_filter(p, e))
                   1203:        return -1;
                   1204: 
                   1205:       if (p->local_as == new_bgp->local_as && p->is_internal && new_bgp->is_internal)
                   1206:        {
                   1207:          /* Redistribution of internal routes with IBGP */
                   1208:          if (p->rr_client || new_bgp->rr_client)
                   1209:            /* Route reflection, RFC 4456 */
                   1210:            return bgp_update_attrs(p, e, attrs, pool, 1);
                   1211:          else
                   1212:            return -1;
                   1213:        }
                   1214:       else
                   1215:        return bgp_update_attrs(p, e, attrs, pool, 0);
                   1216:     }
                   1217:   else
                   1218:     return bgp_create_attrs(p, e, attrs, pool);
                   1219: }
                   1220: 
                   1221: static inline u32
                   1222: bgp_get_neighbor(rte *r)
                   1223: {
                   1224:   eattr *e = ea_find(r->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
                   1225:   u32 as;
                   1226: 
                   1227:   if (e && as_path_get_first(e->u.ptr, &as))
                   1228:     return as;
                   1229:   else
                   1230:     return ((struct bgp_proto *) r->attrs->src->proto)->remote_as;
                   1231: }
                   1232: 
                   1233: static inline int
                   1234: rte_resolvable(rte *rt)
                   1235: {
                   1236:   int rd = rt->attrs->dest;  
                   1237:   return (rd == RTD_ROUTER) || (rd == RTD_DEVICE) || (rd == RTD_MULTIPATH);
                   1238: }
                   1239: 
1.1.1.2 ! misho    1240: static inline int
        !          1241: rte_stale(rte *r)
        !          1242: {
        !          1243:   if (r->u.bgp.stale < 0)
        !          1244:   {
        !          1245:     /* If staleness is unknown, compute and cache it */
        !          1246:     eattr *a = ea_find(r->attrs->eattrs, EA_CODE(EAP_BGP, BA_COMMUNITY));
        !          1247:     r->u.bgp.stale = a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE);
        !          1248:   }
        !          1249: 
        !          1250:   return r->u.bgp.stale;
        !          1251: }
        !          1252: 
1.1       misho    1253: int
                   1254: bgp_rte_better(rte *new, rte *old)
                   1255: {
                   1256:   struct bgp_proto *new_bgp = (struct bgp_proto *) new->attrs->src->proto;
                   1257:   struct bgp_proto *old_bgp = (struct bgp_proto *) old->attrs->src->proto;
                   1258:   eattr *x, *y;
                   1259:   u32 n, o;
                   1260: 
                   1261:   /* Skip suppressed routes (see bgp_rte_recalculate()) */
                   1262:   n = new->u.bgp.suppressed;
                   1263:   o = old->u.bgp.suppressed;
                   1264:   if (n > o)
                   1265:     return 0;
                   1266:   if (n < o)
                   1267:     return 1;
                   1268: 
                   1269:   /* RFC 4271 9.1.2.1. Route resolvability test */
                   1270:   n = rte_resolvable(new);
                   1271:   o = rte_resolvable(old);
                   1272:   if (n > o)
                   1273:     return 1;
                   1274:   if (n < o)
                   1275:     return 0;
                   1276: 
1.1.1.2 ! misho    1277:   /* LLGR draft - depreference stale routes */
        !          1278:   n = rte_stale(new);
        !          1279:   o = rte_stale(old);
        !          1280:   if (n > o)
        !          1281:     return 0;
        !          1282:   if (n < o)
        !          1283:     return 1;
        !          1284: 
1.1       misho    1285:   /* Start with local preferences */
                   1286:   x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
                   1287:   y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
                   1288:   n = x ? x->u.data : new_bgp->cf->default_local_pref;
                   1289:   o = y ? y->u.data : old_bgp->cf->default_local_pref;
                   1290:   if (n > o)
                   1291:     return 1;
                   1292:   if (n < o)
                   1293:     return 0;
                   1294: 
                   1295:   /* RFC 4271 9.1.2.2. a)  Use AS path lengths */
                   1296:   if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths)
                   1297:     {
                   1298:       x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
                   1299:       y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
                   1300:       n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
                   1301:       o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
                   1302:       if (n < o)
                   1303:        return 1;
                   1304:       if (n > o)
                   1305:        return 0;
                   1306:     }
                   1307: 
                   1308:   /* RFC 4271 9.1.2.2. b) Use origins */
                   1309:   x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
                   1310:   y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
                   1311:   n = x ? x->u.data : ORIGIN_INCOMPLETE;
                   1312:   o = y ? y->u.data : ORIGIN_INCOMPLETE;
                   1313:   if (n < o)
                   1314:     return 1;
                   1315:   if (n > o)
                   1316:     return 0;
                   1317: 
                   1318:   /* RFC 4271 9.1.2.2. c) Compare MED's */
                   1319:   /* Proper RFC 4271 path selection cannot be interpreted as finding
                   1320:    * the best path in some ordering. It is implemented partially in
                   1321:    * bgp_rte_recalculate() when deterministic_med option is
                   1322:    * active. Without that option, the behavior is just an
                   1323:    * approximation, which in specific situations may lead to
                   1324:    * persistent routing loops, because it is nondeterministic - it
                   1325:    * depends on the order in which routes appeared. But it is also the
                   1326:    * same behavior as used by default in Cisco routers, so it is
                   1327:    * probably not a big issue.
                   1328:    */
                   1329:   if (new_bgp->cf->med_metric || old_bgp->cf->med_metric ||
                   1330:       (bgp_get_neighbor(new) == bgp_get_neighbor(old)))
                   1331:     {
                   1332:       x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
                   1333:       y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
                   1334:       n = x ? x->u.data : new_bgp->cf->default_med;
                   1335:       o = y ? y->u.data : old_bgp->cf->default_med;
                   1336:       if (n < o)
                   1337:        return 1;
                   1338:       if (n > o)
                   1339:        return 0;
                   1340:     }
                   1341: 
                   1342:   /* RFC 4271 9.1.2.2. d) Prefer external peers */
                   1343:   if (new_bgp->is_internal > old_bgp->is_internal)
                   1344:     return 0;
                   1345:   if (new_bgp->is_internal < old_bgp->is_internal)
                   1346:     return 1;
                   1347: 
                   1348:   /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
                   1349:   n = new_bgp->cf->igp_metric ? new->attrs->igp_metric : 0;
                   1350:   o = old_bgp->cf->igp_metric ? old->attrs->igp_metric : 0;
                   1351:   if (n < o)
                   1352:     return 1;
                   1353:   if (n > o)
                   1354:     return 0;
                   1355: 
                   1356:   /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */
                   1357:   /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighor ID */
                   1358:   x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
                   1359:   y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
                   1360:   n = x ? x->u.data : new_bgp->remote_id;
                   1361:   o = y ? y->u.data : old_bgp->remote_id;
                   1362: 
                   1363:   /* RFC 5004 - prefer older routes */
                   1364:   /* (if both are external and from different peer) */
                   1365:   if ((new_bgp->cf->prefer_older || old_bgp->cf->prefer_older) &&
                   1366:       !new_bgp->is_internal && n != o)
                   1367:     return 0;
                   1368: 
                   1369:   /* rest of RFC 4271 9.1.2.2. f) */
                   1370:   if (n < o)
                   1371:     return 1;
                   1372:   if (n > o)
                   1373:     return 0;
                   1374: 
                   1375:   /* RFC 4456 9. b) Compare cluster list lengths */
                   1376:   x = ea_find(new->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
                   1377:   y = ea_find(old->attrs->eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
                   1378:   n = x ? int_set_get_size(x->u.ptr) : 0;
                   1379:   o = y ? int_set_get_size(y->u.ptr) : 0;
                   1380:   if (n < o)
                   1381:     return 1;
                   1382:   if (n > o)
                   1383:     return 0;
                   1384: 
                   1385:   /* RFC 4271 9.1.2.2. g) Compare peer IP adresses */
                   1386:   return (ipa_compare(new_bgp->cf->remote_ip, old_bgp->cf->remote_ip) < 0);
                   1387: }
                   1388: 
                   1389: 
                   1390: int
                   1391: bgp_rte_mergable(rte *pri, rte *sec)
                   1392: {
                   1393:   struct bgp_proto *pri_bgp = (struct bgp_proto *) pri->attrs->src->proto;
                   1394:   struct bgp_proto *sec_bgp = (struct bgp_proto *) sec->attrs->src->proto;
                   1395:   eattr *x, *y;
                   1396:   u32 p, s;
                   1397: 
                   1398:   /* Skip suppressed routes (see bgp_rte_recalculate()) */
                   1399:   if (pri->u.bgp.suppressed != sec->u.bgp.suppressed)
                   1400:     return 0;
                   1401: 
                   1402:   /* RFC 4271 9.1.2.1. Route resolvability test */
                   1403:   if (!rte_resolvable(sec))
                   1404:     return 0;
                   1405: 
1.1.1.2 ! misho    1406:   /* LLGR draft - depreference stale routes */
        !          1407:   if (rte_stale(pri) != rte_stale(sec))
        !          1408:     return 0;
        !          1409: 
1.1       misho    1410:   /* Start with local preferences */
                   1411:   x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
                   1412:   y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
                   1413:   p = x ? x->u.data : pri_bgp->cf->default_local_pref;
                   1414:   s = y ? y->u.data : sec_bgp->cf->default_local_pref;
                   1415:   if (p != s)
                   1416:     return 0;
                   1417: 
                   1418:   /* RFC 4271 9.1.2.2. a)  Use AS path lengths */
                   1419:   if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths)
                   1420:     {
                   1421:       x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
                   1422:       y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
                   1423:       p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
                   1424:       s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
                   1425: 
                   1426:       if (p != s)
                   1427:        return 0;
                   1428: 
                   1429: //      if (DELTA(p, s) > pri_bgp->cf->relax_multipath)
                   1430: //     return 0;
                   1431:     }
                   1432: 
                   1433:   /* RFC 4271 9.1.2.2. b) Use origins */
                   1434:   x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
                   1435:   y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
                   1436:   p = x ? x->u.data : ORIGIN_INCOMPLETE;
                   1437:   s = y ? y->u.data : ORIGIN_INCOMPLETE;
                   1438:   if (p != s)
                   1439:     return 0;
                   1440: 
                   1441:   /* RFC 4271 9.1.2.2. c) Compare MED's */
                   1442:   if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric ||
                   1443:       (bgp_get_neighbor(pri) == bgp_get_neighbor(sec)))
                   1444:     {
                   1445:       x = ea_find(pri->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
                   1446:       y = ea_find(sec->attrs->eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
                   1447:       p = x ? x->u.data : pri_bgp->cf->default_med;
                   1448:       s = y ? y->u.data : sec_bgp->cf->default_med;
                   1449:       if (p != s)
                   1450:        return 0;
                   1451:     }
                   1452: 
                   1453:   /* RFC 4271 9.1.2.2. d) Prefer external peers */
                   1454:   if (pri_bgp->is_internal != sec_bgp->is_internal)
                   1455:     return 0;
                   1456: 
                   1457:   /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
                   1458:   p = pri_bgp->cf->igp_metric ? pri->attrs->igp_metric : 0;
                   1459:   s = sec_bgp->cf->igp_metric ? sec->attrs->igp_metric : 0;
                   1460:   if (p != s)
                   1461:     return 0;
                   1462: 
                   1463:   /* Remaining criteria are ignored */
                   1464: 
                   1465:   return 1;
                   1466: }
                   1467: 
                   1468: 
                   1469: 
                   1470: static inline int
                   1471: same_group(rte *r, u32 lpref, u32 lasn)
                   1472: {
                   1473:   return (r->pref == lpref) && (bgp_get_neighbor(r) == lasn);
                   1474: }
                   1475: 
                   1476: static inline int
                   1477: use_deterministic_med(rte *r)
                   1478: {
                   1479:   struct proto *P = r->attrs->src->proto;
                   1480:   return (P->proto == &proto_bgp) && ((struct bgp_proto *) P)->cf->deterministic_med;
                   1481: }
                   1482: 
                   1483: int
                   1484: bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best)
                   1485: {
                   1486:   rte *r, *s;
                   1487:   rte *key = new ? new : old;
                   1488:   u32 lpref = key->pref;
                   1489:   u32 lasn = bgp_get_neighbor(key);
                   1490:   int old_is_group_best = 0;
                   1491: 
                   1492:   /*
                   1493:    * Proper RFC 4271 path selection is a bit complicated, it cannot be
                   1494:    * implemented just by rte_better(), because it is not a linear
                   1495:    * ordering. But it can be splitted to two levels, where the lower
                   1496:    * level chooses the best routes in each group of routes from the
                   1497:    * same neighboring AS and higher level chooses the best route (with
                   1498:    * a slightly different ordering) between the best-in-group routes.
                   1499:    *
                   1500:    * When deterministic_med is disabled, we just ignore this issue and
                   1501:    * choose the best route by bgp_rte_better() alone. If enabled, the
                   1502:    * lower level of the route selection is done here (for the group
                   1503:    * to which the changed route belongs), all routes in group are
                   1504:    * marked as suppressed, just chosen best-in-group is not.
                   1505:    *
                   1506:    * Global best route selection then implements higher level by
                   1507:    * choosing between non-suppressed routes (as they are always
                   1508:    * preferred over suppressed routes). Routes from BGP protocols
                   1509:    * that do not set deterministic_med are just never suppressed. As
                   1510:    * they do not participate in the lower level selection, it is OK
                   1511:    * that this fn is not called for them.
                   1512:    *
                   1513:    * The idea is simple, the implementation is more problematic,
                   1514:    * mostly because of optimizations in rte_recalculate() that 
                   1515:    * avoids full recalculation in most cases.
                   1516:    *
                   1517:    * We can assume that at least one of new, old is non-NULL and both
                   1518:    * are from the same protocol with enabled deterministic_med. We
                   1519:    * group routes by both neighbor AS (lasn) and preference (lpref),
                   1520:    * because bgp_rte_better() does not handle preference itself.
                   1521:    */
                   1522: 
                   1523:   /* If new and old are from different groups, we just process that
                   1524:      as two independent events */
                   1525:   if (new && old && !same_group(old, lpref, lasn))
                   1526:     {
                   1527:       int i1, i2;
                   1528:       i1 = bgp_rte_recalculate(table, net, NULL, old, old_best);
                   1529:       i2 = bgp_rte_recalculate(table, net, new, NULL, old_best);
                   1530:       return i1 || i2;
                   1531:     }
                   1532: 
                   1533:   /* 
                   1534:    * We could find the best-in-group and then make some shortcuts like
                   1535:    * in rte_recalculate, but as we would have to walk through all
                   1536:    * net->routes just to find it, it is probably not worth. So we
                   1537:    * just have two simpler fast cases that use just the old route.
                   1538:    * We also set suppressed flag to avoid using it in bgp_rte_better().
                   1539:    */
                   1540: 
                   1541:   if (new)
                   1542:     new->u.bgp.suppressed = 1;
                   1543: 
                   1544:   if (old)
                   1545:     {
                   1546:       old_is_group_best = !old->u.bgp.suppressed;
                   1547:       old->u.bgp.suppressed = 1;
                   1548:       int new_is_better = new && bgp_rte_better(new, old);
                   1549: 
                   1550:       /* The first case - replace not best with worse (or remove not best) */
                   1551:       if (!old_is_group_best && !new_is_better)
                   1552:        return 0;
                   1553: 
                   1554:       /* The second case - replace the best with better */
                   1555:       if (old_is_group_best && new_is_better)
                   1556:        {
                   1557:          /* new is best-in-group, the see discussion below - this is
                   1558:             a special variant of NBG && OBG. From OBG we can deduce
                   1559:             that same_group(old_best) iff (old == old_best)  */
                   1560:          new->u.bgp.suppressed = 0;
                   1561:          return (old == old_best);
                   1562:        }
                   1563:     }
                   1564: 
                   1565:   /* The default case - find a new best-in-group route */
                   1566:   r = new; /* new may not be in the list */
                   1567:   for (s=net->routes; rte_is_valid(s); s=s->next)
                   1568:     if (use_deterministic_med(s) && same_group(s, lpref, lasn))
                   1569:       {
                   1570:        s->u.bgp.suppressed = 1;
                   1571:        if (!r || bgp_rte_better(s, r))
                   1572:          r = s;
                   1573:       }
                   1574: 
                   1575:   /* Simple case - the last route in group disappears */
                   1576:   if (!r)
                   1577:     return 0;
                   1578: 
                   1579:   /* Found best-in-group */
                   1580:   r->u.bgp.suppressed = 0;
                   1581: 
                   1582:   /*
                   1583:    * There are generally two reasons why we have to force
                   1584:    * recalculation (return 1): First, the new route may be wrongfully
                   1585:    * chosen to be the best in the first case check in
                   1586:    * rte_recalculate(), this may happen only if old_best is from the
                   1587:    * same group. Second, another (different than new route)
                   1588:    * best-in-group is chosen and that may be the proper best (although
                   1589:    * rte_recalculate() without ignore that possibility).
                   1590:    *
                   1591:    * There are three possible cases according to whether the old route
                   1592:    * was the best in group (OBG, stored in old_is_group_best) and
                   1593:    * whether the new route is the best in group (NBG, tested by r == new).
                   1594:    * These cases work even if old or new is NULL.
                   1595:    *
                   1596:    * NBG -> new is a possible candidate for the best route, so we just
                   1597:    *        check for the first reason using same_group().
                   1598:    *
                   1599:    * !NBG && OBG -> Second reason applies, return 1
                   1600:    *
                   1601:    * !NBG && !OBG -> Best in group does not change, old != old_best,
                   1602:    *                 rte_better(new, old_best) is false and therefore
                   1603:    *                 the first reason does not apply, return 0
                   1604:    */
                   1605: 
                   1606:   if (r == new)
                   1607:     return old_best && same_group(old_best, lpref, lasn);
                   1608:   else
                   1609:     return old_is_group_best;
                   1610: }
                   1611: 
1.1.1.2 ! misho    1612: struct rte *
        !          1613: bgp_rte_modify_stale(struct rte *r, struct linpool *pool)
        !          1614: {
        !          1615:   eattr *a = ea_find(r->attrs->eattrs, EA_CODE(EAP_BGP, BA_COMMUNITY));
        !          1616:   struct adata *ad = a ? a->u.ptr : NULL;
        !          1617: 
        !          1618:   if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR))
        !          1619:     return NULL;
        !          1620: 
        !          1621:   if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE))
        !          1622:     return r;
        !          1623: 
        !          1624:   r = rte_cow_rta(r, pool);
        !          1625:   bgp_attach_attr(&(r->attrs->eattrs), pool, BA_COMMUNITY,
        !          1626:                  (uintptr_t) int_set_add(pool, ad, BGP_COMM_LLGR_STALE));
        !          1627:   r->u.bgp.stale = 1;
        !          1628: 
        !          1629:   return r;
        !          1630: }
        !          1631: 
        !          1632: 
1.1       misho    1633: static struct adata *
                   1634: bgp_aggregator_convert_to_new(struct adata *old, struct linpool *pool)
                   1635: {
                   1636:   struct adata *newa = lp_alloc(pool, sizeof(struct adata) + 8);
                   1637:   newa->length = 8;
                   1638:   aggregator_convert_to_new(old, newa->data);
                   1639:   return newa;
                   1640: }
                   1641: 
                   1642: /* Take last req_as ASNs from path old2 (in 2B format), convert to 4B format
                   1643:  * and append path old4 (in 4B format).
                   1644:  */
                   1645: static struct adata *
                   1646: bgp_merge_as_paths(struct adata *old2, struct adata *old4, int req_as, struct linpool *pool)
                   1647: {
                   1648:   byte buf[old2->length * 2];
                   1649: 
                   1650:   int ol = as_path_convert_to_new(old2, buf, req_as);
                   1651:   int nl = ol + (old4 ? old4->length : 0);
                   1652: 
                   1653:   struct adata *newa = lp_alloc(pool, sizeof(struct adata) + nl);
                   1654:   newa->length = nl;
                   1655:   memcpy(newa->data, buf, ol);
                   1656:   if (old4) memcpy(newa->data + ol, old4->data, old4->length);
                   1657: 
                   1658:   return newa;
                   1659: }
                   1660: 
                   1661: static int
                   1662: as4_aggregator_valid(struct adata *aggr)
                   1663: {
                   1664:   return aggr->length == 8;
                   1665: }
                   1666: 
                   1667: 
                   1668: /* Reconstruct 4B AS_PATH and AGGREGATOR according to RFC 4893 4.2.3 */
                   1669: static void
                   1670: bgp_reconstruct_4b_atts(struct bgp_proto *p, rta *a, struct linpool *pool)
                   1671: {
                   1672:   eattr *p2 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
                   1673:   eattr *p4 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS4_PATH));
                   1674:   eattr *a2 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AGGREGATOR));
                   1675:   eattr *a4 =ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_AS4_AGGREGATOR));
                   1676:   int a4_removed = 0;
                   1677: 
                   1678:   if (a4 && !as4_aggregator_valid(a4->u.ptr))
                   1679:     {
                   1680:       log(L_WARN "%s: AS4_AGGREGATOR attribute is invalid, skipping attribute", p->p.name);
                   1681:       a4 = NULL;
                   1682:       a4_removed = 1;
                   1683:     }
                   1684: 
                   1685:   if (a2)
                   1686:     {
                   1687:       u32 a2_as = get_u16(a2->u.ptr->data);
                   1688: 
                   1689:       if (a4)
                   1690:        {
                   1691:          if (a2_as != AS_TRANS)
                   1692:            {
                   1693:              /* Routes were aggregated by old router and therefore AS4_PATH
                   1694:               * and AS4_AGGREGATOR is invalid
                   1695:               *
                   1696:               * Convert AS_PATH and AGGREGATOR to 4B format and finish.
                   1697:               */
                   1698: 
                   1699:              a2->u.ptr = bgp_aggregator_convert_to_new(a2->u.ptr, pool);
                   1700:              p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, NULL, AS_PATH_MAXLEN, pool);
                   1701: 
                   1702:              return;
                   1703:            }
                   1704:          else
                   1705:            {
                   1706:              /* Common case, use AS4_AGGREGATOR attribute */
                   1707:              a2->u.ptr = a4->u.ptr;
                   1708:            }
                   1709:        }
                   1710:       else
                   1711:        {
                   1712:          /* Common case, use old AGGREGATOR attribute */
                   1713:          a2->u.ptr = bgp_aggregator_convert_to_new(a2->u.ptr, pool);
                   1714: 
                   1715:          if ((a2_as == AS_TRANS) && !a4_removed)
                   1716:            log(L_WARN "%s: AGGREGATOR attribute contain AS_TRANS, but AS4_AGGREGATOR is missing", p->p.name);
                   1717:        }
                   1718:     }
                   1719:   else
                   1720:     if (a4)
                   1721:       log(L_WARN "%s: AS4_AGGREGATOR attribute received, but AGGREGATOR attribute is missing", p->p.name);
                   1722: 
                   1723:   int p2_len = as_path_getlen_int(p2->u.ptr, 2);
                   1724:   int p4_len = p4 ? validate_as4_path(p, p4->u.ptr) : -1;
                   1725: 
                   1726:   if (p4 && (p4_len < 0))
                   1727:     log(L_WARN "%s: AS4_PATH attribute is malformed, skipping attribute", p->p.name);
                   1728: 
                   1729:   if ((p4_len <= 0) || (p2_len < p4_len))
                   1730:     p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, NULL, AS_PATH_MAXLEN, pool);
                   1731:   else
                   1732:     p2->u.ptr = bgp_merge_as_paths(p2->u.ptr, p4->u.ptr, p2_len - p4_len, pool);
                   1733: }
                   1734: 
                   1735: static void
                   1736: bgp_remove_as4_attrs(struct bgp_proto *p, rta *a)
                   1737: {
                   1738:   unsigned id1 = EA_CODE(EAP_BGP, BA_AS4_PATH);
                   1739:   unsigned id2 = EA_CODE(EAP_BGP, BA_AS4_AGGREGATOR);
                   1740:   ea_list **el = &(a->eattrs);
                   1741: 
                   1742:   /* We know that ea_lists constructed in bgp_decode attrs have one attribute per ea_list struct */
                   1743:   while (*el != NULL)
                   1744:     {
                   1745:       unsigned fid = (*el)->attrs[0].id;
                   1746: 
                   1747:       if ((fid == id1) || (fid == id2))
                   1748:        {
                   1749:          *el = (*el)->next;
                   1750:          if (p->as4_session)
                   1751:            log(L_WARN "%s: Unexpected AS4_* attributes received", p->p.name);
                   1752:        }
                   1753:       else
                   1754:        el = &((*el)->next);
                   1755:     }
                   1756: }
                   1757: 
                   1758: /**
                   1759:  * bgp_decode_attrs - check and decode BGP attributes
                   1760:  * @conn: connection
                   1761:  * @attr: start of attribute block
                   1762:  * @len: length of attribute block
                   1763:  * @pool: linear pool to make all the allocations in
                   1764:  * @mandatory: 1 iff presence of mandatory attributes has to be checked
                   1765:  *
                   1766:  * This function takes a BGP attribute block (a part of an Update message), checks
                   1767:  * its consistency and converts it to a list of BIRD route attributes represented
                   1768:  * by a &rta.
                          *
                          * Returns the new &rta (allocated from @pool) on success. Returns %NULL
                          * in two distinct situations:
                          *  - the block is malformed, an attribute fails a check, or a mandatory
                          *    attribute is missing; bgp_error() has been called on @conn with
                          *    3 as its second argument before returning;
                          *  - the update has to be withdrawn (a validate hook returned a value
                          *    <= WITHDRAW, or one of the loop checks at the end matched); no
                          *    bgp_error() call is made in this case.
                          * The caller cannot tell these two cases apart from the return value.
                   1769:  */
                   1770: struct rta *
                   1771: bgp_decode_attrs(struct bgp_conn *conn, byte *attr, uint len, struct linpool *pool, int mandatory)
                   1772: {
                   1773:   struct bgp_proto *bgp = conn->bgp;
                   1774:   rta *a = lp_alloc(pool, sizeof(struct rta));
                   1775:   uint flags, code, l, i, type;
                   1776:   int errcode;
                           /* z: start of the current attribute's value; attr_start: its flags byte */
                   1777:   byte *z, *attr_start;
                           /* Bitmap with one bit per attribute code (code is read from a single byte) */
                   1778:   byte seen[256/8];
                   1779:   ea_list *ea;
                   1780:   struct adata *ad;
                           /* Set when a validate hook asks for withdrawal; acted upon after the loop */
                   1781:   int withdraw = 0;
                   1782: 
                   1783:   bzero(a, sizeof(rta));
                   1784:   a->source = RTS_BGP;
                   1785:   a->scope = SCOPE_UNIVERSE;
                   1786:   a->cast = RTC_UNICAST;
                   1787:   /* a->dest = RTD_ROUTER;  -- set in bgp_set_next_hop() */
                   1788:   a->from = bgp->cf->remote_ip;
                   1789: 
                   1790:   /* Parse the attributes */
                   1791:   bzero(seen, sizeof(seen));
                   1792:   DBG("BGP: Parsing attributes\n");
                   1793:   while (len)
                   1794:     {
                               /* Each attribute starts with a flags byte followed by a code byte */
                   1795:       if (len < 2)
                   1796:        goto malformed;
                   1797:       attr_start = attr;
                   1798:       flags = *attr++;
                   1799:       code = *attr++;
                   1800:       len -= 2;
                               /* BAF_EXT_LEN selects a 16-bit length field, otherwise it is one byte */
                   1801:       if (flags & BAF_EXT_LEN)
                   1802:        {
                   1803:          if (len < 2)
                   1804:            goto malformed;
                   1805:          l = get_u16(attr);
                   1806:          attr += 2;
                   1807:          len -= 2;
                   1808:        }
                   1809:       else
                   1810:        {
                   1811:          if (len < 1)
                   1812:            goto malformed;
                   1813:          l = *attr++;
                   1814:          len--;
                   1815:        }
                               /* The value must fit into what is left of the block */
                   1816:       if (l > len)
                   1817:        goto malformed;
                   1818:       len -= l;
                   1819:       z = attr;
                   1820:       attr += l;
                   1821:       DBG("Attr %02x %02x %d\n", code, flags, l);
                               /* A code already recorded in seen[] must not appear a second time */
                   1822:       if (seen[code/8] & (1 << (code%8)))
                   1823:        goto malformed;
                   1824:       if (ATTR_KNOWN(code))
                   1825:        {
                   1826:          struct attr_desc *desc = &bgp_attr_table[code];
                                   /* A negative expected_length disables the fixed-length check */
                   1827:          if (desc->expected_length >= 0 && desc->expected_length != (int) l)
                   1828:            { errcode = 5; goto err; }
                                   /* OPTIONAL and TRANSITIVE bits have to match the descriptor */
                   1829:          if ((desc->expected_flags ^ flags) & (BAF_OPTIONAL | BAF_TRANSITIVE))
                   1830:            { errcode = 4; goto err; }
                                   /*
                                    * On a non-internal session, attributes not allowed in EBGP are
                                    * dropped, and so is LOCAL_PREF unless allow_local_pref is set.
                                    * A dropped attribute is neither stored nor marked in seen[],
                                    * so a repeated occurrence of it is not detected above.
                                    */
1.1.1.2 ! misho    1831:          if (!bgp->is_internal)
        !          1832:            {
        !          1833:              if (!desc->allow_in_ebgp)
        !          1834:                continue;
        !          1835:              if ((code == BA_LOCAL_PREF) && !bgp->cf->allow_local_pref)
        !          1836:                continue;
        !          1837:            }
                                   /*
                                    * Result of the validate hook:
                                    *   > 0         - error, value is handed to bgp_error() at err:
                                    *   IGNORE      - drop this attribute and go on
                                    *   <= WITHDRAW - warn and remember to withdraw; the attribute
                                    *                 is still stored and parsing continues
                                    * Any other value accepts the attribute.
                                    */
1.1       misho    1838:          if (desc->validate)
                   1839:            {
                   1840:              errcode = desc->validate(bgp, z, l);
                   1841:              if (errcode > 0)
                   1842:                goto err;
                   1843:              if (errcode == IGNORE)
                   1844:                continue;
                   1845:              if (errcode <= WITHDRAW)
                   1846:                {
                   1847:                  log(L_WARN "%s: Attribute %s is malformed, withdrawing update",
                   1848:                      bgp->p.name, desc->name);
                   1849:                  withdraw = 1;
                   1850:                }
                   1851:            }
                                   /* Reached only when the descriptor has no validate hook */
                   1852:          else if (code == BA_AS_PATH)
                   1853:            {
                   1854:              /* Special case as it might also trim the attribute */
                   1855:              if (validate_as_path(bgp, z, &l) < 0)
                   1856:                { errcode = 11; goto err; }
                   1857:            }
                   1858:          type = desc->type;
                   1859:        }
                   1860:       else                             /* Unknown attribute */
                   1861:        {
                                   /* An unrecognized attribute is accepted only if it is optional */
                   1862:          if (!(flags & BAF_OPTIONAL))
                   1863:            { errcode = 2; goto err; }
                   1864:          type = EAF_TYPE_OPAQUE;
                   1865:        }
                   1866:       
                   1867:       // Only OPTIONAL and TRANSITIVE attributes may have non-zero PARTIAL flag
                   1868:       // if (!((flags & BAF_OPTIONAL) && (flags & BAF_TRANSITIVE)) && (flags & BAF_PARTIAL))
                   1869:       //   { errcode = 4; goto err; }
                   1870: 
                               /*
                                * Record the attribute: mark its code and prepend a one-element
                                * ea_list to a->eattrs.
                                */
                   1871:       seen[code/8] |= (1 << (code%8));
                   1872:       ea = lp_alloc(pool, sizeof(ea_list) + sizeof(eattr));
                   1873:       ea->next = a->eattrs;
                   1874:       a->eattrs = ea;
                   1875:       ea->flags = 0;
                   1876:       ea->count = 1;
                   1877:       ea->attrs[0].id = EA_CODE(EAP_BGP, code);
                   1878:       ea->attrs[0].flags = flags;
                   1879:       ea->attrs[0].type = type;
                               /*
                                * Types with EAF_EMBEDDED get no adata; the others get a copy of the
                                * l raw value bytes in pool memory.
                                */
                   1880:       if (type & EAF_EMBEDDED)
                   1881:        ad = NULL;
                   1882:       else
                   1883:        {
                   1884:          ad = lp_alloc(pool, sizeof(struct adata) + l);
                   1885:          ea->attrs[0].u.ptr = ad;
                   1886:          ad->length = l;
                   1887:          memcpy(ad->data, z, l);
                   1888:        }
                               /*
                                * Type-specific post-processing of the stored value. Types not listed
                                * here keep the raw bytes copied above.
                                * NOTE(review): the cases that dereference ad rely on those types not
                                * having EAF_EMBEDDED set -- confirm against the type definitions.
                                */
                   1889:       switch (type)
                   1890:        {
                   1891:        case EAF_TYPE_ROUTER_ID:
                   1892:        case EAF_TYPE_INT:
                                   /* One-byte values are taken as is, anything else is read by get_u32() */
                   1893:          if (l == 1)
                   1894:            ea->attrs[0].u.data = *z;
                   1895:          else
                   1896:            ea->attrs[0].u.data = get_u32(z);
                   1897:          break;
                   1898:        case EAF_TYPE_IP_ADDRESS:
                                   /* Converted in place in the copied data; presumably network to host order */
                   1899:          ipa_ntoh(*(ip_addr *)ad->data);
                   1900:          break;
                   1901:        case EAF_TYPE_INT_SET:
                   1902:        case EAF_TYPE_LC_SET:
                   1903:        case EAF_TYPE_EC_SET:
                   1904:          {
                                     /*
                                      * This z shadows the outer byte *z and points into the copy.
                                      * Every complete 32-bit word is converted with ntohl(); trailing
                                      * bytes of a length not divisible by 4 are left untouched.
                                      */
                   1905:            u32 *z = (u32 *) ad->data;
                   1906:            for(i=0; i<ad->length/4; i++)
                   1907:              z[i] = ntohl(z[i]);
                   1908:            break;
                   1909:          }
                   1910:        }
                   1911:     }
                   1912: 
                           /* The whole block parsed cleanly, but some attribute requested a withdraw */
                   1913:   if (withdraw)
                   1914:     goto withdraw;
                   1915: 
                   1916: #ifdef IPV6
                   1917:   /* If we received MP_REACH_NLRI we should check mandatory attributes */
                   1918:   if (bgp->mp_reach_len != 0)
                   1919:     mandatory = 1;
                   1920: #endif
                   1921: 
                   1922:   /* If there is no (reachability) NLRI, we should exit now */
                   1923:   if (! mandatory)
                   1924:     return a;
                   1925: 
                   1926:   /* Check if all mandatory attributes are present */
                   1927:   for(i=0; i < ARRAY_SIZE(bgp_mandatory_attrs); i++)
                   1928:     {
                   1929:       code = bgp_mandatory_attrs[i];
                   1930:       if (!(seen[code/8] & (1 << (code%8))))
                   1931:        {
                                   /* The code of the missing attribute is sent as the one-byte error data */
                   1932:          bgp_error(conn, 3, 3, &bgp_mandatory_attrs[i], 1);
                   1933:          return NULL;
                   1934:        }
                   1935:     }
                   1936: 
                   1937:   /* When receiving attributes from non-AS4-aware BGP speaker,
                   1938:    * we have to reconstruct 4B AS_PATH and AGGREGATOR attributes
                   1939:    */
                   1940:   if (! bgp->as4_session)
                   1941:     bgp_reconstruct_4b_atts(bgp, a, pool);
                   1942: 
                   1943:   bgp_remove_as4_attrs(bgp, a);
                   1944: 
                   1945:   /* If the AS path attribute contains our AS, reject the routes */
                   1946:   if (bgp_as_path_loopy(bgp, a))
                   1947:     goto withdraw;
                   1948: 
                   1949:   /* Two checks for IBGP loops caused by route reflection, RFC 4456 */ 
                   1950:   if (bgp_originator_id_loopy(bgp, a) ||
                   1951:       bgp_cluster_list_loopy(bgp, a))
                   1952:     goto withdraw;
                   1953: 
                   1954:   /* If there's no local preference, define one */
                           /*
                            * Only seen[0] is consulted here.
                            * NOTE(review): this relies on BA_LOCAL_PREF being below 8 -- confirm.
                            * A LOCAL_PREF dropped by the EBGP filter in the loop is not marked in
                            * seen[], so the default is attached in that case as well.
                            */
                   1955:   if (!(seen[0] & (1 << BA_LOCAL_PREF)))
                   1956:     bgp_attach_attr(&a->eattrs, pool, BA_LOCAL_PREF, bgp->cf->default_local_pref);
                   1957: 
                   1958:   return a;
                   1959: 
                         /* Drop the update without reporting an error on the connection */
                   1960: withdraw:
                   1961:   return NULL;
                   1962: 
                         /* The attribute list itself is broken: bgp_error() with 3, 1 and no data */
                   1963: malformed:
                   1964:   bgp_error(conn, 3, 1, NULL, 0);
                   1965:   return NULL;
                   1966: 
                         /*
                          * A single attribute failed a check: errcode goes to bgp_error() together
                          * with the offending attribute, from its flags byte up to the end of its
                          * value (l may have been changed by validate_as_path()).
                          */
                   1967: err:
                   1968:   bgp_error(conn, 3, errcode, attr_start, z+l-attr_start);
                   1969:   return NULL;
                   1970: }
                   1971: 
                   1972: int
                   1973: bgp_get_attr(eattr *a, byte *buf, int buflen)
                   1974: {
                   1975:   uint i = EA_ID(a->id);
                   1976:   struct attr_desc *d;
                   1977:   int len;
                   1978: 
                   1979:   if (ATTR_KNOWN(i))
                   1980:     {
                   1981:       d = &bgp_attr_table[i];
                   1982:       len = bsprintf(buf, "%s", d->name);
                   1983:       buf += len;
                   1984:       if (d->format)
                   1985:        {
                   1986:          *buf++ = ':';
                   1987:          *buf++ = ' ';
                   1988:          d->format(a, buf, buflen - len - 2);
                   1989:          return GA_FULL;
                   1990:        }
                   1991:       return GA_NAME;
                   1992:     }
                   1993:   bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? " [t]" : "");
                   1994:   return GA_NAME;
                   1995: }
                   1996: 
                   1997: void
                   1998: bgp_init_bucket_table(struct bgp_proto *p)
                   1999: {
                   2000:   p->hash_size = 256;
                   2001:   p->hash_limit = p->hash_size * 4;
                   2002:   p->bucket_hash = mb_allocz(p->p.pool, p->hash_size * sizeof(struct bgp_bucket *));
                   2003:   init_list(&p->bucket_queue);
                   2004:   p->withdraw_bucket = NULL;
                   2005:   // fib_init(&p->prefix_fib, p->p.pool, sizeof(struct bgp_prefix), 0, bgp_init_prefix);
                   2006: }
                   2007: 
                   2008: void
                   2009: bgp_free_bucket_table(struct bgp_proto *p)
                   2010: {
                   2011:   mb_free(p->bucket_hash);
                   2012:   p->bucket_hash = NULL;
                   2013: 
                   2014:   struct bgp_bucket *b;
                   2015:   WALK_LIST_FIRST(b, p->bucket_queue)
                   2016:   {
                   2017:     rem_node(&b->send_node);
                   2018:     mb_free(b);
                   2019:   }
                   2020: 
                   2021:   mb_free(p->withdraw_bucket);
                   2022:   p->withdraw_bucket = NULL;
                   2023: }
                   2024: 
                   2025: void
                   2026: bgp_get_route_info(rte *e, byte *buf, ea_list *attrs)
                   2027: {
                   2028:   eattr *p = ea_find(attrs, EA_CODE(EAP_BGP, BA_AS_PATH));
                   2029:   eattr *o = ea_find(attrs, EA_CODE(EAP_BGP, BA_ORIGIN));
                   2030:   u32 origas;
                   2031: 
                   2032:   buf += bsprintf(buf, " (%d", e->pref);
                   2033: 
                   2034:   if (e->u.bgp.suppressed)
                   2035:     buf += bsprintf(buf, "-");
                   2036: 
1.1.1.2 ! misho    2037:   if (rte_stale(e))
        !          2038:     buf += bsprintf(buf, "s");
        !          2039: 
1.1       misho    2040:   if (e->attrs->hostentry)
                   2041:     {
                   2042:       if (!rte_resolvable(e))
                   2043:        buf += bsprintf(buf, "/-");
                   2044:       else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN)
                   2045:        buf += bsprintf(buf, "/?");
                   2046:       else
                   2047:        buf += bsprintf(buf, "/%d", e->attrs->igp_metric);
                   2048:     }
                   2049:   buf += bsprintf(buf, ") [");
                   2050: 
                   2051:   if (p && as_path_get_last(p->u.ptr, &origas))
                   2052:     buf += bsprintf(buf, "AS%u", origas);
                   2053:   if (o)
                   2054:     buf += bsprintf(buf, "%c", "ie?"[o->u.data]);
                   2055:   strcpy(buf, "]");
                   2056: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>