File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / quagga / bgpd / bgp_mpath.c
Revision 1.1.1.3 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Wed Nov 2 10:09:10 2016 UTC (8 years, 1 month ago) by misho
Branches: quagga, MAIN
CVS tags: v1_0_20160315, HEAD
quagga 1.0.20160315

/* $QuaggaId: Format:%an, %ai, %h$ $
 *
 * BGP Multipath
 * Copyright (C) 2010 Google Inc.
 *
 * This file is part of Quagga
 *
 * Quagga is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * Quagga is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Quagga; see the file COPYING.  If not, write to the Free
 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 * 02111-1307, USA.
 */

#include <zebra.h>

#include "command.h"
#include "prefix.h"
#include "linklist.h"
#include "sockunion.h"
#include "memory.h"
#include "filter.h"

#include "bgpd/bgpd.h"
#include "bgpd/bgp_table.h"
#include "bgpd/bgp_route.h"
#include "bgpd/bgp_attr.h"
#include "bgpd/bgp_debug.h"
#include "bgpd/bgp_aspath.h"
#include "bgpd/bgp_community.h"
#include "bgpd/bgp_ecommunity.h"
#include "bgpd/bgp_mpath.h"

bool
bgp_mpath_is_configured_sort (struct bgp *bgp, bgp_peer_sort_t sort,
                              afi_t afi, safi_t safi)
{
  struct bgp_maxpaths_cfg *cfg = &bgp->maxpaths[afi][safi];

  /* XXX: BGP_DEFAULT_MAXPATHS is 1, and this test only seems to make sense
   * if if it stays 1, so not sure the DEFAULT define is that useful.
   */
  switch (sort)
    {
      case BGP_PEER_IBGP:
        return cfg->maxpaths_ibgp != BGP_DEFAULT_MAXPATHS;
      case BGP_PEER_EBGP:
        return cfg->maxpaths_ebgp != BGP_DEFAULT_MAXPATHS;
      default:
        return false;
    }
}

bool
bgp_mpath_is_configured (struct bgp *bgp, afi_t afi, safi_t safi)
{
  return bgp_mpath_is_configured_sort (bgp, BGP_PEER_IBGP, afi, safi)
         || bgp_mpath_is_configured_sort (bgp, BGP_PEER_EBGP, afi, safi);
}

/*
 * bgp_maximum_paths_set
 *
 * Record maximum-paths configuration for BGP instance
 */
int
bgp_maximum_paths_set (struct bgp *bgp, afi_t afi, safi_t safi,
                       int peertype, u_int16_t maxpaths)
{
  if (!bgp || (afi >= AFI_MAX) || (safi >= SAFI_MAX))
    return -1;

  switch (peertype)
    {
    case BGP_PEER_IBGP:
      bgp->maxpaths[afi][safi].maxpaths_ibgp = maxpaths;
      break;
    case BGP_PEER_EBGP:
      bgp->maxpaths[afi][safi].maxpaths_ebgp = maxpaths;
      break;
    default:
      return -1;
    }

  return 0;
}

/*
 * bgp_maximum_paths_unset
 *
 * Remove maximum-paths configuration from BGP instance
 */
int
bgp_maximum_paths_unset (struct bgp *bgp, afi_t afi, safi_t safi,
                         int peertype)
{
  if (!bgp || (afi >= AFI_MAX) || (safi >= SAFI_MAX))
    return -1;

  switch (peertype)
    {
    case BGP_PEER_IBGP:
      bgp->maxpaths[afi][safi].maxpaths_ibgp = BGP_DEFAULT_MAXPATHS;
      break;
    case BGP_PEER_EBGP:
      bgp->maxpaths[afi][safi].maxpaths_ebgp = BGP_DEFAULT_MAXPATHS;
      break;
    default:
      return -1;
    }

  return 0;
}

/*
 * bgp_info_nexthop_cmp
 *
 * Compare the nexthops of two paths. Return value is less than, equal to,
 * or greater than zero if bi1 is respectively less than, equal to,
 * or greater than bi2.
 */
static int
bgp_info_nexthop_cmp (struct bgp_info *bi1, struct bgp_info *bi2)
{
  struct attr_extra *ae1, *ae2;
  int compare;

  ae1 = bi1->attr->extra;
  ae2 = bi2->attr->extra;

  compare = IPV4_ADDR_CMP (&bi1->attr->nexthop, &bi2->attr->nexthop);

  if (!compare && ae1 && ae2 && (ae1->mp_nexthop_len == ae2->mp_nexthop_len))
    {
      switch (ae1->mp_nexthop_len)
        {
        case 4:
        case 12:
          compare = IPV4_ADDR_CMP (&ae1->mp_nexthop_global_in,
                                   &ae2->mp_nexthop_global_in);
          break;
        case 16:
          compare = IPV6_ADDR_CMP (&ae1->mp_nexthop_global,
                                   &ae2->mp_nexthop_global);
          break;
        case 32:
          compare = IPV6_ADDR_CMP (&ae1->mp_nexthop_global,
                                   &ae2->mp_nexthop_global);
          if (!compare)
            compare = IPV6_ADDR_CMP (&ae1->mp_nexthop_local,
                                     &ae2->mp_nexthop_local);
          break;
        }
    }

  return compare;
}

/*
 * bgp_info_mpath_cmp
 *
 * This function determines our multipath list ordering. By ordering
 * the list we can deterministically select which paths are included
 * in the multipath set. The ordering also helps in detecting changes
 * in the multipath selection so we can detect whether to send an
 * update to zebra.
 *
 * The order of paths is determined first by received nexthop, and then
 * by peer address if the nexthops are the same.
 */
static int
bgp_info_mpath_cmp (void *val1, void *val2)
{
  struct bgp_info *bi1, *bi2;
  int compare;

  bi1 = val1;
  bi2 = val2;

  compare = bgp_info_nexthop_cmp (bi1, bi2);

  if (!compare)
    compare = sockunion_cmp (bi1->peer->su_remote, bi2->peer->su_remote);

  return compare;
}

/*
 * bgp_mp_list_init
 *
 * Initialize the mp_list, which holds the list of multipaths
 * selected by bgp_best_selection
 */
void
bgp_mp_list_init (struct list *mp_list)
{
  assert (mp_list);
  memset (mp_list, 0, sizeof (struct list));
  mp_list->cmp = bgp_info_mpath_cmp;
}

/*
 * bgp_mp_list_clear
 *
 * Clears all entries out of the mp_list
 */
void
bgp_mp_list_clear (struct list *mp_list)
{
  assert (mp_list);
  list_delete_all_node (mp_list);
}

/*
 * bgp_mp_list_add
 *
 * Adds a multipath entry to the mp_list
 */
void
bgp_mp_list_add (struct list *mp_list, struct bgp_info *mpinfo)
{
  assert (mp_list && mpinfo);
  listnode_add_sort (mp_list, mpinfo);
}

/*
 * bgp_info_mpath_new
 *
 * Allocate and zero memory for a new bgp_info_mpath element
 */
static struct bgp_info_mpath *
bgp_info_mpath_new (void)
{
  struct bgp_info_mpath *new_mpath;
  new_mpath = XCALLOC (MTYPE_BGP_MPATH_INFO, sizeof (struct bgp_info_mpath));
  return new_mpath;
}

/*
 * bgp_info_mpath_free
 *
 * Release resources for a bgp_info_mpath element and zero out pointer
 */
void
bgp_info_mpath_free (struct bgp_info_mpath **mpath)
{
  if (mpath && *mpath)
    {
      if ((*mpath)->mp_attr)
        bgp_attr_unintern (&(*mpath)->mp_attr);
      XFREE (MTYPE_BGP_MPATH_INFO, *mpath);
      *mpath = NULL;
    }
}

/*
 * bgp_info_mpath_get
 *
 * Fetch the mpath element for the given bgp_info. Used for
 * doing lazy allocation.
 */
static struct bgp_info_mpath *
bgp_info_mpath_get (struct bgp_info *binfo)
{
  struct bgp_info_mpath *mpath;
  if (!binfo->mpath)
    {
      mpath = bgp_info_mpath_new();
      if (!mpath)
        return NULL;
      binfo->mpath = mpath;
      mpath->mp_info = binfo;
    }
  return binfo->mpath;
}

/*
 * bgp_info_mpath_enqueue
 *
 * Enqueue a path onto the multipath list given the previous multipath
 * list entry
 */
static void
bgp_info_mpath_enqueue (struct bgp_info *prev_info, struct bgp_info *binfo)
{
  struct bgp_info_mpath *prev, *mpath;

  prev = bgp_info_mpath_get (prev_info);
  mpath = bgp_info_mpath_get (binfo);
  if (!prev || !mpath)
    return;

  mpath->mp_next = prev->mp_next;
  mpath->mp_prev = prev;
  if (prev->mp_next)
    prev->mp_next->mp_prev = mpath;
  prev->mp_next = mpath;

  SET_FLAG (binfo->flags, BGP_INFO_MULTIPATH);
}

/*
 * bgp_info_mpath_dequeue
 *
 * Remove a path from the multipath list
 */
void
bgp_info_mpath_dequeue (struct bgp_info *binfo)
{
  struct bgp_info_mpath *mpath = binfo->mpath;
  if (!mpath)
    return;
  if (mpath->mp_prev)
    mpath->mp_prev->mp_next = mpath->mp_next;
  if (mpath->mp_next)
    mpath->mp_next->mp_prev = mpath->mp_prev;
  mpath->mp_next = mpath->mp_prev = NULL;
  UNSET_FLAG (binfo->flags, BGP_INFO_MULTIPATH);
}

/*
 * bgp_info_mpath_next
 *
 * Given a bgp_info, return the next multipath entry
 */
struct bgp_info *
bgp_info_mpath_next (struct bgp_info *binfo)
{
  if (!binfo->mpath || !binfo->mpath->mp_next)
    return NULL;
  return binfo->mpath->mp_next->mp_info;
}

/*
 * bgp_info_mpath_first
 *
 * Given bestpath bgp_info, return the first multipath entry.
 */
struct bgp_info *
bgp_info_mpath_first (struct bgp_info *binfo)
{
  return bgp_info_mpath_next (binfo);
}

/*
 * bgp_info_mpath_count
 *
 * Given the bestpath bgp_info, return the number of multipath entries
 */
u_int32_t
bgp_info_mpath_count (struct bgp_info *binfo)
{
  if (!binfo->mpath)
    return 0;
  return binfo->mpath->mp_count;
}

/*
 * bgp_info_mpath_count_set
 *
 * Sets the count of multipaths into bestpath's mpath element
 */
static void
bgp_info_mpath_count_set (struct bgp_info *binfo, u_int32_t count)
{
  struct bgp_info_mpath *mpath;
  if (!count && !binfo->mpath)
    return;
  mpath = bgp_info_mpath_get (binfo);
  if (!mpath)
    return;
  mpath->mp_count = count;
}

/*
 * bgp_info_mpath_attr
 *
 * Given bestpath bgp_info, return aggregated attribute set used
 * for advertising the multipath route
 */
struct attr *
bgp_info_mpath_attr (struct bgp_info *binfo)
{
  if (!binfo->mpath)
    return NULL;
  return binfo->mpath->mp_attr;
}

/*
 * bgp_info_mpath_attr_set
 *
 * Sets the aggregated attribute into bestpath's mpath element
 */
static void
bgp_info_mpath_attr_set (struct bgp_info *binfo, struct attr *attr)
{
  struct bgp_info_mpath *mpath;
  if (!attr && !binfo->mpath)
    return;
  mpath = bgp_info_mpath_get (binfo);
  if (!mpath)
    return;
  mpath->mp_attr = attr;
}

/*
 * bgp_info_mpath_update
 *
 * Compare and sync up the multipath list with the mp_list generated by
 * bgp_best_selection
 */
void
bgp_info_mpath_update (struct bgp_node *rn, struct bgp_info *new_best,
                       struct bgp_info *old_best, struct list *mp_list,
                        afi_t afi, safi_t safi)
{
  u_int16_t maxpaths, mpath_count, old_mpath_count;
  struct listnode *mp_node, *mp_next_node;
  struct bgp_info *cur_mpath, *new_mpath, *next_mpath, *prev_mpath;
  int mpath_changed, debug;
  char pfx_buf[INET_ADDRSTRLEN], nh_buf[2][INET_ADDRSTRLEN];
  struct bgp_maxpaths_cfg *mpath_cfg = NULL;

  mpath_changed = 0;
  maxpaths = BGP_DEFAULT_MAXPATHS;
  mpath_count = 0;
  cur_mpath = NULL;
  old_mpath_count = 0;
  prev_mpath = new_best;
  mp_node = listhead (mp_list);

  debug = BGP_DEBUG (events, EVENTS);

  if (debug)
    prefix2str (&rn->p, pfx_buf, sizeof (pfx_buf));

  if (new_best)
    {
      mpath_cfg = &new_best->peer->bgp->maxpaths[afi][safi];
      mpath_count++;
      if (new_best != old_best)
        bgp_info_mpath_dequeue (new_best);
      maxpaths = (new_best->peer->sort == BGP_PEER_IBGP) ?
        mpath_cfg->maxpaths_ibgp : mpath_cfg->maxpaths_ebgp;
    }

  if (old_best)
    {
      cur_mpath = bgp_info_mpath_first (old_best);
      old_mpath_count = bgp_info_mpath_count (old_best);
      bgp_info_mpath_count_set (old_best, 0);
      bgp_info_mpath_dequeue (old_best);
    }

  /*
   * We perform an ordered walk through both lists in parallel.
   * The reason for the ordered walk is that if there are paths
   * that were previously multipaths and are still multipaths, the walk
   * should encounter them in both lists at the same time. Otherwise
   * there will be paths that are in one list or another, and we
   * will deal with these separately.
   *
   * Note that new_best might be somewhere in the mp_list, so we need
   * to skip over it
   */
  while (mp_node || cur_mpath)
    {
      /*
       * We can bail out of this loop if all existing paths on the
       * multipath list have been visited (for cleanup purposes) and
       * the maxpath requirement is fulfulled
       */
      if (!cur_mpath && (mpath_count >= maxpaths))
        break;

      mp_next_node = mp_node ? listnextnode (mp_node) : NULL;
      next_mpath = cur_mpath ? bgp_info_mpath_next (cur_mpath) : NULL;

      /*
       * If equal, the path was a multipath and is still a multipath.
       * Insert onto new multipath list if maxpaths allows.
       */
      if (mp_node && (listgetdata (mp_node) == cur_mpath))
        {
          list_delete_node (mp_list, mp_node);
          bgp_info_mpath_dequeue (cur_mpath);
          if ((mpath_count < maxpaths) &&
              bgp_info_nexthop_cmp (prev_mpath, cur_mpath))
            {
              bgp_info_mpath_enqueue (prev_mpath, cur_mpath);
              prev_mpath = cur_mpath;
              mpath_count++;
            }
          else
            {
              mpath_changed = 1;
              if (debug)
                zlog_debug ("%s remove mpath nexthop %s peer %s", pfx_buf,
                            inet_ntop (AF_INET, &cur_mpath->attr->nexthop,
                                       nh_buf[0], sizeof (nh_buf[0])),
                            sockunion2str (cur_mpath->peer->su_remote,
                                           nh_buf[1], sizeof (nh_buf[1])));
            }
          mp_node = mp_next_node;
          cur_mpath = next_mpath;
          continue;
        }

      if (cur_mpath && (!mp_node ||
                        (bgp_info_mpath_cmp (cur_mpath,
                                             listgetdata (mp_node)) < 0)))
        {
          /*
           * If here, we have an old multipath and either the mp_list
           * is finished or the next mp_node points to a later
           * multipath, so we need to purge this path from the
           * multipath list
           */
          bgp_info_mpath_dequeue (cur_mpath);
          mpath_changed = 1;
          if (debug)
            zlog_debug ("%s remove mpath nexthop %s peer %s", pfx_buf,
                        inet_ntop (AF_INET, &cur_mpath->attr->nexthop,
                                   nh_buf[0], sizeof (nh_buf[0])),
                        sockunion2str (cur_mpath->peer->su_remote,
                                       nh_buf[1], sizeof (nh_buf[1])));
          cur_mpath = next_mpath;
        }
      else
        {
          /*
           * If here, we have a path on the mp_list that was not previously
           * a multipath (due to non-equivalance or maxpaths exceeded),
           * or the matching multipath is sorted later in the multipath
           * list. Before we enqueue the path on the new multipath list,
           * make sure its not on the old_best multipath list or referenced
           * via next_mpath:
           * - If next_mpath points to this new path, update next_mpath to
           *   point to the multipath after this one
           * - Dequeue the path from the multipath list just to make sure
           */
          new_mpath = listgetdata (mp_node);
          list_delete_node (mp_list, mp_node);
          if ((mpath_count < maxpaths) && (new_mpath != new_best) &&
              bgp_info_nexthop_cmp (prev_mpath, new_mpath))
            {
              if (new_mpath == next_mpath)
                next_mpath = bgp_info_mpath_next (new_mpath);
              bgp_info_mpath_dequeue (new_mpath);

              bgp_info_mpath_enqueue (prev_mpath, new_mpath);
              prev_mpath = new_mpath;
              mpath_changed = 1;
              mpath_count++;
              if (debug)
                zlog_debug ("%s add mpath nexthop %s peer %s", pfx_buf,
                            inet_ntop (AF_INET, &new_mpath->attr->nexthop,
                                       nh_buf[0], sizeof (nh_buf[0])),
                            sockunion2str (new_mpath->peer->su_remote,
                                           nh_buf[1], sizeof (nh_buf[1])));
            }
          mp_node = mp_next_node;
        }
    }

  if (new_best)
    {
      bgp_info_mpath_count_set (new_best, mpath_count-1);
      if (mpath_changed || (bgp_info_mpath_count (new_best) != old_mpath_count))
        SET_FLAG (new_best->flags, BGP_INFO_MULTIPATH_CHG);
    }
}

/*
 * bgp_mp_dmed_deselect
 *
 * Clean up multipath information for BGP_INFO_DMED_SELECTED path that
 * is not selected as best path
 */
void
bgp_mp_dmed_deselect (struct bgp_info *dmed_best)
{
  struct bgp_info *mpinfo, *mpnext;

  if (!dmed_best)
    return;

  for (mpinfo = bgp_info_mpath_first (dmed_best); mpinfo; mpinfo = mpnext)
    {
      mpnext = bgp_info_mpath_next (mpinfo);
      bgp_info_mpath_dequeue (mpinfo);
    }

  bgp_info_mpath_count_set (dmed_best, 0);
  UNSET_FLAG (dmed_best->flags, BGP_INFO_MULTIPATH_CHG);
  assert (bgp_info_mpath_first (dmed_best) == 0);
}

/*
 * bgp_info_mpath_aggregate_update
 *
 * Set the multipath aggregate attribute. We need to see if the
 * aggregate has changed and then set the ATTR_CHANGED flag on the
 * bestpath info so that a peer update will be generated. The
 * change is detected by generating the current attribute,
 * interning it, and then comparing the interned pointer with the
 * current value. We can skip this generate/compare step if there
 * is no change in multipath selection and no attribute change in
 * any multipath.
 */
void
bgp_info_mpath_aggregate_update (struct bgp_info *new_best,
                                 struct bgp_info *old_best)
{
  struct bgp_info *mpinfo;
  struct aspath *aspath;
  struct aspath *asmerge;
  struct attr *new_attr, *old_attr;
  u_char origin, attr_chg;
  struct community *community, *commerge;
  struct ecommunity *ecomm, *ecommerge;
  struct attr_extra *ae;
  struct attr attr = { 0 };

  if (old_best && (old_best != new_best) &&
      (old_attr = bgp_info_mpath_attr (old_best)))
    {
      bgp_attr_unintern (&old_attr);
      bgp_info_mpath_attr_set (old_best, NULL);
    }

  if (!new_best)
    return;

  if (!bgp_info_mpath_count (new_best))
    {
      if ((new_attr = bgp_info_mpath_attr (new_best)))
        {
          bgp_attr_unintern (&new_attr);
          bgp_info_mpath_attr_set (new_best, NULL);
          SET_FLAG (new_best->flags, BGP_INFO_ATTR_CHANGED);
        }
      return;
    }

  /*
   * Bail out here if the following is true:
   * - MULTIPATH_CHG bit is not set on new_best, and
   * - No change in bestpath, and
   * - ATTR_CHANGED bit is not set on new_best or any of the multipaths
   */
  if (!CHECK_FLAG (new_best->flags, BGP_INFO_MULTIPATH_CHG) &&
      (old_best == new_best))
    {
      attr_chg = 0;

      if (CHECK_FLAG (new_best->flags, BGP_INFO_ATTR_CHANGED))
        attr_chg = 1;
      else
        for (mpinfo = bgp_info_mpath_first (new_best); mpinfo;
             mpinfo = bgp_info_mpath_next (mpinfo))
          {
            if (CHECK_FLAG (mpinfo->flags, BGP_INFO_ATTR_CHANGED))
              {
                attr_chg = 1;
                break;
              }
          }

      if (!attr_chg)
        {
          assert (bgp_info_mpath_attr (new_best));
          return;
        }
    }

  bgp_attr_dup (&attr, new_best->attr);

  /* aggregate attribute from multipath constituents */
  aspath = aspath_dup (attr.aspath);
  origin = attr.origin;
  community = attr.community ? community_dup (attr.community) : NULL;
  ae = attr.extra;
  ecomm = (ae && ae->ecommunity) ? ecommunity_dup (ae->ecommunity) : NULL;

  for (mpinfo = bgp_info_mpath_first (new_best); mpinfo;
       mpinfo = bgp_info_mpath_next (mpinfo))
    {
      asmerge = aspath_aggregate (aspath, mpinfo->attr->aspath);
      aspath_free (aspath);
      aspath = asmerge;

      if (origin < mpinfo->attr->origin)
        origin = mpinfo->attr->origin;

      if (mpinfo->attr->community)
        {
          if (community)
            {
              commerge = community_merge (community, mpinfo->attr->community);
              community = community_uniq_sort (commerge);
              community_free (commerge);
            }
          else
            community = community_dup (mpinfo->attr->community);
        }

      ae = mpinfo->attr->extra;
      if (ae && ae->ecommunity)
        {
          if (ecomm)
            {
              ecommerge = ecommunity_merge (ecomm, ae->ecommunity);
              ecomm = ecommunity_uniq_sort (ecommerge);
              ecommunity_free (&ecommerge);
            }
          else
            ecomm = ecommunity_dup (ae->ecommunity);
        }
    }

  attr.aspath = aspath;
  attr.origin = origin;
  if (community)
    {
      attr.community = community;
      attr.flag |= ATTR_FLAG_BIT (BGP_ATTR_COMMUNITIES);
    }
  if (ecomm)
    {
      ae = bgp_attr_extra_get (&attr);
      ae->ecommunity = ecomm;
      attr.flag |= ATTR_FLAG_BIT (BGP_ATTR_EXT_COMMUNITIES);
    }

  /* Zap multipath attr nexthop so we set nexthop to self */
  attr.nexthop.s_addr = 0;
  if (attr.extra)
    memset (&attr.extra->mp_nexthop_global, 0, sizeof (struct in6_addr));

  /* TODO: should we set ATOMIC_AGGREGATE and AGGREGATOR? */

  new_attr = bgp_attr_intern (&attr);
  bgp_attr_extra_free (&attr);

  if (new_attr != bgp_info_mpath_attr (new_best))
    {
      if ((old_attr = bgp_info_mpath_attr (new_best)))
        bgp_attr_unintern (&old_attr);
      bgp_info_mpath_attr_set (new_best, new_attr);
      SET_FLAG (new_best->flags, BGP_INFO_ATTR_CHANGED);
    }
  else
    bgp_attr_unintern (&new_attr);
}

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>