File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / pimd / pim.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Mon Jun 12 07:59:37 2017 UTC (7 years, 6 months ago) by misho
Branches: pimd, MAIN
CVS tags: v2_3_2, HEAD
pimd 2.3.2

/*
 * Copyright (c) 1998-2001
 * University of Southern California/Information Sciences Institute.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 *  $Id: pim.c,v 1.1.1.1 2017/06/12 07:59:37 misho Exp $
 */

#include "defs.h"

/*
 * Intended period for throttled "SENT" debug messages.  In this file it
 * is only referenced from the disabled (#if 0) code in send_pim_unicast().
 */
#define SEND_DEBUG_NUMBER 50	/* For throttling log messages */

/*
 * Exported variables.
 *
 * Both packet buffers are allocated once in init_pim().  The start of
 * pim_send_buf is pre-filled there with an IP header template.
 */
char	*pim_recv_buf;		/* input packet buffer   */
char	*pim_send_buf;		/* output packet buffer  */

uint32_t	allpimrouters_group;	/* ALL_PIM_ROUTERS address in net order */
int	pim_socket;		/* socket for PIM control msgs */

/* TTL used by send_pim() for multicast destinations; defined elsewhere. */
#ifdef RAW_OUTPUT_IS_RAW
extern int curttl;
#endif /* RAW_OUTPUT_IS_RAW */

/*
 * Local variables.
 */
/* IP identification counter, pre-incremented for every packet built by
 * send_pim() and send_pim_unicast(). */
static uint16_t ip_id = 0;
//static u_int pim_send_cnt = 0;


/*
 * Local function prototypes.
 */
static void pim_read   (int f, fd_set *rfd);
static void accept_pim (ssize_t recvlen);
static int  send_frame (char *buf, size_t len, size_t frag, size_t mtu, struct sockaddr *dst, size_t salen);

/*
 * One-time PIM initialisation: open and tune the raw PIM socket,
 * allocate the receive/send buffers, pre-fill the IP header template at
 * the start of the send buffer, register pim_read() as the socket's
 * input handler and reset the Join/Prune build pool.
 */
void init_pim(void)
{
    struct ip *hdr;

    /* Raw socket carrying the PIM control messages */
    pim_socket = socket(AF_INET, SOCK_RAW, IPPROTO_PIM);
    if (pim_socket < 0)
	logit(LOG_ERR, errno, "Failed creating PIM socket");

    /* We supply the IP header ourselves when sending */
    k_hdr_include(pim_socket, TRUE);

    /* Ask for as much output and input buffering as we can get */
    k_set_sndbuf(pim_socket, SO_SEND_BUF_SIZE_MAX, SO_SEND_BUF_SIZE_MIN);
    k_set_rcvbuf(pim_socket, SO_RECV_BUF_SIZE_MAX, SO_RECV_BUF_SIZE_MIN);

    /* Multicasts are restricted to one hop and not looped back */
    k_set_ttl(pim_socket, MINTTL);
    k_set_loop(pim_socket, FALSE);

    allpimrouters_group = htonl(INADDR_ALL_PIM_ROUTERS);

    pim_recv_buf = calloc(1, RECV_BUF_SIZE);
    pim_send_buf = calloc(1, SEND_BUF_SIZE);
    if (pim_recv_buf == NULL || pim_send_buf == NULL)
	logit(LOG_ERR, 0, "Ran out of memory in init_pim()");

    /*
     * IP header template.  Everything not set explicitly below (TOS,
     * ID, fragment offset, checksum) is left zero by the memset(); the
     * ID is updated per packet by the send routines and the checksum is
     * left for the kernel to fill in.
     */
    hdr = (struct ip *)pim_send_buf;
    memset(hdr, 0, sizeof(struct ip));
    hdr->ip_v  = IPVERSION;
    hdr->ip_hl = sizeof(struct ip) / 4;
    hdr->ip_p  = IPPROTO_PIM;

    if (register_input_handler(pim_socket, pim_read) < 0)
	logit(LOG_ERR, 0,  "Failed registering pim_read() as an input handler");

    /* Start with an empty Join/Prune message build area */
    build_jp_message_pool         = NULL;
    build_jp_message_pool_counter = 0;
}


/*
 * Input handler for the PIM socket: read one packet into pim_recv_buf
 * and hand it to accept_pim() with SIGALRM blocked for the duration of
 * the processing.  Both parameters are unused.
 */
static void pim_read(int f __attribute__((unused)), fd_set *rfd __attribute__((unused)))
{
    sigset_t alarm_mask, saved_mask;
    socklen_t addrlen = 0;
    ssize_t nbytes;

    for (;;) {
	nbytes = recvfrom(pim_socket, pim_recv_buf, RECV_BUF_SIZE, 0, NULL, &addrlen);
	if (nbytes >= 0)
	    break;

	/* Interrupted by a signal: simply retry the syscall */
	if (errno != EINTR) {
	    logit(LOG_ERR, errno, "Failed recvfrom() in pim_read()");
	    return;
	}
    }

    /* Keep SIGALRM from being delivered while the packet is processed */
    sigemptyset(&alarm_mask);
    sigaddset(&alarm_mask, SIGALRM);
    if (sigprocmask(SIG_BLOCK, &alarm_mask, &saved_mask) < 0)
	logit(LOG_ERR, errno, "sigprocmask");

    accept_pim(nbytes);

    /* Restore the signal mask that was active on entry */
    sigprocmask(SIG_SETMASK, &saved_mask, NULL);
}

/*
 * Validate and dispatch the PIM packet currently held in pim_recv_buf.
 *
 * recvlen is the number of bytes returned by recvfrom(), i.e. the IP
 * header plus the PIM message.  The packet is dropped with a warning
 * when it is too short for an IP header, when the IP header length
 * field is smaller than a minimal IP header or larger than the packet,
 * or when what follows the IP header is too short for a PIM header.
 * Otherwise the message is passed to the receive_pim_*() routine that
 * matches its PIM type; Graft, Graft-Ack and unknown types are only
 * logged.
 */
static void accept_pim(ssize_t recvlen)
{
    uint32_t src, dst;
    struct ip *ip;
    pim_header_t *pim;
    int iphdrlen, pimlen;
    char source[20], dest[20];

    if (recvlen < (ssize_t)sizeof(struct ip)) {
	logit(LOG_WARNING, 0, "Received PIM packet too short (%zd bytes) for IP header", recvlen);
	return;
    }

    ip		= (struct ip *)pim_recv_buf;
    src		= ip->ip_src.s_addr;
    dst		= ip->ip_dst.s_addr;
    iphdrlen	= ip->ip_hl << 2;

    /*
     * ip_hl is taken from the packet itself.  Reject values that would
     * place the PIM header inside the fixed IP header or beyond the
     * data actually received.
     */
    if (iphdrlen < (int)sizeof(struct ip) || iphdrlen > recvlen) {
	logit(LOG_WARNING, 0, "Received PIM packet with invalid IP header length (%d bytes), from %s to %s",
	      iphdrlen, inet_fmt(src, source, sizeof(source)), inet_fmt(dst, dest, sizeof(dest)));
	return;
    }

    pim		= (pim_header_t *)(pim_recv_buf + iphdrlen);
    pimlen	= recvlen - iphdrlen;

    /* Sanity check packet length */
    if (pimlen < (ssize_t)sizeof(*pim)) {
	logit(LOG_WARNING, 0, "IP data field too short (%d bytes) for PIM header, from %s to %s",
	      pimlen, inet_fmt(src, source, sizeof(source)), inet_fmt(dst, dest, sizeof(dest)));
	return;
    }

    IF_DEBUG(DEBUG_PIM_DETAIL) {
	IF_DEBUG(DEBUG_PIM) {
	    logit(LOG_DEBUG, 0, "RECV %5zd bytes %s from %-15s to %s ", recvlen,
		  packet_kind(IPPROTO_PIM, pim->pim_type, 0),
		  inet_fmt(src, source, sizeof(source)), inet_fmt(dst, dest, sizeof(dest)));
	}
    }

    /* TODO: Check PIM version */
    /* TODO: check the dest. is ALL_PIM_ROUTERS (if multicast address) */
    /* TODO: Checksum verification is done in each of the processing functions.
     * No need for checksum, if already done in the kernel?
     */
    switch (pim->pim_type) {
	case PIM_HELLO:
	    receive_pim_hello(src, dst, (char *)(pim), pimlen);
	    break;

	case PIM_REGISTER:
	    receive_pim_register(src, dst, (char *)(pim), pimlen);
	    break;

	case PIM_REGISTER_STOP:
	    receive_pim_register_stop(src, dst, (char *)(pim), pimlen);
	    break;

	case PIM_JOIN_PRUNE:
	    receive_pim_join_prune(src, dst, (char *)(pim), pimlen);
	    break;

	case PIM_BOOTSTRAP:
	    receive_pim_bootstrap(src, dst, (char *)(pim), pimlen);
	    break;

	case PIM_ASSERT:
	    receive_pim_assert(src, dst, (char *)(pim), pimlen);
	    break;

	case PIM_GRAFT:
	case PIM_GRAFT_ACK:
	    /* These two message types are not processed here, only logged */
	    logit(LOG_INFO, 0, "ignore %s from %s to %s", packet_kind(IPPROTO_PIM, pim->pim_type, 0),
		  inet_fmt(src, source, sizeof(source)), inet_fmt(dst, dest, sizeof(dest)));
	    break;

	case PIM_CAND_RP_ADV:
	    receive_pim_cand_rp_adv(src, dst, (char *)(pim), pimlen);
	    break;

	default:
	    logit(LOG_INFO, 0, "ignore unknown PIM message code %u from %s to %s", pim->pim_type,
		  inet_fmt(src, source, sizeof(source)), inet_fmt(dst, dest, sizeof(dest)));
	    break;
    }
}


/*
 * Send a PIM packet from src to dst over the raw PIM socket.
 *
 * buf  - packet buffer; the IP header is built at its start, followed
 *        by the PIM header and then the caller's message body
 * src  - source address, network byte order
 * dst  - destination address, network byte order
 * type - PIM message type
 * len  - length of the data following the PIM header
 *
 * For multicast destinations the outgoing interface is selected from
 * src, and multicast loopback is switched on for the duration of the
 * send when dst is one of the well-known all-hosts / all-routers /
 * all-PIM-routers / all-reports groups.  Send failures are logged;
 * nothing is returned to the caller.
 */
void send_pim(char *buf, uint32_t src, uint32_t dst, int type, size_t len)
{
    struct sockaddr_in to;
    struct ip *iph        = (struct ip *)buf;
    pim_header_t *pimh    = (pim_header_t *)(buf + sizeof(struct ip));
    int pktlen            = sizeof(struct ip) + sizeof(pim_header_t) + len;
    int is_mcast          = IN_MULTICAST(ntohl(dst));
    int loop_enabled      = 0;
    char source[20], dest[20];

    /* IP header */
    iph->ip_id         = htons(++ip_id);
    iph->ip_off        = 0;
    iph->ip_src.s_addr = src;
    iph->ip_dst.s_addr = dst;
    iph->ip_ttl        = MAXTTL;            /* applies to unicast only */
#ifdef HAVE_IP_HDRINCL_BSD_ORDER
    iph->ip_len        = pktlen;
#else
    iph->ip_len        = htons(pktlen);
#endif

    /* PIM header; the checksum field must be zero while summing */
    pimh->pim_type     = type;
    pimh->pim_vers     = PIM_PROTOCOL_VERSION;
    pimh->pim_reserved = 0;
    pimh->pim_cksum    = 0;

    /* TODO: XXX: if start using this code for PIM_REGISTERS, exclude the
     * encapsulated packet from the checksum. */
    pimh->pim_cksum    = inet_cksum((uint16_t *)pimh, sizeof(pim_header_t) + len);

    if (is_mcast) {
	k_set_if(pim_socket, src);
	if (dst == allhosts_group   || dst == allrouters_group ||
	    dst == allpimrouters_group || dst == allreports_group) {
	    loop_enabled = 1;
	    k_set_loop(pim_socket, TRUE);
	}
    }
#ifdef RAW_OUTPUT_IS_RAW
    iph->ip_ttl = is_mcast ? curttl : MAXTTL;
#endif /* RAW_OUTPUT_IS_RAW */

    memset(&to, 0, sizeof(to));
    to.sin_family      = AF_INET;
    to.sin_addr.s_addr = dst;
#ifdef HAVE_SA_LEN
    to.sin_len         = sizeof(to);
#endif

    for (;;) {
	if (sendto(pim_socket, buf, pktlen, 0, (struct sockaddr *)&to, sizeof(to)) >= 0)
	    break;

	switch (errno) {
	    case EINTR:
		continue;		/* Received signal, retry syscall. */

	    case ENETDOWN:
	    case ENODEV:
		check_vif_state();
		break;

	    case EPERM:
	    case EHOSTUNREACH:
		logit(LOG_WARNING, 0, "Not allowed to send PIM message from %s to %s, possibly firewall"
#ifdef __linux__
		      ", or SELinux policy violation,"
#endif
		      " related problem."
		      ,
		      inet_fmt(src, source, sizeof(source)), inet_fmt(dst, dest, sizeof(dest)));
		break;

	    default:
		logit(LOG_WARNING, errno, "sendto from %s to %s",
		      inet_fmt(src, source, sizeof(source)), inet_fmt(dst, dest, sizeof(dest)));
		break;
	}

	/* Any failure other than EINTR ends the attempt */
	if (loop_enabled)
	    k_set_loop(pim_socket, FALSE);
	return;
    }

    if (loop_enabled)
	k_set_loop(pim_socket, FALSE);

    IF_DEBUG(DEBUG_PIM_DETAIL) {
	IF_DEBUG(DEBUG_PIM) {
	    logit(LOG_DEBUG, 0, "SENT %5d bytes %s from %-15s to %s",
		  pktlen, packet_kind(IPPROTO_PIM, type, 0),
		  src == INADDR_ANY_N ? "INADDR_ANY" :
		  inet_fmt(src, source, sizeof(source)), inet_fmt(dst, dest, sizeof(dest)));
	}
    }
}


/* TODO: This can be merged with send_pim() */
/*
 * Send a unicast PIM packet from src to dst.
 *
 * buf  - packet buffer; the IP header is built at its start, followed
 *        by the PIM common header and then the caller's message body
 * mtu  - passed through to send_frame()
 * src  - source address, network byte order
 * dst  - destination address, network byte order
 * type - PIM message type
 * len  - length of the data following the PIM common header
 *
 * The packet is handed to send_frame(), which may fragment it and in
 * doing so rewrites ip_len/ip_off in the header at the start of buf.
 * Failures are logged; nothing is returned to the caller.
 */
void send_pim_unicast(char *buf, int mtu, uint32_t src, uint32_t dst, int type, size_t len)
{
    struct sockaddr_in sin;
    struct ip *ip;
    pim_header_t *pim;
    int result, sendlen = sizeof(struct ip) + sizeof(pim_header_t) + len;
    char source[20], dest[20];

    /* Prepare the IP header */
    ip                 = (struct ip *)buf;
    ip->ip_id	       = htons(++ip_id);
    /*
     * Always start from an unfragmented header, as send_pim() does.
     * send_frame() stores IP_MF and a fragment offset in this header
     * when it has to split a packet; without the reset a reused buffer
     * would carry those stale values into the next packet.
     */
    ip->ip_off         = 0;
    ip->ip_src.s_addr  = src;
    ip->ip_dst.s_addr  = dst;
    ip->ip_ttl         = MAXTTL; /* TODO: XXX: setup TTL from the inner mcast packet? */
#ifdef HAVE_IP_HDRINCL_BSD_ORDER
    ip->ip_len         = sendlen;
#else
    ip->ip_len         = htons(sendlen);
#endif

    /* Prepare the PIM packet; checksum field must be zero while summing */
    pim                = (pim_header_t *)(buf + sizeof(struct ip));
    pim->pim_type      = type;
    pim->pim_vers      = PIM_PROTOCOL_VERSION;
    pim->pim_reserved  = 0;
    pim->pim_cksum     = 0;

    /* XXX: The PIM_REGISTERs don't include the encapsulated
     * inner packet in the checksum.
     * Well, try to explain this to cisco...
     * If your RP is cisco and if it shows many PIM_REGISTER checksum
     * errors from this router, then #define BROKEN_CISCO_CHECKSUM here
     * or in your Makefile.
     * Note that such checksum is not in the spec, and such PIM_REGISTERS
     * may be dropped by some implementations (pimd should be OK).
     */
#ifdef BROKEN_CISCO_CHECKSUM
    pim->pim_cksum	= inet_cksum((uint16_t *)pim, sizeof(pim_header_t) + len);
#else
    if (PIM_REGISTER == type) {
	pim->pim_cksum	= inet_cksum((uint16_t *)pim, sizeof(pim_header_t) + sizeof(pim_register_t));
    } else {
        pim->pim_cksum	= inet_cksum((uint16_t *)pim, sizeof(pim_header_t) + len);
    }
#endif /* BROKEN_CISCO_CHECKSUM */

    memset(&sin, 0, sizeof(sin));
    sin.sin_family = AF_INET;
    sin.sin_addr.s_addr = dst;
#ifdef HAVE_SA_LEN
    sin.sin_len = sizeof(sin);
#endif

    IF_DEBUG(DEBUG_PIM_DETAIL) {
	IF_DEBUG(DEBUG_PIM) {
	    logit(LOG_DEBUG, 0, "SEND %5d bytes %s from %-15s to %s ...",
		  sendlen, packet_kind(IPPROTO_PIM, type, 0),
		  inet_fmt(src, source, sizeof(source)), inet_fmt(dst, dest, sizeof(dest)));
	}
    }

    result = send_frame(buf, sendlen, 0, mtu, (struct sockaddr *)&sin, sizeof(sin));
    if (result) {
	logit(LOG_WARNING, errno, "sendto from %s to %s",
	      inet_fmt(ip->ip_src.s_addr, source, sizeof(source)),
	      inet_fmt(ip->ip_dst.s_addr, dest, sizeof(dest)));
	return;
    }

    IF_DEBUG(DEBUG_PIM_DETAIL) {
	IF_DEBUG(DEBUG_PIM) {
#if 0 /* TODO: use pim_send_cnt? */
	    if (++pim_send_cnt > SEND_DEBUG_NUMBER) {
		pim_send_cnt = 0;
		logit(LOG_DEBUG, 0, "SENT %5d bytes %s from %-15s to %s",
		      sendlen, packet_kind(IPPROTO_PIM, type, 0),
		      inet_fmt(src, source, sizeof(source)), inet_fmt(dst, dest, sizeof(dest)));
	    }
#endif
	    logit(LOG_DEBUG, 0, "SENT %5d bytes %s from %-15s to %s",
		  sendlen, packet_kind(IPPROTO_PIM, type, 0),
		  inet_fmt(src, source, sizeof(source)), inet_fmt(dst, dest, sizeof(dest)));
	}
    }
}


#if 1
/*
 * send unicast register frames
 * Version: Michael Fine
 * Status:  Works, albeit non-optimal
 * Design:  Only fragments if sendto() fails with EMSGSIZE
 *          It then tries to re-send by splitting the frame in two halves
 *          (the first one rounded down to an 8 byte boundary), calling
 *          send_frame() recursively until the frame has been sent.
 *
 * buf   - frame to send, starting with its IP header; the header and
 *         part of the payload are overwritten when fragmenting
 * len   - total frame length, IP header included
 * frag  - only used in the debug log in this version
 * mtu   - only used in the debug log in this version
 * dst   - destination socket address
 * salen - size of *dst
 *
 * Returns 0 when the whole frame was sent and -1 on failure.  On the
 * ENETDOWN and "cannot split any further" paths errno is left as set
 * by sendto().
 */
static int send_frame(char *buf, size_t len, size_t frag, size_t mtu, struct sockaddr *dst, size_t salen)
{
    struct ip *ip = (struct ip *)buf;
    char source[20], dest[20];

    IF_DEBUG(DEBUG_PIM_REGISTER) {
	logit(LOG_INFO, 0, "Sending unicast: len = %zu, frag %zu, mtu %zu, to %s",
	      len, frag, mtu, inet_fmt(ip->ip_dst.s_addr, source, sizeof(source)));
	dump_frame(NULL, buf, len);
    }

    while (sendto(pim_socket, buf, len, 0, dst, salen) < 0) {
	switch (errno) {
	    case EINTR:
		continue; /* Received signal, retry syscall. */

	    case ENETDOWN:
		check_vif_state();
		return -1;

	    case EMSGSIZE:
	    {
		/* split it in half and recursively send each half */
		size_t hdrsize = sizeof(struct ip);
		size_t newlen1, sendlen, offset;

		/* Nothing but (part of) a header: cannot be split */
		if (len <= hdrsize)
		    return -1;

		newlen1 = ((len - hdrsize) / 2) & 0xFFF8; /* 8 byte boundary */

		/*
		 * A payload below 16 bytes rounds down to an empty first
		 * half, which would leave the second half as large as the
		 * frame we just failed to send and recurse without end.
		 */
		if (newlen1 == 0)
		    return -1;

		sendlen = newlen1 + hdrsize;
		offset  = ntohs(ip->ip_off);

		/* send first half */
		ip->ip_len = htons(sendlen);
		ip->ip_off = htons(offset | IP_MF);
		if (send_frame(buf, sendlen, 1, newlen1, dst, salen) == 0) {
		    /* send second half */
		    struct ip *ip2 = (struct ip *)(buf + newlen1);
		    size_t newlen2 = len - sendlen;
		           sendlen = newlen2 + hdrsize;

		    /*
		     * Place a copy of the header right in front of the
		     * second half's payload.  Source and destination
		     * overlap when newlen1 is smaller than the header,
		     * hence memmove() rather than memcpy().
		     */
		    memmove(ip2, ip, hdrsize);
		    ip2->ip_len = htons(sendlen);
		    ip2->ip_off = htons(offset + (newlen1 >> 3)); /* keep flgs */
		    return send_frame((char *)ip2, sendlen, 1, newlen2, dst, salen);
		}

		return -1;
	    }

	    default:
		logit(LOG_WARNING, errno, "sendto from %s to %s",
		      inet_fmt(ip->ip_src.s_addr, source, sizeof(source)),
		      inet_fmt(ip->ip_dst.s_addr, dest, sizeof(dest)));
		return -1;
	}
    }

    return 0;
}

#else
/*
 * send unicast register frames
 * Version: Joachim Nilsson
 * Status:  Does not work (yet!)
 * Design:  Fragment IP frames when the frame length exceeds the MTU
 *          reported from the interface.  Optimizes for less fragments
 *          and fewer syscalls, should get better network utilization.
 *          Can be easily modified to use the PMTU instead.
 *
 * Feel free to debug this version and submit your patches -- it should work! --Joachim
 *
 * This variant sits in the #else branch of the "#if 1" above and is
 * therefore not compiled.
 *
 * buf   - frame to send, starting with its IP header
 * len   - total frame length on entry; reused below for the number of
 *         bytes still to be sent after the current fragment
 * frag  - byte count added (divided by 8) to the fragment offset
 * mtu   - maximum frame size to try; 0 selects IP_MSS
 * dst   - destination socket address
 * salen - size of *dst
 *
 * Returns 0 on success and -1 on failure.
 */
static int send_frame(char *buf, size_t len, size_t frag, size_t mtu, struct sockaddr *dst, size_t salen)
{
    struct ip *next, *ip = (struct ip *)buf;
    size_t xferlen, offset;
    char source[20], dest[20];

    if (!mtu)
	mtu = IP_MSS;

    /*
     * Size of this fragment: the whole frame if it fits, otherwise the
     * MTU minus one IP header, rounded down to a multiple of 8.
     * NOTE(review): in the fragmenting case xferlen excludes the IP
     * header, yet it is used below as the sendto() length and as
     * ip_len -- possibly one reason this version is marked as not
     * working; confirm before enabling.
     */
    if (len > mtu)
	xferlen = (mtu - sizeof(struct ip)) & 0xFFF8;
    else
	xferlen = len;

    /* New fragment offset (8 byte units); set IP_MF if more data follows */
    offset     = (ntohs(ip->ip_off) & IP_OFFMASK) + (frag >> 3);
    ip->ip_off = offset;
    len	       = len - xferlen;
    if (len)
	ip->ip_off |= IP_MF;
    ip->ip_off = htons(ip->ip_off);
    ip->ip_len = htons(xferlen);

    IF_DEBUG(DEBUG_PIM_REGISTER) {
	logit(LOG_INFO, 0, "Sending %-4d bytes %sunicast (MTU %-4d, offset %zd) to %s",
	      xferlen, len ? "fragmented " : "", mtu, offset,
	      inet_fmt(ip->ip_dst.s_addr, source, sizeof(source)));
	dump_frame(NULL, buf, xferlen);
    }

    /* send first fragment */
    while (sendto(pim_socket, ip, xferlen, 0, dst, salen) < 0) {
	switch (errno) {
	    case EINTR:
		continue;		/* Received signal, retry syscall. */

	    case ENETDOWN:
		check_vif_state();
		return -1;

	    case EMSGSIZE:
		/* Retry once with the smaller IP_MSS before giving up */
		if (mtu > IP_MSS)
		    return send_frame((char *)ip, xferlen, frag, IP_MSS, dst, salen);
		/* fall through */

	    default:
		return -1;
	}
    }

    /* send remainder */
    if (len) {
	size_t hdrsz = sizeof(struct ip);

	/* Update data pointers: copy the header to just before the unsent data */
	next = (struct ip *)(buf + xferlen - hdrsz);
	memcpy(next, ip, hdrsz);

	return send_frame((char *)next, len + hdrsz, xferlen, mtu, dst, salen);
    }

    return 0;
}
#endif

/**
 * Local Variables:
 *  version-control: t
 *  indent-tabs-mode: t
 *  c-file-style: "ellemtel"
 *  c-basic-offset: 4
 * End:
 */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>