[BACK]Return to mpls_proto.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / netmpls

File: [cvs.NetBSD.org] / src / sys / netmpls / mpls_proto.c (download)

Revision 1.30, Mon Oct 3 11:06:06 2016 UTC (7 years, 6 months ago) by ozaki-r
Branch: MAIN
CVS Tags: prg-localcount2-base3, prg-localcount2-base2, prg-localcount2-base1, prg-localcount2-base, prg-localcount2, pgoyette-localcount-20170426, pgoyette-localcount-20170320, pgoyette-localcount-20170107, pgoyette-localcount-20161104, perseant-stdc-iso10646-base, perseant-stdc-iso10646, nick-nhusb-base-20170825, nick-nhusb-base-20170204, nick-nhusb-base-20161204, nick-nhusb-base-20161004, netbsd-8-base, jdolecek-ncq-base, jdolecek-ncq, bouyer-socketcan-base1, bouyer-socketcan-base, bouyer-socketcan
Branch point for: netbsd-8
Changes since 1.29: +3 -2 lines

Fix race condition on ifqueue used by traditional netisr

If an underlying network device driver supports MSI/MSI-X, RX interrupts
can be delivered to arbitrary CPUs. This means that Layer 2 subroutines
such as ether_input (softint) and subsequent Layer 3 subroutines (softint)
which are called via traditional netisr can be dispatched on an arbitrary
CPU. Layer 2 subroutines now run without any locks (expected) and so a
Layer 2 subroutine and a Layer 3 subroutine can run in parallel.

There is shared data between a Layer 2 routine and a Layer 3 routine,
namely ifqueue, and IF_ENQUEUE (from L2) and IF_DEQUEUE (from L3) on it
are racy now.

To fix the race condition, use ifqueue#ifq_lock to protect ifqueue
instead of splnet, which is meaningless now.

The same race condition exists in route_intr. Fix it as well.

Reviewed by knakahara@

/*	$NetBSD: mpls_proto.c,v 1.30 2016/10/03 11:06:06 ozaki-r Exp $ */

/*
 * Copyright (c) 2010 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Mihai Chelaru <kefren@NetBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: mpls_proto.c,v 1.30 2016/10/03 11:06:06 ozaki-r Exp $");

#ifdef _KERNEL_OPT
#include "opt_inet.h"
#include "opt_mbuftrace.h"
#endif

#include <sys/param.h>
#include <sys/socket.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>

#include <net/route.h>

#include <netmpls/mpls.h>
#include <netmpls/mpls_var.h>

/*
 * MPLS ifqueue.  Zeroed and given its lock in mpls_init(); referenced from
 * mplsdomain.dom_ifqueues, and its ifq_maxlen field is exposed through the
 * "ifq_len" sysctl leaf.
 */
struct ifqueue mplsintrq;

static int mpls_attach(struct socket *, int);
static void sysctl_net_mpls_setup(struct sysctllog **);

#ifdef MBUFTRACE
/* mbuf owner record; attached by mpls_init() when MBUFTRACE is defined. */
struct mowner mpls_owner = MOWNER_INIT("MPLS", "");
#endif

/*
 * Tunables.  Each one is registered as a read-write integer leaf by
 * sysctl_net_mpls_setup(); the trailing comment gives the leaf name and
 * the description string used there.  The inet_* and icmp_respond leaves
 * are only registered under INET, the inet6_* leaves only under INET6.
 */
int mpls_defttl = 255;		/* "ttl": Default TTL */
int mpls_mapttl_inet = 1;	/* "inet_mapttl": Map IP TTL */
int mpls_mapttl_inet6 = 1;	/* "inet6_mapttl": Map IP6 TTL */
int mpls_icmp_respond = 0;	/* "icmp_respond": Emit ICMP packets on errors */
int mpls_forwarding = 0;	/* "forwarding": MPLS forwarding */
int mpls_frame_accept = 0;	/* "accept": Accept MPLS Frames */
int mpls_mapprec_inet = 1;	/* "inet_map_prec": Map IP Prec */
int mpls_mapclass_inet6 = 1;	/* "inet6_map_prec": Map IP6 class */
int mpls_rfc4182 = 1;		/* "rfc4182": RFC 4182 conformance */

void mpls_init(void)
{
#ifdef MBUFTRACE
	MOWNER_ATTACH(&mpls_owner);
#endif
	memset(&mplsintrq, 0, sizeof(mplsintrq));
	mplsintrq.ifq_maxlen = 256;
	IFQ_LOCK_INIT(&mplsintrq);

	sysctl_net_mpls_setup(NULL);
}

/*
 * Attach request for a new MPLS socket.
 *
 * Calls sosetlock() on the socket.  If either the send or the receive
 * buffer has a zero high-water mark, 8192 bytes are reserved for each
 * direction and the result of soreserve() is returned.
 *
 * NOTE(review): when both buffers already have a non-zero high-water
 * mark this returns EOPNOTSUPP, not 0 -- confirm that is intended.
 */
static int
mpls_attach(struct socket *so, int proto)
{
	sosetlock(so);

	/* Both buffers already sized: no reservation is attempted. */
	if (so->so_snd.sb_hiwat != 0 && so->so_rcv.sb_hiwat != 0)
		return EOPNOTSUPP;

	return soreserve(so, 8192, 8192);
}

/*
 * Detach request: intentionally a no-op, the function body is empty.
 */
static void
mpls_detach(struct socket *so)
{
}

/*
 * Accept request: not supported on MPLS sockets.
 * Asserts solocked(so) and always returns EOPNOTSUPP; nam is unused.
 */
static int
mpls_accept(struct socket *so, struct sockaddr *nam)
{
	KASSERT(solocked(so));

	return EOPNOTSUPP;
}

/*
 * Bind request: not supported on MPLS sockets.
 * Asserts solocked(so) and always returns EOPNOTSUPP; nam and l are unused.
 */
static int
mpls_bind(struct socket *so, struct sockaddr *nam, struct lwp *l)
{
	KASSERT(solocked(so));

	return EOPNOTSUPP;
}

/*
 * Listen request: not supported on MPLS sockets.
 * Asserts solocked(so) and always returns EOPNOTSUPP; l is unused.
 */
static int
mpls_listen(struct socket *so, struct lwp *l)
{
	KASSERT(solocked(so));

	return EOPNOTSUPP;
}

/*
 * Connect request: not supported on MPLS sockets.
 * Asserts solocked(so) and always returns EOPNOTSUPP; nam and l are unused.
 */
static int
mpls_connect(struct socket *so, struct sockaddr *nam, struct lwp *l)
{
	KASSERT(solocked(so));

	return EOPNOTSUPP;
}

/*
 * Connect2 (socket-to-socket connect) request: not supported on MPLS
 * sockets.  Asserts solocked(so) and always returns EOPNOTSUPP; so2 is
 * unused.
 */
static int
mpls_connect2(struct socket *so, struct socket *so2)
{
	KASSERT(solocked(so));

	return EOPNOTSUPP;
}

/*
 * Disconnect request: not supported on MPLS sockets.
 * Asserts solocked(so) and always returns EOPNOTSUPP.
 */
static int
mpls_disconnect(struct socket *so)
{
	KASSERT(solocked(so));

	return EOPNOTSUPP;
}

/*
 * Shutdown request: not supported on MPLS sockets.
 * Asserts solocked(so) and always returns EOPNOTSUPP.
 */
static int
mpls_shutdown(struct socket *so)
{
	KASSERT(solocked(so));

	return EOPNOTSUPP;
}

/*
 * Abort request: not supported on MPLS sockets.
 * Asserts solocked(so) and always returns EOPNOTSUPP.
 */
static int
mpls_abort(struct socket *so)
{
	KASSERT(solocked(so));

	return EOPNOTSUPP;
}

/*
 * Ioctl request: no ioctls are handled here; always returns EOPNOTSUPP.
 * Unlike most handlers in this file there is no solocked() assertion.
 * All arguments are unused.
 */
static int
mpls_ioctl(struct socket *so, u_long cmd, void *nam, struct ifnet *ifp)
{
	return EOPNOTSUPP;
}

/*
 * Stat request: not supported on MPLS sockets.
 * Asserts solocked(so) and always returns EOPNOTSUPP; ub is not filled in.
 */
static int
mpls_stat(struct socket *so, struct stat *ub)
{
	KASSERT(solocked(so));

	return EOPNOTSUPP;
}

/*
 * Peer-address request: not supported on MPLS sockets.
 * Asserts solocked(so) and always returns EOPNOTSUPP; nam is not filled in.
 */
static int
mpls_peeraddr(struct socket *so, struct sockaddr *nam)
{
	KASSERT(solocked(so));

	return EOPNOTSUPP;
}

/*
 * Local-address request: not supported on MPLS sockets.
 * Asserts solocked(so) and always returns EOPNOTSUPP; nam is not filled in.
 */
static int
mpls_sockaddr(struct socket *so, struct sockaddr *nam)
{
	KASSERT(solocked(so));

	return EOPNOTSUPP;
}

/*
 * Rcvd request: not supported on MPLS sockets.
 * Asserts solocked(so) and always returns EOPNOTSUPP; flags and l are
 * unused.
 */
static int
mpls_rcvd(struct socket *so, int flags, struct lwp *l)
{
	KASSERT(solocked(so));

	return EOPNOTSUPP;
}

/*
 * Receive-out-of-band request: not supported on MPLS sockets.
 * Asserts solocked(so) and always returns EOPNOTSUPP; m and flags are
 * not touched.
 */
static int
mpls_recvoob(struct socket *so, struct mbuf *m, int flags)
{
	KASSERT(solocked(so));

	return EOPNOTSUPP;
}

/*
 * Send request: not supported on MPLS sockets.
 * Asserts solocked(so) and always returns EOPNOTSUPP.
 *
 * NOTE(review): m and control are neither used nor freed here -- confirm
 * who owns the mbufs when a send handler fails.
 */
static int
mpls_send(struct socket *so, struct mbuf *m, struct sockaddr *nam,
    struct mbuf *control, struct lwp *l)
{
	KASSERT(solocked(so));

	return EOPNOTSUPP;
}

/*
 * Send-out-of-band request: not supported on MPLS sockets.
 * Asserts solocked(so) and always returns EOPNOTSUPP.
 *
 * NOTE(review): m and control are neither used nor freed here -- confirm
 * who owns the mbufs when a sendoob handler fails.
 */
static int
mpls_sendoob(struct socket *so, struct mbuf *m, struct mbuf *control)
{
	KASSERT(solocked(so));

	return EOPNOTSUPP;
}

/*
 * Purge-interface request: nothing is purged; always returns EOPNOTSUPP.
 * Unlike most handlers in this file there is no solocked() assertion.
 * Both arguments are unused.
 */
static int
mpls_purgeif(struct socket *so, struct ifnet *ifp)
{

	return EOPNOTSUPP;
}

/*
 * Sysctl for MPLS variables.
 */
static void
sysctl_net_mpls_setup(struct sysctllog **clog)
{

        sysctl_createv(clog, 0, NULL, NULL,
                       CTLFLAG_PERMANENT,
                       CTLTYPE_NODE, "mpls", NULL,
                       NULL, 0, NULL, 0,
                       CTL_NET, PF_MPLS, CTL_EOL);

        sysctl_createv(clog, 0, NULL, NULL,
                       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
                       CTLTYPE_INT, "ttl",
                       SYSCTL_DESCR("Default TTL"),
                       NULL, 0, &mpls_defttl, 0,
                       CTL_NET, PF_MPLS, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "forwarding",
		       SYSCTL_DESCR("MPLS forwarding"),
		       NULL, 0, &mpls_forwarding, 0,
		       CTL_NET, PF_MPLS, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "accept",
		       SYSCTL_DESCR("Accept MPLS Frames"),
		       NULL, 0, &mpls_frame_accept, 0,
		       CTL_NET, PF_MPLS, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "ifq_len",
		       SYSCTL_DESCR("MPLS queue length"),
		       NULL, 0, &mplsintrq.ifq_maxlen, 0,
		       CTL_NET, PF_MPLS, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "rfc4182",
		       SYSCTL_DESCR("RFC 4182 conformance"),
		       NULL, 0, &mpls_rfc4182, 0,
		       CTL_NET, PF_MPLS, CTL_CREATE, CTL_EOL);
#ifdef INET
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "inet_mapttl",
		       SYSCTL_DESCR("Map IP TTL"),
		       NULL, 0, &mpls_mapttl_inet, 0,
		       CTL_NET, PF_MPLS, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "inet_map_prec",
		       SYSCTL_DESCR("Map IP Prec"),
		       NULL, 0, &mpls_mapprec_inet, 0,
		       CTL_NET, PF_MPLS, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "icmp_respond",
		       SYSCTL_DESCR("Emit ICMP packets on errors"),
		       NULL, 0, &mpls_icmp_respond, 0,
		       CTL_NET, PF_MPLS, CTL_CREATE, CTL_EOL);
#endif
#ifdef INET6
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "inet6_mapttl",
		       SYSCTL_DESCR("Map IP6 TTL"),
		       NULL, 0, &mpls_mapttl_inet6, 0,
		       CTL_NET, PF_MPLS, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "inet6_map_prec",
		       SYSCTL_DESCR("Map IP6 class"),
		       NULL, 0, &mpls_mapclass_inet6, 0,
		       CTL_NET, PF_MPLS, CTL_CREATE, CTL_EOL);
#endif
}

/*
 * DOMAIN_DEFINE and PR_WRAP_USRREQS are macros defined outside this file.
 * NOTE(review): DOMAIN_DEFINE(mplsdomain) presumably registers or
 * forward-declares the mplsdomain object defined at the end of this file
 * -- confirm against sys/domain.h.
 */
DOMAIN_DEFINE(mplsdomain);

/*
 * NOTE(review): PR_WRAP_USRREQS(mpls) presumably emits the
 * mpls_<op>_wrapper functions named below -- confirm against
 * sys/protosw.h.
 *
 * The #defines that follow redirect every handler name to its _wrapper
 * counterpart, so the initializer of mpls_usrreqs further down refers to
 * the wrappers rather than to the static functions defined earlier.
 * They must therefore stay after PR_WRAP_USRREQS() and before the table.
 */
PR_WRAP_USRREQS(mpls)
#define	mpls_attach	mpls_attach_wrapper
#define	mpls_detach	mpls_detach_wrapper
#define	mpls_accept	mpls_accept_wrapper
#define	mpls_bind	mpls_bind_wrapper
#define	mpls_listen	mpls_listen_wrapper
#define	mpls_connect	mpls_connect_wrapper
#define	mpls_connect2	mpls_connect2_wrapper
#define	mpls_disconnect	mpls_disconnect_wrapper
#define	mpls_shutdown	mpls_shutdown_wrapper
#define	mpls_abort	mpls_abort_wrapper
#define	mpls_ioctl	mpls_ioctl_wrapper
#define	mpls_stat	mpls_stat_wrapper
#define	mpls_peeraddr	mpls_peeraddr_wrapper
#define	mpls_sockaddr	mpls_sockaddr_wrapper
#define	mpls_rcvd	mpls_rcvd_wrapper
#define	mpls_recvoob	mpls_recvoob_wrapper
#define	mpls_send	mpls_send_wrapper
#define	mpls_sendoob	mpls_sendoob_wrapper
#define	mpls_purgeif	mpls_purgeif_wrapper

/*
 * User-request dispatch table shared by the SOCK_DGRAM and SOCK_RAW
 * entries of mplssw[].  Because of the preceding #defines, every
 * mpls_<op> name in this initializer expands to mpls_<op>_wrapper.
 */
static const struct pr_usrreqs mpls_usrreqs = {
	.pr_attach	= mpls_attach,
	.pr_detach	= mpls_detach,
	.pr_accept	= mpls_accept,
	.pr_bind	= mpls_bind,
	.pr_listen	= mpls_listen,
	.pr_connect	= mpls_connect,
	.pr_connect2	= mpls_connect2,
	.pr_disconnect	= mpls_disconnect,
	.pr_shutdown	= mpls_shutdown,
	.pr_abort	= mpls_abort,
	.pr_ioctl	= mpls_ioctl,
	.pr_stat	= mpls_stat,
	.pr_peeraddr	= mpls_peeraddr,
	.pr_sockaddr	= mpls_sockaddr,
	.pr_rcvd	= mpls_rcvd,
	.pr_recvoob	= mpls_recvoob,
	.pr_send	= mpls_send,
	.pr_sendoob	= mpls_sendoob,
	.pr_purgeif	= mpls_purgeif,
};

/*
 * Protocol switch table for the MPLS domain.
 *
 * The first entry carries only the domain pointer and the mpls_init()
 * hook.  The SOCK_DGRAM and SOCK_RAW entries are otherwise identical:
 * both use the PR_ATOMIC | PR_ADDR flags and share mpls_usrreqs.
 */
const struct protosw mplssw[] = {
	{	.pr_domain = &mplsdomain,
		.pr_init = mpls_init,
	},
	{
		.pr_type = SOCK_DGRAM,
		.pr_domain = &mplsdomain,
		.pr_flags = PR_ATOMIC | PR_ADDR,
		.pr_usrreqs = &mpls_usrreqs,
	},
	{
		.pr_type = SOCK_RAW,
		.pr_domain = &mplsdomain,
		.pr_flags = PR_ATOMIC | PR_ADDR,
		.pr_usrreqs = &mpls_usrreqs,
	},
};

/*
 * Domain descriptor for PF_MPLS.
 *
 * Ties together the protocol switch table (mplssw), the routing-table
 * parameters derived from struct sockaddr_mpls / union mpls_shim, and the
 * MPLS ifqueue.  No init, externalize, dispose, ifattach or ifdetach
 * hooks are provided (all NULL).
 */
struct domain mplsdomain = {
	.dom_family = PF_MPLS,
	.dom_name = "MPLS",
	.dom_init = NULL,
	.dom_externalize = NULL,
	.dom_dispose = NULL, 
	.dom_protosw = mplssw,
	/* One past the last element of mplssw[]. */
	.dom_protoswNPROTOSW = &mplssw[__arraycount(mplssw)],
	.dom_rtattach = rt_inithead,
	/* Byte offset of smpls_addr scaled by 8 (<< 3), i.e. in bits. */
	.dom_rtoffset = offsetof(struct sockaddr_mpls, smpls_addr) << 3,
	.dom_maxrtkey = sizeof(union mpls_shim),
	.dom_ifattach = NULL,
	.dom_ifdetach = NULL,
	/* NULL-terminated list holding the single MPLS queue. */
	.dom_ifqueues = { &mplsintrq, NULL },
	.dom_link = { NULL },
	.dom_mowner = MOWNER_INIT("MPLS", ""),
	/* Address comparison covers exactly the shim inside the sockaddr. */
	.dom_sa_cmpofs = offsetof(struct sockaddr_mpls, smpls_addr),
	.dom_sa_cmplen = sizeof(union mpls_shim),
	.dom_rtcache = LIST_HEAD_INITIALIZER(mplsdomain.dom_rtcache)
};