[BACK]Return to in6_offload.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / netinet6

File: [cvs.NetBSD.org] / src / sys / netinet6 / in6_offload.c (download)

Revision 1.4, Wed May 2 20:40:26 2007 UTC (16 years, 11 months ago) by dyoung
Branch: MAIN
CVS Tags: yamt-x86pmap-base4, yamt-x86pmap-base3, yamt-x86pmap-base2, yamt-x86pmap-base, yamt-x86pmap, yamt-pf42-baseX, yamt-pf42-base4, yamt-pf42-base3, yamt-pf42-base2, yamt-pf42-base, yamt-pf42, yamt-nfs-mp-base9, yamt-nfs-mp-base8, yamt-nfs-mp-base7, yamt-nfs-mp-base6, yamt-nfs-mp-base5, yamt-nfs-mp-base4, yamt-nfs-mp-base3, yamt-nfs-mp-base2, yamt-nfs-mp-base11, yamt-nfs-mp-base10, yamt-nfs-mp-base, yamt-nfs-mp, yamt-lazymbuf-base15, yamt-lazymbuf-base14, yamt-kmem-base3, yamt-kmem-base2, yamt-kmem-base, yamt-kmem, yamt-idlelwp-base8, wrstuden-revivesa-base-4, wrstuden-revivesa-base-3, wrstuden-revivesa-base-2, wrstuden-revivesa-base-1, wrstuden-revivesa-base, wrstuden-revivesa, vmlocking2-base3, vmlocking2-base2, vmlocking2-base1, vmlocking2, vmlocking-nbase, vmlocking-base, uebayasi-xip-base4, uebayasi-xip-base3, uebayasi-xip-base2, uebayasi-xip-base1, uebayasi-xip-base, uebayasi-xip, simonb-wapbl-nbase, simonb-wapbl-base, simonb-wapbl, reinoud-bufcleanup-nbase, reinoud-bufcleanup-base, nick-net80211-sync-base, nick-net80211-sync, nick-hppapmap-base4, nick-hppapmap-base3, nick-hppapmap-base2, nick-hppapmap-base, nick-hppapmap, nick-csl-alignment-base5, nick-csl-alignment-base, nick-csl-alignment, netbsd-5-base, netbsd-5-2-RELEASE, netbsd-5-2-RC1, netbsd-5-2-3-RELEASE, netbsd-5-2-2-RELEASE, netbsd-5-2-1-RELEASE, netbsd-5-2, netbsd-5-1-RELEASE, netbsd-5-1-RC4, netbsd-5-1-RC3, netbsd-5-1-RC2, netbsd-5-1-RC1, netbsd-5-1-5-RELEASE, netbsd-5-1-4-RELEASE, netbsd-5-1-3-RELEASE, netbsd-5-1-2-RELEASE, netbsd-5-1-1-RELEASE, netbsd-5-1, netbsd-5-0-RELEASE, netbsd-5-0-RC4, netbsd-5-0-RC3, netbsd-5-0-RC2, netbsd-5-0-RC1, netbsd-5-0-2-RELEASE, netbsd-5-0-1-RELEASE, netbsd-5-0, netbsd-5, mjf-ufs-trans-base, mjf-devfs2-base, mjf-devfs2, mjf-devfs-base, mjf-devfs, matt-premerge-20091211, matt-nb5-pq3-base, matt-nb5-mips64-u2-k2-k4-k7-k8-k9, matt-nb5-mips64-u1-k1-k5, matt-nb5-mips64-premerge-20101231, matt-nb5-mips64-premerge-20091211, matt-nb5-mips64-k15, matt-nb5-mips64, matt-nb4-mips64-k7-u2a-k9b, matt-mips64-base2, matt-mips64-base, matt-mips64, matt-armv6-prevmlocking, matt-armv6-nbase, matt-armv6-base, matt-armv6, keiichi-mipv6-nbase, keiichi-mipv6-base, keiichi-mipv6, jymxensuspend-base, jym-xensuspend-nbase, jym-xensuspend-base, jym-xensuspend, jmcneill-pm-base, jmcneill-pm, jmcneill-base, hpcarm-cleanup-nbase, hpcarm-cleanup-base, hpcarm-cleanup, haad-nbase2, haad-dm-base2, haad-dm-base1, haad-dm-base, haad-dm, cube-autoconf-base, cube-autoconf, bouyer-xeni386-nbase, bouyer-xeni386-merge1, bouyer-xeni386-base, bouyer-xeni386, bouyer-xenamd64-base2, bouyer-xenamd64-base, bouyer-xenamd64, ad-socklock-base1, ad-audiomp2-base, ad-audiomp2
Branch point for: rmind-uvmplock, matt-nb5-pq3
Changes since 1.3: +4 -4 lines

Eliminate address family-specific route caches (struct route, struct
route_in6, struct route_iso), replacing all caches with a struct
route.

The principle benefit of this change is that all of the protocol
families can benefit from route cache-invalidation, which is
necessary for correct routing.  Route-cache invalidation fixes an
ancient PR, kern/3508, at long last; it fixes various other PRs,
also.

Discussions with and ideas from Joerg Sonnenberger influenced this
work tremendously.  Of course, all design oversights and bugs are
mine.

DETAILS

1 I added to each address family a pool of sockaddrs.  I have
  introduced routines for allocating, copying, and duplicating,
  and freeing sockaddrs:

        struct sockaddr *sockaddr_alloc(sa_family_t af, int flags);
        struct sockaddr *sockaddr_copy(struct sockaddr *dst,
                                       const struct sockaddr *src);
        struct sockaddr *sockaddr_dup(const struct sockaddr *src, int flags);
        void sockaddr_free(struct sockaddr *sa);

  sockaddr_alloc() returns either a sockaddr from the pool belonging
  to the specified family, or NULL if the pool is exhausted.  The
  returned sockaddr has the right size for that family; sa_family
  and sa_len fields are initialized to the family and sockaddr
  length---e.g., sa_family = AF_INET and sa_len = sizeof(struct
  sockaddr_in).  sockaddr_free() puts the given sockaddr back into
  its family's pool.

  sockaddr_dup() and sockaddr_copy() work analogously to strdup()
  and strcpy(), respectively.  sockaddr_copy() KASSERTs that the
  family of the destination and source sockaddrs are alike.

  The 'flags' argumet for sockaddr_alloc() and sockaddr_dup() is
  passed directly to pool_get(9).

2 I added routines for initializing sockaddrs in each address
  family, sockaddr_in_init(), sockaddr_in6_init(), sockaddr_iso_init(),
  etc.  They are fairly self-explanatory.

3 structs route_in6 and route_iso are no more.  All protocol families
  use struct route.  I have changed the route cache, 'struct route',
  so that it does not contain storage space for a sockaddr.  Instead,
  struct route points to a sockaddr coming from the pool the sockaddr
  belongs to.  I added a new method to struct route, rtcache_setdst(),
  for setting the cache destination:

        int rtcache_setdst(struct route *, const struct sockaddr *);

  rtcache_setdst() returns 0 on success, or ENOMEM if no memory is
  available to create the sockaddr storage.

  It is now possible for rtcache_getdst() to return NULL if, say,
  rtcache_setdst() failed.  I check the return value for NULL
  everywhere in the kernel.

4 Each routing domain (struct domain) has a list of live route
  caches, dom_rtcache.  rtflushall(sa_family_t af) looks up the
  domain indicated by 'af', walks the domain's list of route caches
  and invalidates each one.

/*	$NetBSD: in6_offload.c,v 1.4 2007/05/02 20:40:26 dyoung Exp $	*/

/*-
 * Copyright (c)2006 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: in6_offload.c,v 1.4 2007/05/02 20:40:26 dyoung Exp $");

#include <sys/param.h>
#include <sys/mbuf.h>

#include <net/if.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/in6_offload.h>

struct ip6_tso_output_args {
	struct ifnet *ifp;
	struct ifnet *origifp;
	const struct sockaddr_in6 *dst;
	struct rtentry *rt;
};

static int ip6_tso_output_callback(void *, struct mbuf *);

static int
ip6_tso_output_callback(void *vp, struct mbuf *m)
{
	struct ip6_tso_output_args *args = vp;

	return nd6_output(args->ifp, args->origifp, m, args->dst, args->rt);
}

int
ip6_tso_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
    const struct sockaddr_in6 *dst, struct rtentry *rt)
{
	struct ip6_tso_output_args args;

	args.ifp = ifp;
	args.origifp = origifp;
	args.dst = dst;
	args.rt = rt;

	return tcp6_segment(m, ip6_tso_output_callback, &args);
}

/*
 * tcp6_segment: handle M_CSUM_TSOv6 by software.
 *
 * => always consume m.
 * => call output_func with output_arg for each segments.
 */

int
tcp6_segment(struct mbuf *m, int (*output_func)(void *, struct mbuf *),
    void *output_arg)
{
	int mss;
	int iphlen;
	int thlen;
	int hlen;
	int len;
	struct ip6_hdr *iph;
	struct tcphdr *th;
	uint32_t tcpseq;
	struct mbuf *hdr = NULL;
	struct mbuf *t;
	int error = 0;

	KASSERT((m->m_flags & M_PKTHDR) != 0);
	KASSERT((m->m_pkthdr.csum_flags & M_CSUM_TSOv6) != 0);

	m->m_pkthdr.csum_flags = 0;

	len = m->m_pkthdr.len;
	KASSERT(len >= sizeof(*iph) + sizeof(*th));

	if (m->m_len < sizeof(*iph)) {
		m = m_pullup(m, sizeof(*iph));
		if (m == NULL) {
			error = ENOMEM;
			goto quit;
		}
	}
	iph = mtod(m, struct ip6_hdr *);
	iphlen = sizeof(*iph);
	KASSERT((iph->ip6_vfc & IPV6_VERSION_MASK) == IPV6_VERSION);
	KASSERT(iph->ip6_nxt == IPPROTO_TCP);

	hlen = iphlen + sizeof(*th);
	if (m->m_len < hlen) {
		m = m_pullup(m, hlen);
		if (m == NULL) {
			error = ENOMEM;
			goto quit;
		}
	}
	th = (void *)(mtod(m, char *) + iphlen);
	tcpseq = ntohl(th->th_seq);
	thlen = th->th_off * 4;
	hlen = iphlen + thlen;

	mss = m->m_pkthdr.segsz;
	KASSERT(mss != 0);
	KASSERT(len > hlen);

	t = m_split(m, hlen, M_NOWAIT);
	if (t == NULL) {
		error = ENOMEM;
		goto quit;
	}
	hdr = m;
	m = t;
	len -= hlen;
	KASSERT(len % mss == 0);
	while (len > 0) {
		struct mbuf *n;

		n = m_dup(hdr, 0, hlen, M_NOWAIT);
		if (n == NULL) {
			error = ENOMEM;
			goto quit;
		}
		KASSERT(n->m_len == hlen); /* XXX */

		t = m_split(m, mss, M_NOWAIT);
		if (t == NULL) {
			m_freem(n);
			error = ENOMEM;
			goto quit;
		}
		m_cat(n, m);
		m = t;

		KASSERT(n->m_len >= hlen); /* XXX */

		n->m_pkthdr.len = hlen + mss;
		iph = mtod(n, struct ip6_hdr *);
		KASSERT((iph->ip6_vfc & IPV6_VERSION_MASK) == IPV6_VERSION);
		iph->ip6_plen = htons(thlen + mss);
		th = (void *)(mtod(n, char *) + iphlen);
		th->th_seq = htonl(tcpseq);
		th->th_sum = 0;
		th->th_sum = in6_cksum(n, IPPROTO_TCP, iphlen, thlen + mss);

		error = (*output_func)(output_arg, n);
		if (error) {
			goto quit;
		}

		tcpseq += mss;
		len -= mss;
	}

quit:
	if (hdr != NULL) {
		m_freem(hdr);
	}
	if (m != NULL) {
		m_freem(m);
	}

	return error;
}