Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/netinet/ip_input.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/netinet/ip_input.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.340 retrieving revision 1.370 diff -u -p -r1.340 -r1.370 --- src/sys/netinet/ip_input.c 2016/08/31 09:14:47 1.340 +++ src/sys/netinet/ip_input.c 2018/02/05 14:52:42 1.370 @@ -1,4 +1,4 @@ -/* $NetBSD: ip_input.c,v 1.340 2016/08/31 09:14:47 ozaki-r Exp $ */ +/* $NetBSD: ip_input.c,v 1.370 2018/02/05 14:52:42 maxv Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -29,7 +29,7 @@ * SUCH DAMAGE. */ -/*- +/* * Copyright (c) 1998 The NetBSD Foundation, Inc. * All rights reserved. * @@ -91,16 +91,16 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.340 2016/08/31 09:14:47 ozaki-r Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.370 2018/02/05 14:52:42 maxv Exp $"); #ifdef _KERNEL_OPT #include "opt_inet.h" -#include "opt_compat_netbsd.h" #include "opt_gateway.h" #include "opt_ipsec.h" #include "opt_mrouting.h" #include "opt_mbuftrace.h" #include "opt_inet_csum.h" +#include "opt_net_mpsafe.h" #endif #include "arp.h" @@ -153,55 +153,34 @@ __KERNEL_RCSID(0, "$NetBSD: ip_input.c,v #ifndef IPFORWARDING #ifdef GATEWAY #define IPFORWARDING 1 /* forward IP packets not for us */ -#else /* GATEWAY */ +#else #define IPFORWARDING 0 /* don't forward IP packets not for us */ -#endif /* GATEWAY */ -#endif /* IPFORWARDING */ -#ifndef IPSENDREDIRECTS -#define IPSENDREDIRECTS 1 #endif -#ifndef IPFORWSRCRT -#define IPFORWSRCRT 1 /* forward source-routed packets */ #endif -#ifndef IPALLOWSRCRT -#define IPALLOWSRCRT 1 /* allow source-routed packets */ -#endif -#ifndef IPMTUDISC -#define IPMTUDISC 1 + +#ifndef IPSENDREDIRECTS +#define IPSENDREDIRECTS 1 #endif + #ifndef IPMTUDISCTIMEOUT #define IPMTUDISCTIMEOUT (10 * 60) /* as per RFC 1191 */ #endif -#ifdef COMPAT_50 -#include -#include -#endif - -/* - * Note: DIRECTED_BROADCAST is handled this way so that previous - * configuration using this option will Just Work. - */ -#ifndef IPDIRECTEDBCAST #ifdef DIRECTED_BROADCAST #define IPDIRECTEDBCAST 1 #else #define IPDIRECTEDBCAST 0 -#endif /* DIRECTED_BROADCAST */ -#endif /* IPDIRECTEDBCAST */ -int ipforwarding = IPFORWARDING; -int ipsendredirects = IPSENDREDIRECTS; -int ip_defttl = IPDEFTTL; -int ip_forwsrcrt = IPFORWSRCRT; -int ip_directedbcast = IPDIRECTEDBCAST; -int ip_allowsrcrt = IPALLOWSRCRT; -int ip_mtudisc = IPMTUDISC; -int ip_mtudisc_timeout = IPMTUDISCTIMEOUT; -#ifdef DIAGNOSTIC -int ipprintfs = 0; #endif -int ip_do_randomid = 0; +int ipforwarding = IPFORWARDING; +int ipsendredirects = IPSENDREDIRECTS; +int ip_defttl = IPDEFTTL; +int ip_forwsrcrt = 0; +int ip_directedbcast = IPDIRECTEDBCAST; +int ip_allowsrcrt = 0; +int ip_mtudisc = 1; +int ip_mtudisc_timeout = IPMTUDISCTIMEOUT; +int ip_do_randomid = 0; /* * XXX - Setting ip_checkinterface mostly implements the receive side of @@ -252,30 +231,23 @@ EVCNT_ATTACH_STATIC(ip_swcsum); #endif /* INET_CSUM_COUNTERS */ /* - * We need to save the IP options in case a protocol wants to respond + * Used to save the IP options in case a protocol wants to respond * to an incoming packet over the same route if the packet got here * using IP source routing. This allows connection establishment and * maintenance when the remote end is on a network that is not known * to us. */ - -static int ip_nhops = 0; - -static struct ip_srcrt { - struct in_addr dst; /* final destination */ - char nop; /* one NOP to align */ - char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */ - struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)]; -} ip_srcrt; +struct ip_srcrt { + int isr_nhops; /* number of hops */ + struct in_addr isr_dst; /* final destination */ + char isr_nop; /* one NOP to align */ + char isr_hdr[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN & OFFSET */ + struct in_addr isr_routes[MAX_IPOPTLEN/sizeof(struct in_addr)]; +}; static int ip_drainwanted; -struct sockaddr_in ipaddr = { - .sin_len = sizeof(ipaddr), - .sin_family = AF_INET, -}; - -static void save_rte(u_char *, struct in_addr); +static void save_rte(struct mbuf *, u_char *, struct in_addr); #ifdef MBUFTRACE struct mowner ip_rx_mowner = MOWNER_INIT("internet", "rx"); @@ -286,7 +258,7 @@ static void ipintr(void *); static void ip_input(struct mbuf *); static void ip_forward(struct mbuf *, int, struct ifnet *); static bool ip_dooptions(struct mbuf *); -static struct in_ifaddr *ip_rtaddr(struct in_addr); +static struct in_ifaddr *ip_rtaddr(struct in_addr, struct psref *); static void sysctl_net_inet_ip_setup(struct sysctllog **); static struct in_ifaddr *ip_match_our_address(struct ifnet *, struct ip *, @@ -294,9 +266,13 @@ static struct in_ifaddr *ip_match_our_ad static struct in_ifaddr *ip_match_our_address_broadcast(struct ifnet *, struct ip *); -/* XXX: Not yet enabled. */ +#ifdef NET_MPSAFE +#define SOFTNET_LOCK() mutex_enter(softnet_lock) +#define SOFTNET_UNLOCK() mutex_exit(softnet_lock) +#else #define SOFTNET_LOCK() KASSERT(mutex_owned(softnet_lock)) #define SOFTNET_UNLOCK() KASSERT(mutex_owned(softnet_lock)) +#endif /* * IP initialization: fill in IP protocol switch table. @@ -330,7 +306,6 @@ ip_init(void) ip_ids = ip_id_init(); ip_id = time_uptime & 0xfffff; - ip_mtudisc_timeout_q = rt_timer_queue_create(ip_mtudisc_timeout); #ifdef GATEWAY ipflow_init(); #endif @@ -345,10 +320,8 @@ ip_init(void) #endif /* MBUFTRACE */ ipstat_percpu = percpu_alloc(sizeof(uint64_t) * IP_NSTATS); - ipforward_rt_percpu = percpu_alloc(sizeof(struct route)); - if (ipforward_rt_percpu == NULL) - panic("failed to allocate ipforward_rt_percpu"); + ip_mtudisc_timeout_q = rt_timer_queue_create(ip_mtudisc_timeout); } static struct in_ifaddr * @@ -382,10 +355,14 @@ ip_match_our_address(struct ifnet *ifp, continue; if (checkif && ia->ia_ifp != ifp) continue; - if ((ia->ia_ifp->if_flags & IFF_UP) != 0) - break; - else + if ((ia->ia_ifp->if_flags & IFF_UP) == 0) { (*downmatch)++; + continue; + } + if (ia->ia4_flags & IN_IFF_DETACHED && + (ifp->if_flags & IFF_LOOPBACK) == 0) + continue; + break; } } @@ -404,6 +381,9 @@ ip_match_our_address_broadcast(struct if ia = ifatoia(ifa); if (ia->ia4_flags & IN_IFF_NOTREADY) continue; + if (ia->ia4_flags & IN_IFF_DETACHED && + (ifp->if_flags & IFF_LOOPBACK) == 0) + continue; if (in_hosteq(ip->ip_dst, ia->ia_broadaddr.sin_addr) || in_hosteq(ip->ip_dst, ia->ia_netbroadcast) || /* @@ -436,11 +416,11 @@ ipintr(void *arg __unused) KASSERT(cpu_softintr_p()); - mutex_enter(softnet_lock); + SOFTNET_LOCK_UNLESS_NET_MPSAFE(); while ((m = pktq_dequeue(ip_pktq)) != NULL) { ip_input(m); } - mutex_exit(softnet_lock); + SOFTNET_UNLOCK_UNLESS_NET_MPSAFE(); } /* @@ -486,13 +466,13 @@ ip_input(struct mbuf *m) */ if (IP_HDR_ALIGNED_P(mtod(m, void *)) == 0) { if ((m = m_copyup(m, sizeof(struct ip), - (max_linkhdr + 3) & ~3)) == NULL) { + (max_linkhdr + 3) & ~3)) == NULL) { /* XXXJRT new stat, please */ IP_STATINC(IP_STAT_TOOSMALL); goto out; } - } else if (__predict_false(m->m_len < sizeof (struct ip))) { - if ((m = m_pullup(m, sizeof (struct ip))) == NULL) { + } else if (__predict_false(m->m_len < sizeof(struct ip))) { + if ((m = m_pullup(m, sizeof(struct ip))) == NULL) { IP_STATINC(IP_STAT_TOOSMALL); goto out; } @@ -534,8 +514,7 @@ ip_input(struct mbuf *m) } switch (m->m_pkthdr.csum_flags & - ((ifp->if_csum_flags_rx & M_CSUM_IPv4) | - M_CSUM_IPv4_BAD)) { + ((ifp->if_csum_flags_rx & M_CSUM_IPv4) | M_CSUM_IPv4_BAD)) { case M_CSUM_IPv4|M_CSUM_IPv4_BAD: INET_CSUM_COUNTER_INCR(&ip_hwcsum_bad); IP_STATINC(IP_STAT_BADSUM); @@ -574,10 +553,9 @@ ip_input(struct mbuf *m) } /* - * Check that the amount of data in the buffers - * is as at least much as the IP header would have us expect. - * Trim mbufs if longer than we expect. - * Drop packet if shorter than we expect. + * Check that the amount of data in the buffers is at least as much + * as the IP header would have us expect. Trim mbufs if longer than + * we expect. Drop packet if shorter than we expect. */ if (m->m_pkthdr.len < len) { IP_STATINC(IP_STAT_TOOSHORT); @@ -613,10 +591,9 @@ ip_input(struct mbuf *m) struct in_addr odst = ip->ip_dst; bool freed; - SOFTNET_LOCK(); freed = pfil_run_hooks(inet_pfil_hook, &m, ifp, PFIL_IN) != 0; - SOFTNET_UNLOCK(); if (freed || m == NULL) { + m = NULL; goto out; } ip = mtod(m, struct ip *); @@ -646,6 +623,7 @@ ip_input(struct mbuf *m) if ((*altq_input)(m, AF_INET) == 0) { /* Packet dropped by traffic conditioner. */ SOFTNET_UNLOCK(); + m = NULL; goto out; } SOFTNET_UNLOCK(); @@ -658,9 +636,10 @@ ip_input(struct mbuf *m) * error was detected (causing an icmp message * to be sent and the original packet to be freed). */ - ip_nhops = 0; /* for source routed packets */ - if (hlen > sizeof (struct ip) && ip_dooptions(m)) + if (hlen > sizeof(struct ip) && ip_dooptions(m)) { + m = NULL; goto out; + } /* * Check our list of addresses, to see if the packet is for us. @@ -757,15 +736,12 @@ ip_input(struct mbuf *m) return; } #ifdef IPSEC - /* Perform IPsec, if any. */ + /* Check the security policy (SP) for the packet */ if (ipsec_used) { - SOFTNET_LOCK(); if (ipsec4_input(m, IP_FORWARDING | (ip_directedbcast ? IP_ALLOWBROADCAST : 0)) != 0) { - SOFTNET_UNLOCK(); goto out; } - SOFTNET_UNLOCK(); } #endif ip_forward(m, srcrt, ifp); @@ -794,7 +770,7 @@ ours: } /* * Reassembly is done, we have the final packet. - * Updated cached data in local variable(s). + * Update cached data in local variable(s). */ ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; @@ -808,12 +784,9 @@ ours: */ if (ipsec_used && (inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) { - SOFTNET_LOCK(); if (ipsec4_input(m, 0) != 0) { - SOFTNET_UNLOCK(); goto out; } - SOFTNET_UNLOCK(); } #endif @@ -828,7 +801,7 @@ ours: * is expensive, so explore ia here again. */ s = pserialize_read_enter(); - _ia = in_get_ia(ip->ip_dst.s_addr); + _ia = in_get_ia(ip->ip_dst); _ia->ia_ifa.ifa_data.ifad_inbytes += ntohs(ip->ip_len); pserialize_read_exit(s); } @@ -837,9 +810,7 @@ ours: const int off = hlen, nh = ip->ip_p; - SOFTNET_LOCK(); (*inetsw[ip_protox[nh]].pr_input)(m, off, nh); - SOFTNET_UNLOCK(); return; out: @@ -855,13 +826,11 @@ void ip_slowtimo(void) { - mutex_enter(softnet_lock); - KERNEL_LOCK(1, NULL); + SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE(); ip_reass_slowtimo(); - KERNEL_UNLOCK_ONE(NULL); - mutex_exit(softnet_lock); + SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE(); } /* @@ -891,14 +860,19 @@ ip_dooptions(struct mbuf *m) struct ip_timestamp *ipt; struct in_ifaddr *ia; int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0; + int srr_present, rr_present, ts_present; struct in_addr dst; n_time ntime; - struct ifaddr *ifa; + struct ifaddr *ifa = NULL; int s; + srr_present = 0; + rr_present = 0; + ts_present = 0; + dst = ip->ip_dst; cp = (u_char *)(ip + 1); - cnt = (ip->ip_hl << 2) - sizeof (struct ip); + cnt = (ip->ip_hl << 2) - sizeof(struct ip); for (; cnt > 0; cnt -= optlen, cp += optlen) { opt = cp[IPOPT_OPTVAL]; if (opt == IPOPT_EOL) @@ -931,12 +905,22 @@ ip_dooptions(struct mbuf *m) * address is on directly accessible net. */ case IPOPT_LSRR: - case IPOPT_SSRR: + case IPOPT_SSRR: { + struct psref psref; + struct sockaddr_in ipaddr = { + .sin_len = sizeof(ipaddr), + .sin_family = AF_INET, + }; + if (ip_allowsrcrt == 0) { type = ICMP_UNREACH; code = ICMP_UNREACH_NET_PROHIB; goto bad; } + if (srr_present++) { + code = &cp[IPOPT_OPTVAL] - (u_char *)ip; + goto bad; + } if (optlen < IPOPT_OFFSET + sizeof(*cp)) { code = &cp[IPOPT_OLEN] - (u_char *)ip; goto bad; @@ -969,7 +953,7 @@ ip_dooptions(struct mbuf *m) /* * End of source route. Should be for us. */ - save_rte(cp, ip->ip_src); + save_rte(m, cp, ip->ip_src); break; } /* @@ -977,34 +961,44 @@ ip_dooptions(struct mbuf *m) */ memcpy((void *)&ipaddr.sin_addr, (void *)(cp + off), sizeof(ipaddr.sin_addr)); - s = pserialize_read_enter(); if (opt == IPOPT_SSRR) { - ifa = ifa_ifwithladdr(sintosa(&ipaddr)); + ifa = ifa_ifwithladdr_psref(sintosa(&ipaddr), + &psref); if (ifa != NULL) ia = ifatoia(ifa); else ia = NULL; } else { - ia = ip_rtaddr(ipaddr.sin_addr); + ia = ip_rtaddr(ipaddr.sin_addr, &psref); } if (ia == NULL) { type = ICMP_UNREACH; code = ICMP_UNREACH_SRCFAIL; - pserialize_read_exit(s); goto bad; } ip->ip_dst = ipaddr.sin_addr; bcopy((void *)&ia->ia_addr.sin_addr, (void *)(cp + off), sizeof(struct in_addr)); - pserialize_read_exit(s); + ia4_release(ia, &psref); cp[IPOPT_OFFSET] += sizeof(struct in_addr); /* * Let ip_intr's mcast routing check handle mcast pkts */ forward = !IN_MULTICAST(ip->ip_dst.s_addr); break; + } + + case IPOPT_RR: { + struct psref psref; + struct sockaddr_in ipaddr = { + .sin_len = sizeof(ipaddr), + .sin_family = AF_INET, + }; - case IPOPT_RR: + if (rr_present++) { + code = &cp[IPOPT_OPTVAL] - (u_char *)ip; + goto bad; + } if (optlen < IPOPT_OFFSET + sizeof(*cp)) { code = &cp[IPOPT_OLEN] - (u_char *)ip; goto bad; @@ -1025,12 +1019,10 @@ ip_dooptions(struct mbuf *m) * locate outgoing interface; if we're the destination, * use the incoming interface (should be same). */ - s = pserialize_read_enter(); - ifa = ifa_ifwithaddr(sintosa(&ipaddr)); + ifa = ifa_ifwithaddr_psref(sintosa(&ipaddr), &psref); if (ifa == NULL) { - ia = ip_rtaddr(ipaddr.sin_addr); + ia = ip_rtaddr(ipaddr.sin_addr, &psref); if (ia == NULL) { - pserialize_read_exit(s); type = ICMP_UNREACH; code = ICMP_UNREACH_HOST; goto bad; @@ -1040,13 +1032,18 @@ ip_dooptions(struct mbuf *m) } bcopy((void *)&ia->ia_addr.sin_addr, (void *)(cp + off), sizeof(struct in_addr)); - pserialize_read_exit(s); + ia4_release(ia, &psref); cp[IPOPT_OFFSET] += sizeof(struct in_addr); break; + } case IPOPT_TS: code = cp - (u_char *)ip; ipt = (struct ip_timestamp *)cp; + if (ts_present++) { + code = &cp[IPOPT_OPTVAL] - (u_char *)ip; + goto bad; + } if (ipt->ipt_len < 4 || ipt->ipt_len > 40) { code = (u_char *)&ipt->ipt_len - (u_char *)ip; goto bad; @@ -1055,7 +1052,7 @@ ip_dooptions(struct mbuf *m) code = (u_char *)&ipt->ipt_ptr - (u_char *)ip; goto bad; } - if (ipt->ipt_ptr > ipt->ipt_len - sizeof (int32_t)) { + if (ipt->ipt_ptr > ipt->ipt_len - sizeof(int32_t)) { if (++ipt->ipt_oflw == 0) { code = (u_char *)&ipt->ipt_ptr - (u_char *)ip; @@ -1072,6 +1069,10 @@ ip_dooptions(struct mbuf *m) case IPOPT_TS_TSANDADDR: { struct ifnet *rcvif; int _s, _ss; + struct sockaddr_in ipaddr = { + .sin_len = sizeof(ipaddr), + .sin_family = AF_INET, + }; if (ipt->ipt_ptr - 1 + sizeof(n_time) + sizeof(struct in_addr) > ipt->ipt_len) { @@ -1082,7 +1083,10 @@ ip_dooptions(struct mbuf *m) ipaddr.sin_addr = dst; _ss = pserialize_read_enter(); rcvif = m_get_rcvif(m, &_s); - ifa = ifaof_ifpforaddr(sintosa(&ipaddr), rcvif); + if (__predict_true(rcvif != NULL)) { + ifa = ifaof_ifpforaddr(sintosa(&ipaddr), + rcvif); + } m_put_rcvif(rcvif, &_s); if (ifa == NULL) { pserialize_read_exit(_ss); @@ -1096,7 +1100,12 @@ ip_dooptions(struct mbuf *m) break; } - case IPOPT_TS_PRESPEC: + case IPOPT_TS_PRESPEC: { + struct sockaddr_in ipaddr = { + .sin_len = sizeof(ipaddr), + .sin_family = AF_INET, + }; + if (ipt->ipt_ptr - 1 + sizeof(n_time) + sizeof(struct in_addr) > ipt->ipt_len) { code = (u_char *)&ipt->ipt_ptr - @@ -1114,6 +1123,7 @@ ip_dooptions(struct mbuf *m) pserialize_read_exit(s); ipt->ipt_ptr += sizeof(struct in_addr); break; + } default: /* XXX can't take &ipt->ipt_flg */ @@ -1160,7 +1170,7 @@ bad: * return internet address info of interface to be used to get there. */ static struct in_ifaddr * -ip_rtaddr(struct in_addr dst) +ip_rtaddr(struct in_addr dst, struct psref *psref) { struct rtentry *rt; union { @@ -1171,13 +1181,16 @@ ip_rtaddr(struct in_addr dst) sockaddr_in_init(&u.dst4, &dst, 0); - SOFTNET_LOCK(); ro = percpu_getref(ipforward_rt_percpu); rt = rtcache_lookup(ro, &u.dst); - percpu_putref(ipforward_rt_percpu); - SOFTNET_UNLOCK(); - if (rt == NULL) + if (rt == NULL) { + percpu_putref(ipforward_rt_percpu); return NULL; + } + + ia4_acquire(ifatoia(rt->rt_ifa), psref); + rtcache_unref(rt, ro); + percpu_putref(ipforward_rt_percpu); return ifatoia(rt->rt_ifa); } @@ -1187,16 +1200,25 @@ ip_rtaddr(struct in_addr dst) * up later by ip_srcroute if the receiver is interested. */ static void -save_rte(u_char *option, struct in_addr dst) +save_rte(struct mbuf *m, u_char *option, struct in_addr dst) { + struct ip_srcrt *isr; + struct m_tag *mtag; unsigned olen; olen = option[IPOPT_OLEN]; - if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst))) + if (olen > sizeof(isr->isr_hdr) + sizeof(isr->isr_routes)) return; - memcpy((void *)ip_srcrt.srcopt, (void *)option, olen); - ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr); - ip_srcrt.dst = dst; + + mtag = m_tag_get(PACKET_TAG_SRCROUTE, sizeof(*isr), M_NOWAIT); + if (mtag == NULL) + return; + isr = (struct ip_srcrt *)(mtag + 1); + + memcpy(isr->isr_hdr, option, olen); + isr->isr_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr); + isr->isr_dst = dst; + m_tag_prepend(m, mtag); } /* @@ -1205,36 +1227,43 @@ save_rte(u_char *option, struct in_addr * The first hop is placed before the options, will be removed later. */ struct mbuf * -ip_srcroute(void) +ip_srcroute(struct mbuf *m0) { struct in_addr *p, *q; struct mbuf *m; + struct ip_srcrt *isr; + struct m_tag *mtag; + + mtag = m_tag_find(m0, PACKET_TAG_SRCROUTE, NULL); + if (mtag == NULL) + return NULL; + isr = (struct ip_srcrt *)(mtag + 1); - if (ip_nhops == 0) + if (isr->isr_nhops == 0) return NULL; + m = m_get(M_DONTWAIT, MT_SOOPTS); - if (m == 0) + if (m == NULL) return NULL; MCLAIM(m, &inetdomain.dom_mowner); -#define OPTSIZ (sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt)) +#define OPTSIZ (sizeof(isr->isr_nop) + sizeof(isr->isr_hdr)) - /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */ - m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) + - OPTSIZ; + /* length is (nhops+1)*sizeof(addr) + sizeof(nop + header) */ + m->m_len = (isr->isr_nhops + 1) * sizeof(struct in_addr) + OPTSIZ; /* * First save first hop for return route */ - p = &ip_srcrt.route[ip_nhops - 1]; + p = &(isr->isr_routes[isr->isr_nhops - 1]); *(mtod(m, struct in_addr *)) = *p--; /* * Copy option fields and padding (nop) to mbuf. */ - ip_srcrt.nop = IPOPT_NOP; - ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF; - memmove(mtod(m, char *) + sizeof(struct in_addr), &ip_srcrt.nop, + isr->isr_nop = IPOPT_NOP; + isr->isr_hdr[IPOPT_OFFSET] = IPOPT_MINOFF; + memmove(mtod(m, char *) + sizeof(struct in_addr), &isr->isr_nop, OPTSIZ); q = (struct in_addr *)(mtod(m, char *) + sizeof(struct in_addr) + OPTSIZ); @@ -1243,14 +1272,15 @@ ip_srcroute(void) * Record return path as an IP source route, * reversing the path (pointers are now aligned). */ - while (p >= ip_srcrt.route) { + while (p >= isr->isr_routes) { *q++ = *p--; } /* * Last hop goes to final destination. */ - *q = ip_srcrt.dst; - return (m); + *q = isr->isr_dst; + m_tag_delete(m0, mtag); + return m; } const int inetctlerrmap[PRC_NCMDS] = { @@ -1329,21 +1359,18 @@ ip_forward(struct mbuf *m, int srcrt, st return; } - SOFTNET_LOCK(); - if (ip->ip_ttl <= IPTTLDEC) { icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0); - SOFTNET_UNLOCK(); return; } sockaddr_in_init(&u.dst4, &ip->ip_dst, 0); ro = percpu_getref(ipforward_rt_percpu); - if ((rt = rtcache_lookup(ro, &u.dst)) == NULL) { + rt = rtcache_lookup(ro, &u.dst); + if (rt == NULL) { percpu_putref(ipforward_rt_percpu); icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, dest, 0); - SOFTNET_UNLOCK(); return; } @@ -1385,6 +1412,7 @@ ip_forward(struct mbuf *m, int srcrt, st code = ICMP_REDIRECT_HOST; } } + rtcache_unref(rt, ro); error = ip_output(m, NULL, ro, (IP_FORWARDING | (ip_directedbcast ? IP_ALLOWBROADCAST : 0)), @@ -1414,14 +1442,12 @@ ip_forward(struct mbuf *m, int srcrt, st } percpu_putref(ipforward_rt_percpu); - SOFTNET_UNLOCK(); return; redirect: error: if (mcopy == NULL) { percpu_putref(ipforward_rt_percpu); - SOFTNET_UNLOCK(); return; } @@ -1444,8 +1470,10 @@ error: type = ICMP_UNREACH; code = ICMP_UNREACH_NEEDFRAG; - if ((rt = rtcache_validate(ro)) != NULL) + if ((rt = rtcache_validate(ro)) != NULL) { destmtu = rt->rt_ifp->if_mtu; + rtcache_unref(rt, ro); + } #ifdef IPSEC if (ipsec_used) (void)ipsec4_forward(mcopy, &destmtu); @@ -1463,12 +1491,10 @@ error: if (mcopy) m_freem(mcopy); percpu_putref(ipforward_rt_percpu); - SOFTNET_UNLOCK(); return; } icmp_error(mcopy, type, code, dest, destmtu); percpu_putref(ipforward_rt_percpu); - SOFTNET_UNLOCK(); } void @@ -1476,55 +1502,39 @@ ip_savecontrol(struct inpcb *inp, struct struct mbuf *m) { struct socket *so = inp->inp_socket; - ifnet_t *ifp; int inpflags = inp->inp_flags; - struct psref psref; - ifp = m_get_rcvif_psref(m, &psref); - if (__predict_false(ifp == NULL)) - return; /* XXX should report error? */ + if (SOOPT_TIMESTAMP(so->so_options)) + mp = sbsavetimestamp(so->so_options, m, mp); - if (so->so_options & SO_TIMESTAMP -#ifdef SO_OTIMESTAMP - || so->so_options & SO_OTIMESTAMP -#endif - ) { - struct timeval tv; - - microtime(&tv); -#ifdef SO_OTIMESTAMP - if (so->so_options & SO_OTIMESTAMP) { - struct timeval50 tv50; - timeval_to_timeval50(&tv, &tv50); - *mp = sbcreatecontrol((void *) &tv50, sizeof(tv50), - SCM_OTIMESTAMP, SOL_SOCKET); - } else -#endif - *mp = sbcreatecontrol((void *) &tv, sizeof(tv), - SCM_TIMESTAMP, SOL_SOCKET); - if (*mp) - mp = &(*mp)->m_next; - } if (inpflags & INP_RECVDSTADDR) { - *mp = sbcreatecontrol((void *) &ip->ip_dst, + *mp = sbcreatecontrol(&ip->ip_dst, sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } - if (inpflags & INP_RECVPKTINFO) { - struct in_pktinfo ipi; - ipi.ipi_addr = ip->ip_src; - ipi.ipi_ifindex = ifp->if_index; - *mp = sbcreatecontrol((void *) &ipi, - sizeof(ipi), IP_RECVPKTINFO, IPPROTO_IP); + + if (inpflags & INP_RECVTTL) { + *mp = sbcreatecontrol(&ip->ip_ttl, + sizeof(uint8_t), IP_RECVTTL, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } - if (inpflags & INP_PKTINFO) { + + struct psref psref; + ifnet_t *ifp = m_get_rcvif_psref(m, &psref); + if (__predict_false(ifp == NULL)) { +#ifdef DIAGNOSTIC + printf("%s: missing receive interface\n", __func__); +#endif + return; /* XXX should report error? */ + } + + if (inpflags & INP_RECVPKTINFO) { struct in_pktinfo ipi; ipi.ipi_addr = ip->ip_dst; ipi.ipi_ifindex = ifp->if_index; - *mp = sbcreatecontrol((void *) &ipi, + *mp = sbcreatecontrol(&ipi, sizeof(ipi), IP_PKTINFO, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; @@ -1538,12 +1548,6 @@ ip_savecontrol(struct inpcb *inp, struct if (*mp) mp = &(*mp)->m_next; } - if (inpflags & INP_RECVTTL) { - *mp = sbcreatecontrol((void *) &ip->ip_ttl, - sizeof(uint8_t), IP_RECVTTL, IPPROTO_IP); - if (*mp) - mp = &(*mp)->m_next; - } m_put_rcvif_psref(ifp, &psref); } @@ -1583,23 +1587,25 @@ sysctl_net_inet_ip_pmtudto(SYSCTLFN_ARGS int error, tmp; struct sysctlnode node; + icmp_mtudisc_lock(); + node = *rnode; tmp = ip_mtudisc_timeout; node.sysctl_data = &tmp; error = sysctl_lookup(SYSCTLFN_CALL(&node)); if (error || newp == NULL) - return (error); - if (tmp < 0) - return (EINVAL); - - mutex_enter(softnet_lock); + goto out; + if (tmp < 0) { + error = EINVAL; + goto out; + } ip_mtudisc_timeout = tmp; rt_timer_queue_change(ip_mtudisc_timeout_q, ip_mtudisc_timeout); - - mutex_exit(softnet_lock); - - return (0); + error = 0; +out: + icmp_mtudisc_unlock(); + return error; } static int