Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/netinet/ip_input.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/netinet/ip_input.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.337.2.5 retrieving revision 1.376.2.2 diff -u -p -r1.337.2.5 -r1.376.2.2 --- src/sys/netinet/ip_input.c 2017/04/26 02:53:29 1.337.2.5 +++ src/sys/netinet/ip_input.c 2018/05/02 07:20:23 1.376.2.2 @@ -1,4 +1,4 @@ -/* $NetBSD: ip_input.c,v 1.337.2.5 2017/04/26 02:53:29 pgoyette Exp $ */ +/* $NetBSD: ip_input.c,v 1.376.2.2 2018/05/02 07:20:23 pgoyette Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -29,7 +29,7 @@ * SUCH DAMAGE. */ -/*- +/* * Copyright (c) 1998 The NetBSD Foundation, Inc. * All rights reserved. * @@ -91,11 +91,10 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.337.2.5 2017/04/26 02:53:29 pgoyette Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.376.2.2 2018/05/02 07:20:23 pgoyette Exp $"); #ifdef _KERNEL_OPT #include "opt_inet.h" -#include "opt_compat_netbsd.h" #include "opt_gateway.h" #include "opt_ipsec.h" #include "opt_mrouting.h" @@ -154,55 +153,22 @@ __KERNEL_RCSID(0, "$NetBSD: ip_input.c,v #ifndef IPFORWARDING #ifdef GATEWAY #define IPFORWARDING 1 /* forward IP packets not for us */ -#else /* GATEWAY */ +#else #define IPFORWARDING 0 /* don't forward IP packets not for us */ -#endif /* GATEWAY */ -#endif /* IPFORWARDING */ -#ifndef IPSENDREDIRECTS -#define IPSENDREDIRECTS 1 -#endif -#ifndef IPFORWSRCRT -#define IPFORWSRCRT 1 /* forward source-routed packets */ -#endif -#ifndef IPALLOWSRCRT -#define IPALLOWSRCRT 1 /* allow source-routed packets */ -#endif -#ifndef IPMTUDISC -#define IPMTUDISC 1 #endif -#ifndef IPMTUDISCTIMEOUT -#define IPMTUDISCTIMEOUT (10 * 60) /* as per RFC 1191 */ -#endif - -#ifdef COMPAT_50 -#include <compat/sys/time.h> -#include <compat/sys/socket.h> #endif -/* - * 
Note: DIRECTED_BROADCAST is handled this way so that previous - * configuration using this option will Just Work. - */ -#ifndef IPDIRECTEDBCAST -#ifdef DIRECTED_BROADCAST -#define IPDIRECTEDBCAST 1 -#else -#define IPDIRECTEDBCAST 0 -#endif /* DIRECTED_BROADCAST */ -#endif /* IPDIRECTEDBCAST */ -int ipforwarding = IPFORWARDING; -int ipsendredirects = IPSENDREDIRECTS; -int ip_defttl = IPDEFTTL; -int ip_forwsrcrt = IPFORWSRCRT; -int ip_directedbcast = IPDIRECTEDBCAST; -int ip_allowsrcrt = IPALLOWSRCRT; -int ip_mtudisc = IPMTUDISC; -int ip_mtudisc_timeout = IPMTUDISCTIMEOUT; -#ifdef DIAGNOSTIC -int ipprintfs = 0; -#endif +#define IPMTUDISCTIMEOUT (10 * 60) /* as per RFC 1191 */ -int ip_do_randomid = 0; +int ipforwarding = IPFORWARDING; +int ipsendredirects = 1; +int ip_defttl = IPDEFTTL; +int ip_forwsrcrt = 0; +int ip_directedbcast = 0; +int ip_allowsrcrt = 0; +int ip_mtudisc = 1; +int ip_mtudisc_timeout = IPMTUDISCTIMEOUT; +int ip_do_randomid = 0; /* * XXX - Setting ip_checkinterface mostly implements the receive side of @@ -226,7 +192,7 @@ pfil_head_t * inet_pfil_hook __read_mo ipid_state_t * ip_ids __read_mostly; percpu_t * ipstat_percpu __read_mostly; -static struct route ipforward_rt __cacheline_aligned; +static percpu_t *ipforward_rt_percpu __cacheline_aligned; uint16_t ip_id; @@ -339,14 +305,10 @@ ip_init(void) #ifdef MBUFTRACE MOWNER_ATTACH(&ip_tx_mowner); MOWNER_ATTACH(&ip_rx_mowner); -#endif /* MBUFTRACE */ +#endif ipstat_percpu = percpu_alloc(sizeof(uint64_t) * IP_NSTATS); - ipforward_rt_percpu = percpu_alloc(sizeof(struct route)); - if (ipforward_rt_percpu == NULL) - panic("failed to allocate ipforward_rt_percpu"); - ip_mtudisc_timeout_q = rt_timer_queue_create(ip_mtudisc_timeout); } @@ -381,10 +343,14 @@ ip_match_our_address(struct ifnet *ifp, continue; if (checkif && ia->ia_ifp != ifp) continue; - if ((ia->ia_ifp->if_flags & IFF_UP) != 0) - break; - else + if ((ia->ia_ifp->if_flags & IFF_UP) == 0) { (*downmatch)++; + continue; + } + if (ia->ia4_flags & 
IN_IFF_DETACHED && + (ifp->if_flags & IFF_LOOPBACK) == 0) + continue; + break; } } @@ -403,6 +369,9 @@ ip_match_our_address_broadcast(struct if ia = ifatoia(ifa); if (ia->ia4_flags & IN_IFF_NOTREADY) continue; + if (ia->ia4_flags & IN_IFF_DETACHED && + (ifp->if_flags & IFF_LOOPBACK) == 0) + continue; if (in_hosteq(ip->ip_dst, ia->ia_broadaddr.sin_addr) || in_hosteq(ip->ip_dst, ia->ia_netbroadcast) || /* @@ -435,15 +404,11 @@ ipintr(void *arg __unused) KASSERT(cpu_softintr_p()); -#ifndef NET_MPSAFE - mutex_enter(softnet_lock); -#endif + SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE(); while ((m = pktq_dequeue(ip_pktq)) != NULL) { ip_input(m); } -#ifndef NET_MPSAFE - mutex_exit(softnet_lock); -#endif + SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE(); } /* @@ -489,13 +454,13 @@ ip_input(struct mbuf *m) */ if (IP_HDR_ALIGNED_P(mtod(m, void *)) == 0) { if ((m = m_copyup(m, sizeof(struct ip), - (max_linkhdr + 3) & ~3)) == NULL) { + (max_linkhdr + 3) & ~3)) == NULL) { /* XXXJRT new stat, please */ IP_STATINC(IP_STAT_TOOSMALL); goto out; } - } else if (__predict_false(m->m_len < sizeof (struct ip))) { - if ((m = m_pullup(m, sizeof (struct ip))) == NULL) { + } else if (__predict_false(m->m_len < sizeof(struct ip))) { + if ((m = m_pullup(m, sizeof(struct ip))) == NULL) { IP_STATINC(IP_STAT_TOOSMALL); goto out; } @@ -537,8 +502,7 @@ ip_input(struct mbuf *m) } switch (m->m_pkthdr.csum_flags & - ((ifp->if_csum_flags_rx & M_CSUM_IPv4) | - M_CSUM_IPv4_BAD)) { + ((ifp->if_csum_flags_rx & M_CSUM_IPv4) | M_CSUM_IPv4_BAD)) { case M_CSUM_IPv4|M_CSUM_IPv4_BAD: INET_CSUM_COUNTER_INCR(&ip_hwcsum_bad); IP_STATINC(IP_STAT_BADSUM); @@ -577,10 +541,9 @@ ip_input(struct mbuf *m) } /* - * Check that the amount of data in the buffers - * is as at least much as the IP header would have us expect. - * Trim mbufs if longer than we expect. - * Drop packet if shorter than we expect. + * Check that the amount of data in the buffers is at least as much + * as the IP header would have us expect. 
Trim mbufs if longer than + * we expect. Drop packet if shorter than we expect. */ if (m->m_pkthdr.len < len) { IP_STATINC(IP_STAT_TOOSHORT); @@ -606,6 +569,9 @@ ip_input(struct mbuf *m) * not fast-forwarded, they must clear the M_CANFASTFWD flag. * Note that filters must _never_ set this flag, as another filter * in the list may have previously cleared it. + * + * Don't call hooks if the packet has already been processed by + * IPsec (encapsulated, tunnel mode). */ #if defined(IPSEC) if (!ipsec_used || !ipsec_indone(m)) @@ -629,7 +595,7 @@ ip_input(struct mbuf *m) * from generating ICMP redirects for packets that have * been redirected by a hook back out on to the same LAN that * they came from and is not an indication that the packet - * is being inffluenced by source routing options. This + * is being influenced by source routing options. This * allows things like * "rdr tlp0 0/0 port 80 -> 1.1.1.200 3128 tcp" * where tlp0 is both on the 1.1.1.0/24 network and is the @@ -661,7 +627,7 @@ ip_input(struct mbuf *m) * error was detected (causing an icmp message * to be sent and the original packet to be freed). */ - if (hlen > sizeof (struct ip) && ip_dooptions(m)) { + if (hlen > sizeof(struct ip) && ip_dooptions(m)) { m = NULL; goto out; } @@ -761,15 +727,11 @@ ip_input(struct mbuf *m) return; } #ifdef IPSEC - /* Perform IPsec, if any. */ + /* Check the security policy (SP) for the packet */ if (ipsec_used) { - SOFTNET_LOCK(); - if (ipsec4_input(m, IP_FORWARDING | - (ip_directedbcast ? IP_ALLOWBROADCAST : 0)) != 0) { - SOFTNET_UNLOCK(); + if (ipsec4_input(m, IP_FORWARDING) != 0) { goto out; } - SOFTNET_UNLOCK(); } #endif ip_forward(m, srcrt, ifp); @@ -798,12 +760,14 @@ ours: } /* * Reassembly is done, we have the final packet. - * Updated cached data in local variable(s). + * Update cached data in local variable(s). 
*/ ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; } + M_VERIFY_PACKET(m); + #ifdef IPSEC /* * Enforce IPsec policy checking if we are seeing last header. @@ -812,12 +776,9 @@ ours: */ if (ipsec_used && (inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) { - SOFTNET_LOCK(); if (ipsec4_input(m, 0) != 0) { - SOFTNET_UNLOCK(); goto out; } - SOFTNET_UNLOCK(); } #endif @@ -825,7 +786,7 @@ ours: * Switch out to protocol's input routine. */ #if IFA_STATS - if (ia && ip) { + if (ia) { struct in_ifaddr *_ia; /* * Keep a reference from ip_match_our_address with psref @@ -841,9 +802,7 @@ ours: const int off = hlen, nh = ip->ip_p; - SOFTNET_LOCK(); (*inetsw[ip_protox[nh]].pr_input)(m, off, nh); - SOFTNET_UNLOCK(); return; out: @@ -859,17 +818,11 @@ void ip_slowtimo(void) { -#ifndef NET_MPSAFE - mutex_enter(softnet_lock); - KERNEL_LOCK(1, NULL); -#endif + SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE(); ip_reass_slowtimo(); -#ifndef NET_MPSAFE - KERNEL_UNLOCK_ONE(NULL); - mutex_exit(softnet_lock); -#endif + SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE(); } /* @@ -899,14 +852,19 @@ ip_dooptions(struct mbuf *m) struct ip_timestamp *ipt; struct in_ifaddr *ia; int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0; + int srr_present, rr_present, ts_present; struct in_addr dst; n_time ntime; struct ifaddr *ifa = NULL; int s; + srr_present = 0; + rr_present = 0; + ts_present = 0; + dst = ip->ip_dst; cp = (u_char *)(ip + 1); - cnt = (ip->ip_hl << 2) - sizeof (struct ip); + cnt = (ip->ip_hl << 2) - sizeof(struct ip); for (; cnt > 0; cnt -= optlen, cp += optlen) { opt = cp[IPOPT_OPTVAL]; if (opt == IPOPT_EOL) @@ -951,6 +909,10 @@ ip_dooptions(struct mbuf *m) code = ICMP_UNREACH_NET_PROHIB; goto bad; } + if (srr_present++) { + code = &cp[IPOPT_OPTVAL] - (u_char *)ip; + goto bad; + } if (optlen < IPOPT_OFFSET + sizeof(*cp)) { code = &cp[IPOPT_OLEN] - (u_char *)ip; goto bad; @@ -1007,8 +969,8 @@ ip_dooptions(struct mbuf *m) goto bad; } ip->ip_dst = ipaddr.sin_addr; - bcopy((void 
*)&ia->ia_addr.sin_addr, - (void *)(cp + off), sizeof(struct in_addr)); + memcpy(cp + off, &ia->ia_addr.sin_addr, + sizeof(struct in_addr)); ia4_release(ia, &psref); cp[IPOPT_OFFSET] += sizeof(struct in_addr); /* @@ -1025,6 +987,10 @@ ip_dooptions(struct mbuf *m) .sin_family = AF_INET, }; + if (rr_present++) { + code = &cp[IPOPT_OPTVAL] - (u_char *)ip; + goto bad; + } if (optlen < IPOPT_OFFSET + sizeof(*cp)) { code = &cp[IPOPT_OLEN] - (u_char *)ip; goto bad; @@ -1039,7 +1005,7 @@ ip_dooptions(struct mbuf *m) off--; /* 0 origin */ if ((off + sizeof(struct in_addr)) > optlen) break; - memcpy((void *)&ipaddr.sin_addr, (void *)(&ip->ip_dst), + memcpy((void *)&ipaddr.sin_addr, (void *)&ip->ip_dst, sizeof(ipaddr.sin_addr)); /* * locate outgoing interface; if we're the destination, @@ -1056,8 +1022,8 @@ ip_dooptions(struct mbuf *m) } else { ia = ifatoia(ifa); } - bcopy((void *)&ia->ia_addr.sin_addr, - (void *)(cp + off), sizeof(struct in_addr)); + memcpy(cp + off, &ia->ia_addr.sin_addr, + sizeof(struct in_addr)); ia4_release(ia, &psref); cp[IPOPT_OFFSET] += sizeof(struct in_addr); break; @@ -1066,6 +1032,10 @@ ip_dooptions(struct mbuf *m) case IPOPT_TS: code = cp - (u_char *)ip; ipt = (struct ip_timestamp *)cp; + if (ts_present++) { + code = &cp[IPOPT_OPTVAL] - (u_char *)ip; + goto bad; + } if (ipt->ipt_len < 4 || ipt->ipt_len > 40) { code = (u_char *)&ipt->ipt_len - (u_char *)ip; goto bad; @@ -1074,7 +1044,7 @@ ip_dooptions(struct mbuf *m) code = (u_char *)&ipt->ipt_ptr - (u_char *)ip; goto bad; } - if (ipt->ipt_ptr > ipt->ipt_len - sizeof (int32_t)) { + if (ipt->ipt_ptr > ipt->ipt_len - sizeof(int32_t)) { if (++ipt->ipt_oflw == 0) { code = (u_char *)&ipt->ipt_ptr - (u_char *)ip; @@ -1115,8 +1085,8 @@ ip_dooptions(struct mbuf *m) break; } ia = ifatoia(ifa); - bcopy(&ia->ia_addr.sin_addr, - cp0, sizeof(struct in_addr)); + memcpy(cp0, &ia->ia_addr.sin_addr, + sizeof(struct in_addr)); pserialize_read_exit(_ss); ipt->ipt_ptr += sizeof(struct in_addr); break; @@ -1199,6 
+1169,7 @@ ip_rtaddr(struct in_addr dst, struct psr struct sockaddr dst; struct sockaddr_in dst4; } u; + struct route *ro; sockaddr_in_init(&u.dst4, &dst, 0); @@ -1358,6 +1329,7 @@ ip_forward(struct mbuf *m, int srcrt, st struct sockaddr_in dst4; } u; uint64_t *ips; + struct route *ro; KASSERTMSG(cpu_softintr_p(), "ip_forward: not in the software " "interrupt handler; synchronization assumptions violated"); @@ -1434,7 +1406,7 @@ ip_forward(struct mbuf *m, int srcrt, st } rtcache_unref(rt, ro); - error = ip_output(m, NULL, &ipforward_rt, + error = ip_output(m, NULL, ro, (IP_FORWARDING | (ip_directedbcast ? IP_ALLOWBROADCAST : 0)), NULL, NULL); @@ -1456,7 +1428,7 @@ ip_forward(struct mbuf *m, int srcrt, st if (mcopy) { #ifdef GATEWAY if (mcopy->m_flags & M_CANFASTFWD) - ipflow_create(&ipforward_rt, mcopy); + ipflow_create(ro, mcopy); #endif m_freem(mcopy); } @@ -1505,7 +1477,7 @@ error: /* * Do not generate ICMP_SOURCEQUENCH as required in RFC 1812, * Requirements for IP Version 4 Routers. Source quench can - * big problem under DoS attacks or if the underlying + * be a big problem under DoS attacks or if the underlying * interface is rate-limited. */ if (mcopy) @@ -1522,55 +1494,39 @@ ip_savecontrol(struct inpcb *inp, struct struct mbuf *m) { struct socket *so = inp->inp_socket; - ifnet_t *ifp; int inpflags = inp->inp_flags; - struct psref psref; - - ifp = m_get_rcvif_psref(m, &psref); - if (__predict_false(ifp == NULL)) - return; /* XXX should report error? 
*/ - if (so->so_options & SO_TIMESTAMP -#ifdef SO_OTIMESTAMP - || so->so_options & SO_OTIMESTAMP -#endif - ) { - struct timeval tv; + if (SOOPT_TIMESTAMP(so->so_options)) + mp = sbsavetimestamp(so->so_options, mp); - microtime(&tv); -#ifdef SO_OTIMESTAMP - if (so->so_options & SO_OTIMESTAMP) { - struct timeval50 tv50; - timeval_to_timeval50(&tv, &tv50); - *mp = sbcreatecontrol((void *) &tv50, sizeof(tv50), - SCM_OTIMESTAMP, SOL_SOCKET); - } else -#endif - *mp = sbcreatecontrol((void *) &tv, sizeof(tv), - SCM_TIMESTAMP, SOL_SOCKET); - if (*mp) - mp = &(*mp)->m_next; - } if (inpflags & INP_RECVDSTADDR) { - *mp = sbcreatecontrol((void *) &ip->ip_dst, + *mp = sbcreatecontrol(&ip->ip_dst, sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } - if (inpflags & INP_RECVPKTINFO) { - struct in_pktinfo ipi; - ipi.ipi_addr = ip->ip_src; - ipi.ipi_ifindex = ifp->if_index; - *mp = sbcreatecontrol((void *) &ipi, - sizeof(ipi), IP_RECVPKTINFO, IPPROTO_IP); + + if (inpflags & INP_RECVTTL) { + *mp = sbcreatecontrol(&ip->ip_ttl, + sizeof(uint8_t), IP_RECVTTL, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } - if (inpflags & INP_PKTINFO) { + + struct psref psref; + ifnet_t *ifp = m_get_rcvif_psref(m, &psref); + if (__predict_false(ifp == NULL)) { +#ifdef DIAGNOSTIC + printf("%s: missing receive interface\n", __func__); +#endif + return; /* XXX should report error? 
*/ + } + + if (inpflags & INP_RECVPKTINFO) { struct in_pktinfo ipi; ipi.ipi_addr = ip->ip_dst; ipi.ipi_ifindex = ifp->if_index; - *mp = sbcreatecontrol((void *) &ipi, + *mp = sbcreatecontrol(&ipi, sizeof(ipi), IP_PKTINFO, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; @@ -1584,12 +1540,6 @@ ip_savecontrol(struct inpcb *inp, struct if (*mp) mp = &(*mp)->m_next; } - if (inpflags & INP_RECVTTL) { - *mp = sbcreatecontrol((void *) &ip->ip_ttl, - sizeof(uint8_t), IP_RECVTTL, IPPROTO_IP); - if (*mp) - mp = &(*mp)->m_next; - } m_put_rcvif_psref(ifp, &psref); } @@ -1694,15 +1644,6 @@ sysctl_net_inet_ip_setup(struct sysctllo NULL, 0, &ip_defttl, 0, CTL_NET, PF_INET, IPPROTO_IP, IPCTL_DEFTTL, CTL_EOL); -#ifdef IPCTL_DEFMTU - sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT /* |CTLFLAG_READWRITE? */, - CTLTYPE_INT, "mtu", - SYSCTL_DESCR("Default MTA for an INET route"), - NULL, 0, &ip_mtu, 0, - CTL_NET, PF_INET, IPPROTO_IP, - IPCTL_DEFMTU, CTL_EOL); -#endif /* IPCTL_DEFMTU */ sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "forwsrcrt",