Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/netinet/ip_input.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/netinet/ip_input.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.316 retrieving revision 1.326 diff -u -p -r1.316 -r1.326 --- src/sys/netinet/ip_input.c 2014/05/29 23:02:48 1.316 +++ src/sys/netinet/ip_input.c 2016/01/08 03:55:39 1.326 @@ -1,4 +1,4 @@ -/* $NetBSD: ip_input.c,v 1.316 2014/05/29 23:02:48 rmind Exp $ */ +/* $NetBSD: ip_input.c,v 1.326 2016/01/08 03:55:39 knakahara Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -91,8 +91,9 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.316 2014/05/29 23:02:48 rmind Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.326 2016/01/08 03:55:39 knakahara Exp $"); +#ifdef _KERNEL_OPT #include "opt_inet.h" #include "opt_compat_netbsd.h" #include "opt_gateway.h" @@ -100,9 +101,13 @@ __KERNEL_RCSID(0, "$NetBSD: ip_input.c,v #include "opt_mrouting.h" #include "opt_mbuftrace.h" #include "opt_inet_csum.h" +#endif + +#include "arp.h" #include <sys/param.h> #include <sys/systm.h> +#include <sys/cpu.h> #include <sys/mbuf.h> #include <sys/domain.h> #include <sys/protosw.h> @@ -118,6 +123,7 @@ __KERNEL_RCSID(0, "$NetBSD: ip_input.c,v #include <net/if.h> #include <net/if_dl.h> #include <net/route.h> +#include <net/pktqueue.h> #include <net/pfil.h> #include <netinet/in.h> @@ -210,18 +216,18 @@ int ip_do_randomid = 0; * to the loopback interface instead of the interface where the * packets for those addresses are received. 
*/ -int ip_checkinterface = 0; +static int ip_checkinterface __read_mostly = 0; struct rttimer_queue *ip_mtudisc_timeout_q = NULL; -struct ifqueue ipintrq; +pktqueue_t * ip_pktq __read_mostly; +pfil_head_t * inet_pfil_hook __read_mostly; +ipid_state_t * ip_ids __read_mostly; +percpu_t * ipstat_percpu __read_mostly; -ipid_state_t * ip_ids; -uint16_t ip_id; +static struct route ipforward_rt __cacheline_aligned; -percpu_t *ipstat_percpu; - -pfil_head_t *inet_pfil_hook; +uint16_t ip_id; #ifdef INET_CSUM_COUNTERS #include <sys/device.h> @@ -264,6 +270,11 @@ static struct ip_srcrt { static int ip_drainwanted; +struct sockaddr_in ipaddr = { + .sin_len = sizeof(ipaddr), + .sin_family = AF_INET, +}; + static void save_rte(u_char *, struct in_addr); #ifdef MBUFTRACE @@ -271,12 +282,17 @@ struct mowner ip_rx_mowner = MOWNER_INIT struct mowner ip_tx_mowner = MOWNER_INIT("internet", "tx"); #endif +static void ipintr(void *); static void ip_input(struct mbuf *); static void ip_forward(struct mbuf *, int); static bool ip_dooptions(struct mbuf *); static struct in_ifaddr *ip_rtaddr(struct in_addr); static void sysctl_net_inet_ip_setup(struct sysctllog **); +/* XXX: Not yet enabled. */ +#define SOFTNET_LOCK() KASSERT(mutex_owned(softnet_lock)) +#define SOFTNET_UNLOCK() KASSERT(mutex_owned(softnet_lock)) + /* * IP initialization: fill in IP protocol switch table. * All protocols not implemented in kernel go to raw IP protocol handler. 
@@ -292,6 +308,9 @@ ip_init(void) pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); KASSERT(pr != NULL); + ip_pktq = pktq_create(IFQ_MAXLEN, ipintr, NULL); + KASSERT(ip_pktq != NULL); + for (u_int i = 0; i < IPPROTO_MAX; i++) { ip_protox[i] = pr - inetsw; } @@ -304,9 +323,7 @@ ip_init(void) ip_reass_init(); ip_ids = ip_id_init(); - ip_id = time_second & 0xfffff; - - ipintrq.ifq_maxlen = IFQ_MAXLEN; + ip_id = time_uptime & 0xfffff; ip_mtudisc_timeout_q = rt_timer_queue_create(ip_mtudisc_timeout); #ifdef GATEWAY @@ -325,54 +342,25 @@ ip_init(void) ipstat_percpu = percpu_alloc(sizeof(uint64_t) * IP_NSTATS); } -struct sockaddr_in ipaddr = { - .sin_len = sizeof(ipaddr), - .sin_family = AF_INET, -}; - -static struct route ipforward_rt; - /* - * IP software interrupt routine + * IP software interrupt routine. */ -void -ipintr(void) +static void +ipintr(void *arg __unused) { - int s; struct mbuf *m; - struct ifqueue lcl_intrq; - memset(&lcl_intrq, 0, sizeof(lcl_intrq)); + KASSERT(cpu_softintr_p()); mutex_enter(softnet_lock); - KERNEL_LOCK(1, NULL); - if (!IF_IS_EMPTY(&ipintrq)) { - s = splnet(); - - /* Take existing queue onto stack */ - lcl_intrq = ipintrq; - - /* Zero out global queue, preserving maxlen and drops */ - ipintrq.ifq_head = NULL; - ipintrq.ifq_tail = NULL; - ipintrq.ifq_len = 0; - ipintrq.ifq_maxlen = lcl_intrq.ifq_maxlen; - ipintrq.ifq_drops = lcl_intrq.ifq_drops; - - splx(s); - } - KERNEL_UNLOCK_ONE(NULL); - while (!IF_IS_EMPTY(&lcl_intrq)) { - IF_DEQUEUE(&lcl_intrq, m); - if (m == NULL) - break; + while ((m = pktq_dequeue(ip_pktq)) != NULL) { ip_input(m); } mutex_exit(softnet_lock); } /* - * Ip input routine. Checksum and byte swap header. If fragmented + * IP input routine. Checksum and byte swap header. If fragmented * try to reassemble. Process options. Pass to next level. 
*/ static void @@ -387,6 +375,9 @@ ip_input(struct mbuf *m) int srcrt = 0; ifnet_t *ifp; + KASSERTMSG(cpu_softintr_p(), "ip_input: not in the software " + "interrupt handler; synchronization assumptions violated"); + MCLAIM(m, &ip_rx_mowner); KASSERT((m->m_flags & M_PKTHDR) != 0); ifp = m->m_pkthdr.rcvif; @@ -394,10 +385,13 @@ ip_input(struct mbuf *m) /* * If no IP addresses have been set yet but the interfaces * are receiving, can't do anything with incoming packets yet. + * Note: we pre-check without locks held. */ - if (TAILQ_FIRST(&in_ifaddrhead) == 0) + if (!TAILQ_FIRST(&in_ifaddrhead)) { goto bad; + } IP_STATINC(IP_STAT_TOTAL); + /* * If the IP header is not aligned, slurp it up into a new * mbuf with space for link headers, in the event we forward @@ -522,19 +516,23 @@ ip_input(struct mbuf *m) * in the list may have previously cleared it. */ #if defined(IPSEC) - if (!ipsec_indone(m)) + if (!ipsec_used || !ipsec_indone(m)) #else if (1) #endif { struct in_addr odst = ip->ip_dst; + bool freed; - if (pfil_run_hooks(inet_pfil_hook, &m, ifp, PFIL_IN) != 0) - return; - if (m == NULL) + SOFTNET_LOCK(); + freed = pfil_run_hooks(inet_pfil_hook, &m, ifp, PFIL_IN) != 0; + SOFTNET_UNLOCK(); + if (freed || m == NULL) { return; + } ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; + /* * XXX The setting of "srcrt" here is to prevent ip_forward() * from generating ICMP redirects for packets that have @@ -554,9 +552,14 @@ ip_input(struct mbuf *m) #ifdef ALTQ /* XXX Temporary until ALTQ is changed to use a pfil hook */ - if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) { - /* packet dropped by traffic conditioner */ - return; + if (altq_input) { + SOFTNET_LOCK(); + if ((*altq_input)(m, AF_INET) == 0) { + /* Packet dropped by traffic conditioner. */ + SOFTNET_UNLOCK(); + return; + } + SOFTNET_UNLOCK(); } #endif @@ -594,11 +597,13 @@ ip_input(struct mbuf *m) * * Traditional 4.4BSD did not consult IFF_UP at all. 
* The behavior here is to treat addresses on !IFF_UP interface - * as not mine. + * or IN_IFF_NOTREADY addresses as not mine. */ downmatch = 0; LIST_FOREACH(ia, &IN_IFADDR_HASH(ip->ip_dst.s_addr), ia_hash) { if (in_hosteq(ia->ia_addr.sin_addr, ip->ip_dst)) { + if (ia->ia4_flags & IN_IFF_NOTREADY) + continue; if (checkif && ia->ia_ifp != ifp) continue; if ((ia->ia_ifp->if_flags & IFF_UP) != 0) @@ -614,6 +619,8 @@ ip_input(struct mbuf *m) if (ifa->ifa_addr->sa_family != AF_INET) continue; ia = ifatoia(ifa); + if (ia->ia4_flags & IN_IFF_NOTREADY) + continue; if (in_hosteq(ip->ip_dst, ia->ia_broadaddr.sin_addr) || in_hosteq(ip->ip_dst, ia->ia_netbroadcast) || /* @@ -648,19 +655,23 @@ ip_input(struct mbuf *m) * as expected when ip_mforward() is called from * ip_output().) */ + SOFTNET_LOCK(); if (ip_mforward(m, ifp) != 0) { + SOFTNET_UNLOCK(); IP_STATINC(IP_STAT_CANTFORWARD); m_freem(m); return; } + SOFTNET_UNLOCK(); /* * The process-level routing demon needs to receive * all multicast IGMP packets, whether or not this * host belongs to their destination groups. */ - if (ip->ip_p == IPPROTO_IGMP) + if (ip->ip_p == IPPROTO_IGMP) { goto ours; + } IP_STATINC(IP_STAT_CANTFORWARD); } #endif @@ -699,9 +710,14 @@ ip_input(struct mbuf *m) } #ifdef IPSEC /* Perform IPsec, if any. */ - if (ipsec4_input(m, IP_FORWARDING | (ip_directedbcast ? - IP_ALLOWBROADCAST : 0)) != 0) { - goto bad; + if (ipsec_used) { + SOFTNET_LOCK(); + if (ipsec4_input(m, IP_FORWARDING | + (ip_directedbcast ? IP_ALLOWBROADCAST : 0)) != 0) { + SOFTNET_UNLOCK(); + goto bad; + } + SOFTNET_UNLOCK(); } #endif ip_forward(m, srcrt); @@ -738,10 +754,14 @@ ours: * Note that we do not visit this with protocols with PCB layer * code - like UDP/TCP/raw IP. 
*/ - if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) { + if (ipsec_used && + (inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) { + SOFTNET_LOCK(); if (ipsec4_input(m, 0) != 0) { + SOFTNET_UNLOCK(); goto bad; } + SOFTNET_UNLOCK(); } #endif @@ -753,12 +773,13 @@ ours: ia->ia_ifa.ifa_data.ifad_inbytes += ntohs(ip->ip_len); #endif IP_STATINC(IP_STAT_DELIVERED); - { - int off = hlen, nh = ip->ip_p; + const int off = hlen, nh = ip->ip_p; + + SOFTNET_LOCK(); (*inetsw[ip_protox[nh]].pr_input)(m, off, nh); + SOFTNET_UNLOCK(); return; - } bad: m_freem(m); return; @@ -1043,7 +1064,10 @@ ip_rtaddr(struct in_addr dst) sockaddr_in_init(&u.dst4, &dst, 0); - if ((rt = rtcache_lookup(&ipforward_rt, &u.dst)) == NULL) + SOFTNET_LOCK(); + rt = rtcache_lookup(&ipforward_rt, &u.dst); + SOFTNET_UNLOCK(); + if (rt == NULL) return NULL; return ifatoia(rt->rt_ifa); @@ -1173,6 +1197,10 @@ ip_forward(struct mbuf *m, int srcrt) struct sockaddr dst; struct sockaddr_in dst4; } u; + uint64_t *ips; + + KASSERTMSG(cpu_softintr_p(), "ip_forward: not in the software " + "interrupt handler; synchronization assumptions violated"); /* * We are now in the output path. @@ -1190,14 +1218,20 @@ ip_forward(struct mbuf *m, int srcrt) m_freem(m); return; } + + SOFTNET_LOCK(); + if (ip->ip_ttl <= IPTTLDEC) { icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0); + SOFTNET_UNLOCK(); return; } sockaddr_in_init(&u.dst4, &ip->ip_dst, 0); + if ((rt = rtcache_lookup(&ipforward_rt, &u.dst)) == NULL) { icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, dest, 0); + SOFTNET_UNLOCK(); return; } @@ -1244,28 +1278,38 @@ ip_forward(struct mbuf *m, int srcrt) (IP_FORWARDING | (ip_directedbcast ? 
IP_ALLOWBROADCAST : 0)), NULL, NULL); - if (error) + if (error) { IP_STATINC(IP_STAT_CANTFORWARD); - else { - uint64_t *ips = IP_STAT_GETREF(); - ips[IP_STAT_FORWARD]++; - if (type) { - ips[IP_STAT_REDIRECTSENT]++; - IP_STAT_PUTREF(); - } else { - IP_STAT_PUTREF(); - if (mcopy) { + goto error; + } + + ips = IP_STAT_GETREF(); + ips[IP_STAT_FORWARD]++; + + if (type) { + ips[IP_STAT_REDIRECTSENT]++; + IP_STAT_PUTREF(); + goto redirect; + } + + IP_STAT_PUTREF(); + if (mcopy) { #ifdef GATEWAY - if (mcopy->m_flags & M_CANFASTFWD) - ipflow_create(&ipforward_rt, mcopy); + if (mcopy->m_flags & M_CANFASTFWD) + ipflow_create(&ipforward_rt, mcopy); #endif - m_freem(mcopy); - } - return; - } + m_freem(mcopy); } - if (mcopy == NULL) + + SOFTNET_UNLOCK(); + return; + +redirect: +error: + if (mcopy == NULL) { + SOFTNET_UNLOCK(); return; + } switch (error) { @@ -1289,7 +1333,8 @@ ip_forward(struct mbuf *m, int srcrt) if ((rt = rtcache_validate(&ipforward_rt)) != NULL) destmtu = rt->rt_ifp->if_mtu; #ifdef IPSEC - (void)ipsec4_forward(mcopy, &destmtu); + if (ipsec_used) + (void)ipsec4_forward(mcopy, &destmtu); #endif IP_STATINC(IP_STAT_CANTFRAG); break; @@ -1303,9 +1348,11 @@ ip_forward(struct mbuf *m, int srcrt) */ if (mcopy) m_freem(mcopy); + SOFTNET_UNLOCK(); return; } icmp_error(mcopy, type, code, dest, destmtu); + SOFTNET_UNLOCK(); } void @@ -1537,15 +1584,6 @@ sysctl_net_inet_ip_setup(struct sysctllo sysctl_net_inet_ip_pmtudto, 0, (void *)&ip_mtudisc_timeout, 0, CTL_NET, PF_INET, IPPROTO_IP, IPCTL_MTUDISCTIMEOUT, CTL_EOL); -#if NGIF > 0 - sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT|CTLFLAG_READWRITE, - CTLTYPE_INT, "gifttl", - SYSCTL_DESCR("Default TTL for a gif tunnel datagram"), - NULL, 0, &ip_gif_ttl, 0, - CTL_NET, PF_INET, IPPROTO_IP, - IPCTL_GIF_TTL, CTL_EOL); -#endif /* NGIF */ #ifndef IPNOPRIVPORTS sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, @@ -1602,6 +1640,16 @@ sysctl_net_inet_ip_setup(struct sysctllo sysctl_net_inet_ip_stats, 
0, NULL, 0, CTL_NET, PF_INET, IPPROTO_IP, IPCTL_STATS, CTL_EOL); +#if NARP + sysctl_createv(clog, 0, NULL, NULL, + CTLFLAG_PERMANENT|CTLFLAG_READWRITE, + CTLTYPE_INT, "dad_count", + SYSCTL_DESCR("Number of Duplicate Address Detection " + "probes to send"), + NULL, 0, &ip_dad_count, 0, + CTL_NET, PF_INET, IPPROTO_IP, + IPCTL_DAD_COUNT, CTL_EOL); +#endif /* anonportalgo RFC6056 subtree */ const struct sysctlnode *portalgo_node;