Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/netinet/ip_input.c,v retrieving revision 1.262.6.5 retrieving revision 1.263 diff -u -p -r1.262.6.5 -r1.263 --- src/sys/netinet/ip_input.c 2009/01/17 13:29:32 1.262.6.5 +++ src/sys/netinet/ip_input.c 2008/03/27 00:18:56 1.263 @@ -1,4 +1,4 @@ -/* $NetBSD: ip_input.c,v 1.262.6.5 2009/01/17 13:29:32 mjf Exp $ */ +/* $NetBSD: ip_input.c,v 1.263 2008/03/27 00:18:56 cube Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -45,6 +45,13 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED @@ -91,7 +98,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.262.6.5 2009/01/17 13:29:32 mjf Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.263 2008/03/27 00:18:56 cube Exp $"); #include "opt_inet.h" #include "opt_gateway.h" @@ -128,7 +135,6 @@ __KERNEL_RCSID(0, "$NetBSD: ip_input.c,v #include #include #include -#include #include /* just for gif_ttl */ #include @@ -142,7 +148,6 @@ __KERNEL_RCSID(0, "$NetBSD: ip_input.c,v #ifdef IPSEC #include -#include #include #endif #ifdef FAST_IPSEC @@ -225,10 +230,9 @@ u_long in_multihash; /* size of hash int in_multientries; /* total number of addrs */ struct in_multihashhead *in_multihashtbl; struct ifqueue ipintrq; +struct ipstat ipstat; uint16_t ip_id; -percpu_t *ipstat_percpu; - #ifdef PFIL_HOOKS struct pfil_head inet_pfil_hook; #endif @@ -330,8 +334,10 @@ do { \ #define IPQ_UNLOCK() ipq_unlock() -struct pool inmulti_pool; -struct pool ipqent_pool; +POOL_INIT(inmulti_pool, sizeof(struct in_multi), 0, 0, 0, "inmltpl", NULL, + IPL_SOFTNET); +POOL_INIT(ipqent_pool, sizeof(struct ipqent), 0, 0, 0, "ipqepl", NULL, + IPL_VM); #ifdef INET_CSUM_COUNTERS #include @@ -397,11 +403,6 @@ ip_init(void) const struct protosw *pr; int i; - pool_init(&inmulti_pool, sizeof(struct in_multi), 0, 0, 0, "inmltpl", - NULL, IPL_SOFTNET); - pool_init(&ipqent_pool, sizeof(struct ipqent), 0, 0, 0, "ipqepl", - NULL, IPL_VM); - pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); if (pr == 0) panic("ip_init"); @@ -423,10 +424,10 @@ ip_init(void) ip_nmbclusters_changed(); TAILQ_INIT(&in_ifaddrhead); - in_ifaddrhashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, true, - &in_ifaddrhash); - in_multihashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, true, - &in_multihash); + in_ifaddrhashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, M_IFADDR, + M_WAITOK, &in_ifaddrhash); + in_multihashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, M_IPMADDR, + M_WAITOK, &in_multihash); ip_mtudisc_timeout_q = rt_timer_queue_create(ip_mtudisc_timeout); #ifdef GATEWAY ipflow_init(ip_hashsize); @@ -446,8 +447,6 @@ ip_init(void) MOWNER_ATTACH(&ip_tx_mowner); MOWNER_ATTACH(&ip_rx_mowner); #endif /* MBUFTRACE */ - - ipstat_percpu = percpu_alloc(sizeof(uint64_t) * IP_NSTATS); } struct sockaddr_in ipaddr = { @@ -465,18 +464,14 @@ ipintr(void) int s; struct mbuf *m; - mutex_enter(softnet_lock); - KERNEL_LOCK(1, NULL); while (!IF_IS_EMPTY(&ipintrq)) { s = splnet(); IF_DEQUEUE(&ipintrq, m); splx(s); - if (m == NULL) - break; + if (m == 0) + return; ip_input(m); } - KERNEL_UNLOCK_ONE(NULL); - mutex_exit(softnet_lock); } /* @@ -516,7 +511,7 @@ ip_input(struct mbuf *m) */ if (TAILQ_FIRST(&in_ifaddrhead) == 0) goto bad; - IP_STATINC(IP_STAT_TOTAL); + ipstat.ips_total++; /* * If the IP header is not aligned, slurp it up into a new * mbuf with space for link headers, in the event we forward @@ -527,28 +522,28 @@ ip_input(struct mbuf *m) if ((m = m_copyup(m, sizeof(struct ip), (max_linkhdr + 3) & ~3)) == NULL) { /* XXXJRT new stat, please */ - IP_STATINC(IP_STAT_TOOSMALL); + ipstat.ips_toosmall++; return; } } else if (__predict_false(m->m_len < sizeof (struct ip))) { if ((m = m_pullup(m, sizeof (struct ip))) == NULL) { - IP_STATINC(IP_STAT_TOOSMALL); + ipstat.ips_toosmall++; return; } } ip = mtod(m, struct ip *); if (ip->ip_v != IPVERSION) { - IP_STATINC(IP_STAT_BADVERS); + ipstat.ips_badvers++; goto bad; } hlen = ip->ip_hl << 2; if (hlen < sizeof(struct ip)) { /* minimum header length */ - IP_STATINC(IP_STAT_BADHLEN); + ipstat.ips_badhlen++; goto bad; } if (hlen > m->m_len) { if ((m = m_pullup(m, hlen)) == 0) { - IP_STATINC(IP_STAT_BADHLEN); + ipstat.ips_badhlen++; return; } ip = mtod(m, struct ip *); @@ -559,7 +554,7 @@ ip_input(struct mbuf *m) * not allowed. */ if (IN_MULTICAST(ip->ip_src.s_addr)) { - IP_STATINC(IP_STAT_BADADDR); + ipstat.ips_badaddr++; goto bad; } @@ -567,7 +562,7 @@ ip_input(struct mbuf *m) if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) { - IP_STATINC(IP_STAT_BADADDR); + ipstat.ips_badaddr++; goto bad; } } @@ -605,7 +600,7 @@ ip_input(struct mbuf *m) * Check for additional length bogosity */ if (len < hlen) { - IP_STATINC(IP_STAT_BADLEN); + ipstat.ips_badlen++; goto bad; } @@ -616,7 +611,7 @@ ip_input(struct mbuf *m) * Drop packet if shorter than we expect. */ if (m->m_pkthdr.len < len) { - IP_STATINC(IP_STAT_TOOSHORT); + ipstat.ips_tooshort++; goto bad; } if (m->m_pkthdr.len > len) { @@ -785,7 +780,7 @@ ip_input(struct mbuf *m) * ip_output().) */ if (ip_mforward(m, m->m_pkthdr.rcvif) != 0) { - IP_STATINC(IP_STAT_CANTFORWARD); + ipstat.ips_cantforward++; m_freem(m); return; } @@ -797,7 +792,7 @@ ip_input(struct mbuf *m) */ if (ip->ip_p == IPPROTO_IGMP) goto ours; - IP_STATINC(IP_STAT_CANTFORWARD); + ipstat.ips_forward++; } #endif /* @@ -806,7 +801,7 @@ ip_input(struct mbuf *m) */ IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm); if (inm == NULL) { - IP_STATINC(IP_STAT_CANTFORWARD); + ipstat.ips_cantforward++; m_freem(m); return; } @@ -820,7 +815,7 @@ ip_input(struct mbuf *m) * Not for us; forward if possible and desirable. */ if (ipforwarding == 0) { - IP_STATINC(IP_STAT_CANTFORWARD); + ipstat.ips_cantforward++; m_freem(m); } else { /* @@ -831,12 +826,12 @@ ip_input(struct mbuf *m) */ if (downmatch) { icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); - IP_STATINC(IP_STAT_CANTFORWARD); + ipstat.ips_cantforward++; return; } #ifdef IPSEC if (ipsec4_in_reject(m, NULL)) { - IPSEC_STATINC(IPSEC_STAT_IN_POLVIO); + ipsecstat.in_polvio++; goto bad; } #endif @@ -864,7 +859,7 @@ ip_input(struct mbuf *m) KEY_FREESP(&sp); splx(s); if (error) { - IP_STATINC(IP_STAT_CANTFORWARD); + ipstat.ips_cantforward++; goto bad; } @@ -911,7 +906,7 @@ ours: */ off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; if ((off > 0 ? off + hlen : len) < IP_MINFRAGSIZE - 1) { - IP_STATINC(IP_STAT_BADFRAGS); + ipstat.ips_badfrags++; goto bad; } /* @@ -930,8 +925,7 @@ ours: * fragments. */ if (ip->ip_tos != fp->ipq_tos) { - IP_STATINC(IP_STAT_BADFRAGS); - IPQ_UNLOCK(); + ipstat.ips_badfrags++; goto bad; } goto found; @@ -954,7 +948,7 @@ found: */ if (ntohs(ip->ip_len) == 0 || (ntohs(ip->ip_len) & 0x7) != 0) { - IP_STATINC(IP_STAT_BADFRAGS); + ipstat.ips_badfrags++; IPQ_UNLOCK(); goto bad; } @@ -967,12 +961,12 @@ found: * attempt reassembly; if it succeeds, proceed. */ if (mff || ip->ip_off != htons(0)) { - IP_STATINC(IP_STAT_FRAGMENTS); + ipstat.ips_fragments++; s = splvm(); ipqe = pool_get(&ipqent_pool, PR_NOWAIT); splx(s); if (ipqe == NULL) { - IP_STATINC(IP_STAT_RCVMEMDROP); + ipstat.ips_rcvmemdrop++; IPQ_UNLOCK(); goto bad; } @@ -984,7 +978,7 @@ found: IPQ_UNLOCK(); return; } - IP_STATINC(IP_STAT_REASSEMBLED); + ipstat.ips_reassembled++; ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; ip->ip_len = htons(ntohs(ip->ip_len) + hlen); @@ -1002,7 +996,7 @@ found: */ if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0 && ipsec4_in_reject(m, NULL)) { - IPSEC_STATINC(IPSEC_STAT_IN_POLVIO); + ipsecstat.in_polvio++; goto bad; } #endif @@ -1052,7 +1046,7 @@ DPRINTF(("ip_input: no SP, packet discar if (ia && ip) ia->ia_ifa.ifa_data.ifad_inbytes += ntohs(ip->ip_len); #endif - IP_STATINC(IP_STAT_DELIVERED); + ipstat.ips_delivered++; { int off = hlen, nh = ip->ip_p; @@ -1064,7 +1058,7 @@ bad: return; badcsum: - IP_STATINC(IP_STAT_BADSUM); + ipstat.ips_badsum++; m_freem(m); } @@ -1122,7 +1116,8 @@ ip_reass(struct ipqent *ipqe, struct ipq else if (ip_nfragpackets >= ip_maxfragpackets) goto dropfrag; ip_nfragpackets++; - fp = malloc(sizeof (struct ipq), M_FTABLE, M_NOWAIT); + MALLOC(fp, struct ipq *, sizeof (struct ipq), + M_FTABLE, M_NOWAIT); if (fp == NULL) goto dropfrag; LIST_INSERT_HEAD(ipqhead, fp, ipq_q); @@ -1221,7 +1216,7 @@ insert: q = TAILQ_FIRST(&fp->ipq_fragq); ip = q->ipqe_ip; if ((next + (ip->ip_hl << 2)) > IP_MAXPACKET) { - IP_STATINC(IP_STAT_TOOLONG); + ipstat.ips_toolong++; ip_freef(fp); return (0); } @@ -1253,7 +1248,7 @@ insert: ip->ip_src = fp->ipq_src; ip->ip_dst = fp->ipq_dst; LIST_REMOVE(fp, ipq_q); - free(fp, M_FTABLE); + FREE(fp, M_FTABLE); ip_nfragpackets--; m->m_len += (ip->ip_hl << 2); m->m_data -= (ip->ip_hl << 2); @@ -1271,7 +1266,7 @@ dropfrag: if (fp != 0) fp->ipq_nfrags--; ip_nfrags--; - IP_STATINC(IP_STAT_FRAGDROPPED); + ipstat.ips_fragdropped++; m_freem(m); s = splvm(); pool_put(&ipqent_pool, ipqe); @@ -1306,7 +1301,7 @@ ip_freef(struct ipq *fp) printf("ip_freef: nfrags %d != %d\n", fp->ipq_nfrags, nfrags); ip_nfrags -= nfrags; LIST_REMOVE(fp, ipq_q); - free(fp, M_FTABLE); + FREE(fp, M_FTABLE); ip_nfragpackets--; } @@ -1338,7 +1333,7 @@ ip_reass_ttl_decr(u_int ticks) 0 : fp->ipq_ttl - ticks); nfp = LIST_NEXT(fp, ipq_q); if (fp->ipq_ttl == 0) { - IP_STATINC(IP_STAT_FRAGTIMEOUT); + ipstat.ips_fragtimeout++; ip_freef(fp); } else { nfrags += fp->ipq_nfrags; @@ -1389,9 +1384,7 @@ ip_slowtimo(void) static u_int dropscanidx = 0; u_int i; u_int median_ttl; - - mutex_enter(softnet_lock); - KERNEL_LOCK(1, NULL); + int s = splsoftnet(); IPQ_LOCK(); @@ -1433,35 +1426,30 @@ ip_slowtimo(void) dropscanidx = i; } IPQ_UNLOCK(); - - KERNEL_UNLOCK_ONE(NULL); - mutex_exit(softnet_lock); + splx(s); } /* - * Drain off all datagram fragments. Don't acquire softnet_lock as - * can be called from hardware interrupt context. + * Drain off all datagram fragments. */ void ip_drain(void) { - KERNEL_LOCK(1, NULL); - /* * We may be called from a device's interrupt context. If * the ipq is already busy, just bail out now. */ - if (ipq_lock_try() != 0) { - /* - * Drop half the total fragments now. If more mbufs are - * needed, we will be called again soon. - */ - ip_reass_drophalf(); - IPQ_UNLOCK(); - } + if (ipq_lock_try() == 0) + return; + + /* + * Drop half the total fragments now. If more mbufs are needed, + * we will be called again soon. + */ + ip_reass_drophalf(); - KERNEL_UNLOCK_ONE(NULL); + IPQ_UNLOCK(); } /* @@ -1692,7 +1680,7 @@ ip_dooptions(struct mbuf *m) return (0); bad: icmp_error(m, type, code, 0, 0); - IP_STATINC(IP_STAT_BADOPTIONS); + ipstat.ips_badoptions++; return (1); } @@ -1865,7 +1853,7 @@ ip_forward(struct mbuf *m, int srcrt) } #endif if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) { - IP_STATINC(IP_STAT_CANTFORWARD); + ipstat.ips_cantforward++; m_freem(m); return; } @@ -1929,15 +1917,12 @@ ip_forward(struct mbuf *m, int srcrt) (struct ip_moptions *)NULL, (struct socket *)NULL); if (error) - IP_STATINC(IP_STAT_CANTFORWARD); + ipstat.ips_cantforward++; else { - uint64_t *ips = IP_STAT_GETREF(); - ips[IP_STAT_FORWARD]++; - if (type) { - ips[IP_STAT_REDIRECTSENT]++; - IP_STAT_PUTREF(); - } else { - IP_STAT_PUTREF(); + ipstat.ips_forward++; + if (type) + ipstat.ips_redirectsent++; + else { if (mcopy) { #ifdef GATEWAY if (mcopy->m_flags & M_CANFASTFWD) @@ -1970,11 +1955,9 @@ ip_forward(struct mbuf *m, int srcrt) type = ICMP_UNREACH; code = ICMP_UNREACH_NEEDFRAG; - if ((rt = rtcache_validate(&ipforward_rt)) != NULL) - destmtu = rt->rt_ifp->if_mtu; + if ((rt = rtcache_validate(&ipforward_rt)) != NULL) { #if defined(IPSEC) || defined(FAST_IPSEC) - { /* * If the packet is routed over IPsec tunnel, tell the * originator the tunnel MTU. @@ -1990,7 +1973,10 @@ ip_forward(struct mbuf *m, int srcrt) sp = ipsec4_getpolicybyaddr(mcopy, IPSEC_DIR_OUTBOUND, IP_FORWARDING, &ipsecerror); +#endif + destmtu = rt->rt_ifp->if_mtu; +#if defined(IPSEC) || defined(FAST_IPSEC) if (sp != NULL) { /* count IPsec header size */ ipsechdr = ipsec4_hdrsiz(mcopy, @@ -2005,7 +1991,6 @@ ip_forward(struct mbuf *m, int srcrt) && sp->req->sav != NULL && sp->req->sav->sah != NULL) { ro = &sp->req->sav->sah->sa_route; - rt = rtcache_validate(ro); if (rt && rt->rt_ifp) { destmtu = rt->rt_rmx.rmx_mtu ? @@ -2021,9 +2006,9 @@ ip_forward(struct mbuf *m, int srcrt) KEY_FREESP(&sp); #endif } - } #endif /*defined(IPSEC) || defined(FAST_IPSEC)*/ - IP_STATINC(IP_STAT_CANTFRAG); + } + ipstat.ips_cantfrag++; break; case ENOBUFS: @@ -2146,13 +2131,9 @@ sysctl_net_inet_ip_pmtudto(SYSCTLFN_ARGS if (tmp < 0) return (EINVAL); - mutex_enter(softnet_lock); - ip_mtudisc_timeout = tmp; rt_timer_queue_change(ip_mtudisc_timeout_q, ip_mtudisc_timeout); - mutex_exit(softnet_lock); - return (0); } @@ -2163,19 +2144,15 @@ sysctl_net_inet_ip_pmtudto(SYSCTLFN_ARGS static int sysctl_net_inet_ip_maxflows(SYSCTLFN_ARGS) { - int error; - - error = sysctl_lookup(SYSCTLFN_CALL(rnode)); - if (error || newp == NULL) - return (error); - - mutex_enter(softnet_lock); - KERNEL_LOCK(1, NULL); + int s; - ipflow_prune(); + s = sysctl_lookup(SYSCTLFN_CALL(rnode)); + if (s || newp == NULL) + return (s); - KERNEL_UNLOCK_ONE(NULL); - mutex_exit(softnet_lock); + s = splsoftnet(); + ipflow_reap(0); + splx(s); return (0); } @@ -2197,31 +2174,19 @@ sysctl_net_inet_ip_hashsize(SYSCTLFN_ARG /* * Can only fail due to malloc() */ - mutex_enter(softnet_lock); - KERNEL_LOCK(1, NULL); - - error = ipflow_invalidate_all(tmp); - - KERNEL_UNLOCK_ONE(NULL); - mutex_exit(softnet_lock); - + if (ipflow_invalidate_all(tmp)) + return ENOMEM; } else { /* * EINVAL if not a power of 2 */ - error = EINVAL; + return EINVAL; } - return error; + return (0); } #endif /* GATEWAY */ -static int -sysctl_net_inet_ip_stats(SYSCTLFN_ARGS) -{ - - return (NETSTAT_SYSCTL(ipstat_percpu, IP_NSTATS)); -} SYSCTL_SETUP(sysctl_net_inet_ip_setup, "sysctl net.inet.ip subtree setup") { @@ -2426,15 +2391,7 @@ SYSCTL_SETUP(sysctl_net_inet_ip_setup, " CTLFLAG_PERMANENT, CTLTYPE_STRUCT, "stats", SYSCTL_DESCR("IP statistics"), - sysctl_net_inet_ip_stats, 0, NULL, 0, + NULL, 0, &ipstat, sizeof(ipstat), CTL_NET, PF_INET, IPPROTO_IP, IPCTL_STATS, CTL_EOL); } - -void -ip_statinc(u_int stat) -{ - - KASSERT(stat < IP_NSTATS); - IP_STATINC(stat); -}