Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/netinet/ip_input.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/netinet/ip_input.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.212.2.3 retrieving revision 1.243 diff -u -p -r1.212.2.3 -r1.243 --- src/sys/netinet/ip_input.c 2007/09/17 20:14:21 1.212.2.3 +++ src/sys/netinet/ip_input.c 2007/02/17 22:34:11 1.243 @@ -1,4 +1,4 @@ -/* $NetBSD: ip_input.c,v 1.212.2.3 2007/09/17 20:14:21 bouyer Exp $ */ +/* $NetBSD: ip_input.c,v 1.243 2007/02/17 22:34:11 dyoung Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -98,7 +98,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.212.2.3 2007/09/17 20:14:21 bouyer Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.243 2007/02/17 22:34:11 dyoung Exp $"); #include "opt_inet.h" #include "opt_gateway.h" @@ -121,6 +121,7 @@ __KERNEL_RCSID(0, "$NetBSD: ip_input.c,v #include #include #include +#include #include #include @@ -131,6 +132,7 @@ __KERNEL_RCSID(0, "$NetBSD: ip_input.c,v #include #include #include +#include #include #include #include @@ -278,10 +280,10 @@ static u_int ip_reass_ttl_decr(u_int tic static void ip_reass_drophalf(void); -static __inline int ipq_lock_try(void); -static __inline void ipq_unlock(void); +static inline int ipq_lock_try(void); +static inline void ipq_unlock(void); -static __inline int +static inline int ipq_lock_try(void) { int s; @@ -300,7 +302,7 @@ ipq_lock_try(void) return (1); } -static __inline void +static inline void ipq_unlock(void) { int s; @@ -375,8 +377,8 @@ static struct ip_srcrt { static void save_rte(u_char *, struct in_addr); #ifdef MBUFTRACE -struct mowner ip_rx_mowner = { "internet", "rx" }; -struct mowner ip_tx_mowner = { "internet", "tx" }; +struct mowner ip_rx_mowner = MOWNER_INIT("internet", "rx"); +struct mowner ip_tx_mowner = MOWNER_INIT("internet", "tx"); #endif /* @@ -413,7 +415,7 @@ ip_init(void) for (i = 0; i < IPREASS_NHASH; i++) LIST_INIT(&ipq[i]); - ip_id = time.tv_sec & 0xfffff; + ip_id = time_second & 0xfffff; ipintrq.ifq_maxlen = ipqmaxlen; ip_nmbclusters_changed(); @@ -444,7 +446,10 @@ ip_init(void) #endif /* MBUFTRACE */ } -struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET }; +struct sockaddr_in ipaddr = { + .sin_len = sizeof(ipaddr), + .sin_family = AF_INET, +}; struct route ipforward_rt; /* @@ -456,7 +461,7 @@ ipintr(void) int s; struct mbuf *m; - while (1) { + while (!IF_IS_EMPTY(&ipintrq)) { s = splnet(); IF_DEQUEUE(&ipintrq, m); splx(s); @@ -483,12 +488,13 @@ ip_input(struct mbuf *m) int downmatch; int checkif; int srcrt = 0; + int s; u_int hash; #ifdef FAST_IPSEC struct m_tag *mtag; struct tdb_ident *tdbi; struct secpolicy *sp; - int s, error; + int error; #endif /* FAST_IPSEC */ MCLAIM(m, &ip_rx_mowner); @@ -731,7 +737,7 @@ ip_input(struct mbuf *m) } if (ia != NULL) goto ours; - if (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) { + if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) { IFADDR_FOREACH(ifa, m->m_pkthdr.rcvif) { if (ifa->ifa_addr->sa_family != AF_INET) continue; @@ -937,7 +943,9 @@ found: */ if (mff || ip->ip_off != htons(0)) { ipstat.ips_fragments++; + s = splvm(); ipqe = pool_get(&ipqent_pool, PR_NOWAIT); + splx(s); if (ipqe == NULL) { ipstat.ips_rcvmemdrop++; IPQ_UNLOCK(); @@ -973,7 +981,7 @@ found: goto bad; } #endif -#if FAST_IPSEC +#ifdef FAST_IPSEC /* * enforce IPsec policy checking if we are seeing last header. * note that we do not visit this with protocols with pcb layer @@ -1005,6 +1013,7 @@ found: /* XXX error stat??? */ error = EINVAL; DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/ + goto bad; } splx(s); if (error) @@ -1049,7 +1058,7 @@ ip_reass(struct ipqent *ipqe, struct ipq struct ip *ip; struct mbuf *t; int hlen = ipqe->ipqe_ip->ip_hl << 2; - int i, next; + int i, next, s; IPQ_LOCK_CHECK(); @@ -1154,7 +1163,9 @@ ip_reass(struct ipqent *ipqe, struct ipq nq = TAILQ_NEXT(q, ipqe_q); m_freem(q->ipqe_m); TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q); + s = splvm(); pool_put(&ipqent_pool, q); + splx(s); fp->ipq_nfrags--; ip_nfrags--; } @@ -1195,11 +1206,15 @@ insert: m->m_next = 0; m_cat(m, t); nq = TAILQ_NEXT(q, ipqe_q); + s = splvm(); pool_put(&ipqent_pool, q); + splx(s); for (q = nq; q != NULL; q = nq) { t = q->ipqe_m; nq = TAILQ_NEXT(q, ipqe_q); + s = splvm(); pool_put(&ipqent_pool, q); + splx(s); m_cat(m, t); } ip_nfrags -= fp->ipq_nfrags; @@ -1234,7 +1249,9 @@ dropfrag: ip_nfrags--; ipstat.ips_fragdropped++; m_freem(m); + s = splvm(); pool_put(&ipqent_pool, ipqe); + splx(s); return (0); } @@ -1247,6 +1264,7 @@ ip_freef(struct ipq *fp) { struct ipqent *q, *p; u_int nfrags = 0; + int s; IPQ_LOCK_CHECK(); @@ -1255,7 +1273,9 @@ ip_freef(struct ipq *fp) m_freem(q->ipqe_m); nfrags++; TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q); + s = splvm(); pool_put(&ipqent_pool, q); + splx(s); } if (nfrags != fp->ipq_nfrags) @@ -1655,24 +1675,23 @@ bad: struct in_ifaddr * ip_rtaddr(struct in_addr dst) { - struct sockaddr_in *sin; + if (!in_hosteq(dst, satocsin(rtcache_getdst(&ipforward_rt))->sin_addr)) + rtcache_free(&ipforward_rt); + else + rtcache_check(&ipforward_rt); - sin = satosin(&ipforward_rt.ro_dst); + if (ipforward_rt.ro_rt == NULL) { + struct sockaddr_in *sin = satosin(&ipforward_rt.ro_dst); - if (ipforward_rt.ro_rt == 0 || !in_hosteq(dst, sin->sin_addr)) { - if (ipforward_rt.ro_rt) { - RTFREE(ipforward_rt.ro_rt); - ipforward_rt.ro_rt = 0; - } sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); sin->sin_addr = dst; - rtalloc(&ipforward_rt); + rtcache_init(&ipforward_rt); + if (ipforward_rt.ro_rt == NULL) + return NULL; } - if (ipforward_rt.ro_rt == 0) - return ((struct in_ifaddr *)0); - return (ifatoia(ipforward_rt.ro_rt->rt_ifa)); + return ifatoia(ipforward_rt.ro_rt->rt_ifa); } /* @@ -1708,10 +1727,10 @@ ip_srcroute(void) struct mbuf *m; if (ip_nhops == 0) - return ((struct mbuf *)0); + return NULL; m = m_get(M_DONTWAIT, MT_SOOPTS); if (m == 0) - return ((struct mbuf *)0); + return NULL; MCLAIM(m, &inetdomain.dom_mowner); #define OPTSIZ (sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt)) @@ -1819,15 +1838,10 @@ void ip_forward(struct mbuf *m, int srcrt) { struct ip *ip = mtod(m, struct ip *); - struct sockaddr_in *sin; struct rtentry *rt; - int error, type = 0, code = 0; + int error, type = 0, code = 0, destmtu = 0; struct mbuf *mcopy; n_long dest; - struct ifnet *destifp; -#if defined(IPSEC) || defined(FAST_IPSEC) - struct ifnet dummyifp; -#endif /* * We are now in the output path. @@ -1841,10 +1855,10 @@ ip_forward(struct mbuf *m, int srcrt) dest = 0; #ifdef DIAGNOSTIC - if (ipprintfs) - printf("forward: src %2.2x dst %2.2x ttl %x\n", - ntohl(ip->ip_src.s_addr), - ntohl(ip->ip_dst.s_addr), ip->ip_ttl); + if (ipprintfs) { + printf("forward: src %s ", inet_ntoa(ip->ip_src)); + printf("dst %s ttl %x\n", inet_ntoa(ip->ip_dst), ip->ip_ttl); + } #endif if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) { ipstat.ips_cantforward++; @@ -1855,26 +1869,26 @@ ip_forward(struct mbuf *m, int srcrt) icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0); return; } - ip->ip_ttl -= IPTTLDEC; - sin = satosin(&ipforward_rt.ro_dst); - if ((rt = ipforward_rt.ro_rt) == 0 || - !in_hosteq(ip->ip_dst, sin->sin_addr)) { - if (ipforward_rt.ro_rt) { - RTFREE(ipforward_rt.ro_rt); - ipforward_rt.ro_rt = 0; - } + if (!in_hosteq(ip->ip_dst, + satocsin(rtcache_getdst(&ipforward_rt))->sin_addr)) + rtcache_free(&ipforward_rt); + else + rtcache_check(&ipforward_rt); + if (ipforward_rt.ro_rt == NULL) { + struct sockaddr_in *sin = satosin(&ipforward_rt.ro_dst); + sin->sin_family = AF_INET; - sin->sin_len = sizeof(struct sockaddr_in); + sin->sin_len = sizeof(*sin); sin->sin_addr = ip->ip_dst; - rtalloc(&ipforward_rt); - if (ipforward_rt.ro_rt == 0) { - icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0); + rtcache_init(&ipforward_rt); + if (ipforward_rt.ro_rt == NULL) { + icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, dest, 0); return; } - rt = ipforward_rt.ro_rt; } + rt = ipforward_rt.ro_rt; /* * Save at most 68 bytes of the packet in case @@ -1885,6 +1899,8 @@ ip_forward(struct mbuf *m, int srcrt) if (mcopy) mcopy = m_pullup(mcopy, ip->ip_hl << 2); + ip->ip_ttl -= IPTTLDEC; + /* * If forwarding packet using same interface that it came in on, * perhaps should send a redirect to sender to shortcut a hop. @@ -1918,7 +1934,7 @@ ip_forward(struct mbuf *m, int srcrt) } } - error = ip_output(m, (struct mbuf *)0, &ipforward_rt, + error = ip_output(m, NULL, &ipforward_rt, (IP_FORWARDING | (ip_directedbcast ? IP_ALLOWBROADCAST : 0)), (struct ip_moptions *)NULL, (struct socket *)NULL); @@ -1941,7 +1957,6 @@ ip_forward(struct mbuf *m, int srcrt) } if (mcopy == NULL) return; - destifp = NULL; switch (error) { @@ -1962,8 +1977,8 @@ ip_forward(struct mbuf *m, int srcrt) type = ICMP_UNREACH; code = ICMP_UNREACH_NEEDFRAG; #if !defined(IPSEC) && !defined(FAST_IPSEC) - if (ipforward_rt.ro_rt) - destifp = ipforward_rt.ro_rt->rt_ifp; + if (ipforward_rt.ro_rt != NULL) + destmtu = ipforward_rt.ro_rt->rt_ifp->if_mtu; #else /* * If the packet is routed over IPsec tunnel, tell the @@ -1971,7 +1986,7 @@ ip_forward(struct mbuf *m, int srcrt) * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz * XXX quickhack!!! */ - if (ipforward_rt.ro_rt) { + if (ipforward_rt.ro_rt != NULL) { struct secpolicy *sp; int ipsecerror; size_t ipsechdr; @@ -1982,7 +1997,7 @@ ip_forward(struct mbuf *m, int srcrt) &ipsecerror); if (sp == NULL) - destifp = ipforward_rt.ro_rt->rt_ifp; + destmtu = ipforward_rt.ro_rt->rt_ifp->if_mtu; else { /* count IPsec header size */ ipsechdr = ipsec4_hdrsiz(mcopy, @@ -1991,24 +2006,18 @@ ip_forward(struct mbuf *m, int srcrt) /* * find the correct route for outer IPv4 * header, compute tunnel MTU. - * - * XXX BUG ALERT - * The "dummyifp" code relies upon the fact - * that icmp_error() touches only ifp->if_mtu. */ - /*XXX*/ - destifp = NULL; + if (sp->req != NULL && sp->req->sav != NULL && sp->req->sav->sah != NULL) { ro = &sp->req->sav->sah->sa_route; if (ro->ro_rt && ro->ro_rt->rt_ifp) { - dummyifp.if_mtu = + destmtu = ro->ro_rt->rt_rmx.rmx_mtu ? ro->ro_rt->rt_rmx.rmx_mtu : ro->ro_rt->rt_ifp->if_mtu; - dummyifp.if_mtu -= ipsechdr; - destifp = &dummyifp; + destmtu -= ipsechdr; } } @@ -2040,7 +2049,7 @@ ip_forward(struct mbuf *m, int srcrt) break; #endif } - icmp_error(mcopy, type, code, dest, destifp); + icmp_error(mcopy, type, code, dest, destmtu); } void @@ -2101,6 +2110,31 @@ ip_savecontrol(struct inpcb *inp, struct } /* + * sysctl helper routine for net.inet.ip.forwsrcrt. + */ +static int +sysctl_net_inet_ip_forwsrcrt(SYSCTLFN_ARGS) +{ + int error, tmp; + struct sysctlnode node; + + node = *rnode; + tmp = ip_forwsrcrt; + node.sysctl_data = &tmp; + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + if (error || newp == NULL) + return (error); + + if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_FORWSRCRT, + 0, NULL, NULL, NULL)) + return (EPERM); + + ip_forwsrcrt = tmp; + + return (0); +} + +/* * sysctl helper routine for net.inet.ip.mtudisctimeout. checks the * range of the new value and tweaks timers if it changes. */ @@ -2201,11 +2235,11 @@ SYSCTL_SETUP(sysctl_net_inet_ip_setup, " IPCTL_DEFMTU, CTL_EOL); #endif /* IPCTL_DEFMTU */ sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT|CTLFLAG_READONLY1, + CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "forwsrcrt", SYSCTL_DESCR("Enable forwarding of source-routed " "datagrams"), - NULL, 0, &ip_forwsrcrt, 0, + sysctl_net_inet_ip_forwsrcrt, 0, &ip_forwsrcrt, 0, CTL_NET, PF_INET, IPPROTO_IP, IPCTL_FORWSRCRT, CTL_EOL); sysctl_createv(clog, 0, NULL, NULL, @@ -2340,4 +2374,11 @@ SYSCTL_SETUP(sysctl_net_inet_ip_setup, " NULL, 0, &ip_do_loopback_cksum, 0, CTL_NET, PF_INET, IPPROTO_IP, IPCTL_LOOPBACKCKSUM, CTL_EOL); + sysctl_createv(clog, 0, NULL, NULL, + CTLFLAG_PERMANENT, + CTLTYPE_STRUCT, "stats", + SYSCTL_DESCR("IP statistics"), + NULL, 0, &ipstat, sizeof(ipstat), + CTL_NET, PF_INET, IPPROTO_IP, IPCTL_STATS, + CTL_EOL); }