Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/netinet/ip_input.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/netinet/ip_input.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.56 retrieving revision 1.74.2.1 diff -u -p -r1.56 -r1.74.2.1 --- src/sys/netinet/ip_input.c 1998/01/28 02:36:10 1.56 +++ src/sys/netinet/ip_input.c 1998/12/11 04:53:08 1.74.2.1 @@ -1,4 +1,4 @@ -/* $NetBSD: ip_input.c,v 1.56 1998/01/28 02:36:10 thorpej Exp $ */ +/* $NetBSD: ip_input.c,v 1.74.2.1 1998/12/11 04:53:08 kenh Exp $ */ /* * Copyright (c) 1982, 1986, 1988, 1993 @@ -35,6 +35,45 @@ * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 */ +/*- + * Copyright (c) 1998 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Public Access Networks Corporation ("Panix"). It was developed under + * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "opt_gateway.h" +#include "opt_pfil_hooks.h" #include "opt_mrouting.h" #include @@ -49,6 +88,7 @@ #include #include #include +#include #include #include @@ -85,6 +125,9 @@ #ifndef IPMTUDISC #define IPMTUDISC 0 #endif +#ifndef IPMTUDISCTIMEOUT +#define IPMTUDISCTIMEOUT (10 * 60) /* as per RFC 1191 */ +#endif /* * Note: DIRECTED_BROADCAST is handled this way so that previous @@ -104,16 +147,26 @@ int ip_forwsrcrt = IPFORWSRCRT; int ip_directedbcast = IPDIRECTEDBCAST; int ip_allowsrcrt = IPALLOWSRCRT; int ip_mtudisc = IPMTUDISC; +u_int ip_mtudisc_timeout = IPMTUDISCTIMEOUT; #ifdef DIAGNOSTIC int ipprintfs = 0; #endif +struct rttimer_queue *ip_mtudisc_timeout_q = NULL; + extern struct domain inetdomain; extern struct protosw inetsw[]; u_char ip_protox[IPPROTO_MAX]; int ipqmaxlen = IFQ_MAXLEN; struct in_ifaddrhead in_ifaddr; +struct in_ifaddrhashhead *in_ifaddrhashtbl; struct ifqueue ipintrq; +struct ipstat ipstat; +u_int16_t ip_id; +int ip_defttl; +struct ipqhead ipq; + +struct pool ipqent_pool; /* * We need to save the IP options in case a protocol wants to respond @@ -142,6 +195,9 @@ ip_init() register struct protosw *pr; register int i; + pool_init(&ipqent_pool, sizeof(struct ipqent), 0, 0, 0, "ipqepl", + 0, NULL, NULL, M_IPQ); + pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); if (pr == 0) panic("ip_init"); @@ -156,6 +212,14 @@ ip_init() ip_id = time.tv_sec & 0xffff; ipintrq.ifq_maxlen = ipqmaxlen; TAILQ_INIT(&in_ifaddr); + in_ifaddrhashtbl = + hashinit(IN_IFADDR_HASH_SIZE, M_IFADDR, M_WAITOK, &in_ifaddrhash); + if (ip_mtudisc != 0) + ip_mtudisc_timeout_q = + rt_timer_queue_create(ip_mtudisc_timeout); +#ifdef GATEWAY + ipflow_init(); +#endif } struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET }; @@ -172,6 +236,7 @@ ipintr() register struct mbuf *m; register struct ipq *fp; register struct in_ifaddr *ia; + register struct ifaddr *ifa; struct ipqent *ipqe; int hlen = 0, mff, len, s; #ifdef PFIL_HOOKS @@ -254,17 +319,30 @@ next: m_adj(m, len - m->m_pkthdr.len); } + /* + * Assume that we can create a fast-forward IP flow entry + * based on this packet. + */ + m->m_flags |= M_CANFASTFWD; + #ifdef PFIL_HOOKS /* - * Run through list of hooks for input packets. + * Run through list of hooks for input packets. If there are any + * filters which require that additional packets in the flow are + * not fast-forwarded, they must clear the M_CANFASTFWD flag. + * Note that filters must _never_ set this flag, as another filter + * in the list may have previously cleared it. */ m0 = m; - for (pfh = pfil_hook_get(PFIL_IN); pfh; pfh = pfh->pfil_link.le_next) + for (pfh = pfil_hook_get(PFIL_IN); pfh; pfh = pfh->pfil_link.tqe_next) if (pfh->pfil_func) { rv = pfh->pfil_func(ip, hlen, m->m_pkthdr.rcvif, 0, &m0); if (rv) goto next; - ip = mtod(m = m0, struct ip *); + m = m0; + if (m == NULL) + goto next; + ip = mtod(m, struct ip *); } #endif /* PFIL_HOOKS */ @@ -281,12 +359,22 @@ next: /* * Check our list of addresses, to see if the packet is for us. */ - for (ia = in_ifaddr.tqh_first; ia; ia = ia->ia_list.tqe_next) { - if (in_hosteq(ip->ip_dst, ia->ia_addr.sin_addr)) - goto ours; - if (((ip_directedbcast == 0) || (ip_directedbcast && - ia->ia_ifp == m->m_pkthdr.rcvif)) && - (ia->ia_ifp->if_flags & IFF_BROADCAST)) { + s = splimp(); + INADDR_TO_IA(ip->ip_dst, ia); + if (ia != NULL) { + ifa_delref(&ia->ia_ifa); + splx(s); + goto ours; + } + if (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) { + for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first; + ifa != NULL; ifa = ifa->ifa_list.tqe_next) { + if (ifa->ifa_addr->sa_family != AF_INET) continue; + ia = ifatoia(ifa); + /* + * Don't ifa_addref ia as we don't use it after + * we splx(s) below + */ if (in_hosteq(ip->ip_dst, ia->ia_broadaddr.sin_addr) || in_hosteq(ip->ip_dst, ia->ia_netbroadcast) || /* @@ -294,17 +382,21 @@ next: * either for subnet or net. */ ip->ip_dst.s_addr == ia->ia_subnet || - ip->ip_dst.s_addr == ia->ia_net) + ip->ip_dst.s_addr == ia->ia_net) { + splx(s); + goto ours; + } + /* + * An interface with IP address zero accepts + * all packets that arrive on that interface. + */ + if (in_nullhost(ia->ia_addr.sin_addr)) { + splx(s); goto ours; + } } - /* - * An interface with IP address zero accepts - * all packets that arrive on that interface. - */ - if ((ia->ia_ifp == m->m_pkthdr.rcvif) && - in_nullhost(ia->ia_addr.sin_addr)) - goto ours; } + splx(s); if (IN_MULTICAST(ip->ip_dst.s_addr)) { struct in_multi *inm; #ifdef MROUTING @@ -423,8 +515,7 @@ found: */ if (mff || ip->ip_off) { ipstat.ips_fragments++; - MALLOC(ipqe, struct ipqent *, sizeof (struct ipqent), - M_IPQ, M_NOWAIT); + ipqe = pool_get(&ipqent_pool, PR_NOWAIT); if (ipqe == NULL) { ipstat.ips_rcvmemdrop++; goto bad; @@ -437,6 +528,7 @@ found: goto next; ipstat.ips_reassembled++; ip = mtod(m, struct ip *); + hlen = ip->ip_hl << 2; } else if (fp) ip_freef(fp); @@ -540,7 +632,7 @@ ip_reass(ipqe, fp) nq = q->ipqe_q.le_next; m_freem(q->ipqe_m); LIST_REMOVE(q, ipqe_q); - FREE(q, M_IPQ); + pool_put(&ipqent_pool, q); } insert: @@ -579,11 +671,11 @@ insert: m->m_next = 0; m_cat(m, t); nq = q->ipqe_q.le_next; - FREE(q, M_IPQ); + pool_put(&ipqent_pool, q); for (q = nq; q != NULL; q = nq) { t = q->ipqe_m; nq = q->ipqe_q.le_next; - FREE(q, M_IPQ); + pool_put(&ipqent_pool, q); m_cat(m, t); } @@ -612,7 +704,7 @@ insert: dropfrag: ipstat.ips_fragdropped++; m_freem(m); - FREE(ipqe, M_IPQ); + pool_put(&ipqent_pool, ipqe); return (0); } @@ -630,7 +722,7 @@ ip_freef(fp) p = q->ipqe_q.le_next; m_freem(q->ipqe_m); LIST_REMOVE(q, ipqe_q); - FREE(q, M_IPQ); + pool_put(&ipqent_pool, q); } LIST_REMOVE(fp, ipq_q); FREE(fp, M_FTABLE); @@ -654,6 +746,9 @@ ip_slowtimo() ip_freef(fp); } } +#ifdef GATEWAY + ipflow_slowtimo(); +#endif splx(s); } @@ -750,6 +845,7 @@ ip_dooptions(m) * End of source route. Should be for us. */ save_rte(cp, ip->ip_src); + ifa_delref(&ia->ia_ifa); break; } /* @@ -757,6 +853,7 @@ ip_dooptions(m) */ bcopy((caddr_t)(cp + off), (caddr_t)&ipaddr.sin_addr, sizeof(ipaddr.sin_addr)); + ifa_delref(&ia->ia_ifa); if (opt == IPOPT_SSRR) { #define INA struct in_ifaddr * #define SA struct sockaddr * @@ -776,6 +873,7 @@ ip_dooptions(m) * Let ip_intr's mcast routing check handle mcast pkts */ forward = !IN_MULTICAST(ip->ip_dst.s_addr); + ifa_delref(&ia->ia_ifa); break; case IPOPT_RR: @@ -804,6 +902,7 @@ ip_dooptions(m) bcopy((caddr_t)&ia->ia_addr.sin_addr, (caddr_t)(cp + off), sizeof(struct in_addr)); cp[IPOPT_OFFSET] += sizeof(struct in_addr); + ifa_delref(&ia->ia_ifa); break; case IPOPT_TS: @@ -823,7 +922,7 @@ ip_dooptions(m) break; case IPOPT_TS_TSANDADDR: - if (ipt->ipt_ptr + sizeof(n_time) + + if (ipt->ipt_ptr - 1 + sizeof(n_time) + sizeof(struct in_addr) > ipt->ipt_len) goto bad; ipaddr.sin_addr = dst; @@ -833,17 +932,20 @@ ip_dooptions(m) continue; bcopy((caddr_t)&ia->ia_addr.sin_addr, (caddr_t)sin, sizeof(struct in_addr)); + ifa_delref(&ia->ia_ifa); ipt->ipt_ptr += sizeof(struct in_addr); break; case IPOPT_TS_PRESPEC: - if (ipt->ipt_ptr + sizeof(n_time) + + if (ipt->ipt_ptr - 1 + sizeof(n_time) + sizeof(struct in_addr) > ipt->ipt_len) goto bad; bcopy((caddr_t)sin, (caddr_t)&ipaddr.sin_addr, sizeof(struct in_addr)); - if (ifa_ifwithaddr((SA)&ipaddr) == 0) + ia = (INA)ifa_ifwithaddr((SA)&ipaddr); + if (ia == 0) continue; + ifa_delref(&ia->ia_ifa); ipt->ipt_ptr += sizeof(struct in_addr); break; @@ -882,6 +984,8 @@ ip_rtaddr(dst) struct in_addr dst; { register struct sockaddr_in *sin; + struct in_ifaddr *ia; + int s; sin = satosin(&ipforward_rt.ro_dst); @@ -898,7 +1002,11 @@ ip_rtaddr(dst) } if (ipforward_rt.ro_rt == 0) return ((struct in_ifaddr *)0); - return (ifatoia(ipforward_rt.ro_rt->rt_ifa)); + s = splimp(); + ia = (ifatoia(ipforward_rt.ro_rt->rt_ifa)); + ifa_addref(&ia->ia_ifa); + splx(s); + return ia; } /* @@ -1059,8 +1167,9 @@ ip_forward(m, srcrt) dest = 0; #ifdef DIAGNOSTIC if (ipprintfs) - printf("forward: src %x dst %x ttl %x\n", - ip->ip_src.s_addr, ip->ip_dst.s_addr, ip->ip_ttl); + printf("forward: src %2.2x dst %2.2x ttl %x\n", + ntohl(ip->ip_src.s_addr), + ntohl(ip->ip_dst.s_addr), ip->ip_ttl); #endif if (m->m_flags & M_BCAST || in_canforward(ip->ip_dst) == 0) { ipstat.ips_cantforward++; @@ -1137,8 +1246,13 @@ ip_forward(m, srcrt) if (type) ipstat.ips_redirectsent++; else { - if (mcopy) + if (mcopy) { +#ifdef GATEWAY + if (mcopy->m_flags & M_CANFASTFWD) + ipflow_create(&ipforward_rt, mcopy); +#endif m_freem(mcopy); + } return; } } @@ -1284,8 +1398,16 @@ ip_sysctl(name, namelen, oldp, oldlenp, return (sysctl_int(oldp, oldlenp, newp, newlen, &subnetsarelocal)); case IPCTL_MTUDISC: - return (sysctl_int(oldp, oldlenp, newp, newlen, - &ip_mtudisc)); + error = sysctl_int(oldp, oldlenp, newp, newlen, + &ip_mtudisc); + if (ip_mtudisc != 0 && ip_mtudisc_timeout_q == NULL) { + ip_mtudisc_timeout_q = + rt_timer_queue_create(ip_mtudisc_timeout); + } else if (ip_mtudisc == 0 && ip_mtudisc_timeout_q != NULL) { + rt_timer_queue_destroy(ip_mtudisc_timeout_q, TRUE); + ip_mtudisc_timeout_q = NULL; + } + return error; case IPCTL_ANONPORTMIN: old = anonportmin; error = sysctl_int(oldp, oldlenp, newp, newlen, &anonportmin); @@ -1310,6 +1432,26 @@ ip_sysctl(name, namelen, oldp, oldlenp, return (EINVAL); } return (error); + case IPCTL_MTUDISCTIMEOUT: + error = sysctl_int(oldp, oldlenp, newp, newlen, + &ip_mtudisc_timeout); + if (ip_mtudisc_timeout_q != NULL) + rt_timer_queue_change(ip_mtudisc_timeout_q, + ip_mtudisc_timeout); + return (error); +#ifdef GATEWAY + case IPCTL_MAXFLOWS: + { + int s; + + error = sysctl_int(oldp, oldlenp, newp, newlen, + &ip_maxflows); + s = splsoftnet(); + ipflow_reap(0); + splx(s); + return (error); + } +#endif default: return (EOPNOTSUPP); }