Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/netinet/ip_input.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/netinet/ip_input.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.62 retrieving revision 1.82.2.7 diff -u -p -r1.62 -r1.82.2.7 --- src/sys/netinet/ip_input.c 1998/04/29 20:45:30 1.62 +++ src/sys/netinet/ip_input.c 2001/05/30 09:44:09 1.82.2.7 @@ -1,4 +1,4 @@ -/* $NetBSD: ip_input.c,v 1.62 1998/04/29 20:45:30 matt Exp $ */ +/* $NetBSD: ip_input.c,v 1.82.2.7 2001/05/30 09:44:09 he Exp $ */ /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. @@ -73,6 +73,7 @@ */ #include "opt_gateway.h" +#include "opt_pfil_hooks.h" #include "opt_mrouting.h" #include @@ -87,6 +88,7 @@ #include #include #include +#include #include #include @@ -159,6 +161,66 @@ int ipqmaxlen = IFQ_MAXLEN; struct in_ifaddrhead in_ifaddr; struct in_ifaddrhashhead *in_ifaddrhashtbl; struct ifqueue ipintrq; +struct ipstat ipstat; +u_int16_t ip_id; +int ip_defttl; + +struct ipqhead ipq; +int ipq_locked; +int ip_nfragpackets = 0; +int ip_maxfragpackets = 200; + +static __inline int ipq_lock_try __P((void)); +static __inline void ipq_unlock __P((void)); + +static __inline int +ipq_lock_try() +{ + int s; + + s = splimp(); + if (ipq_locked) { + splx(s); + return (0); + } + ipq_locked = 1; + splx(s); + return (1); +} + +static __inline void +ipq_unlock() +{ + int s; + + s = splimp(); + ipq_locked = 0; + splx(s); +} + +#ifdef DIAGNOSTIC +#define IPQ_LOCK() \ +do { \ + if (ipq_lock_try() == 0) { \ + printf("%s:%d: ipq already locked\n", __FILE__, __LINE__); \ + panic("ipq_lock"); \ + } \ +} while (0) +#define IPQ_LOCK_CHECK() \ +do { \ + if (ipq_locked == 0) { \ + printf("%s:%d: ipq lock not held\n", __FILE__, __LINE__); \ + panic("ipq lock check"); \ + } \ +} while (0) +#else +#define IPQ_LOCK() (void) ipq_lock_try() +#define IPQ_LOCK_CHECK() /* nothing */ +#endif + +#define IPQ_UNLOCK() ipq_unlock() + +struct pool ipqent_pool; /* * We need to save the IP options in case a protocol wants to respond @@ -187,6 +249,9 @@ ip_init() register struct protosw *pr; register int i; + pool_init(&ipqent_pool, sizeof(struct ipqent), 0, 0, 0, "ipqepl", + 0, NULL, NULL, M_IPQ); + pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); if (pr == 0) panic("ip_init"); @@ -206,6 +271,9 @@ ip_init() if (ip_mtudisc != 0) ip_mtudisc_timeout_q = rt_timer_queue_create(ip_mtudisc_timeout); +#ifdef GATEWAY + ipflow_init(); +#endif } struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET }; @@ -274,7 +342,17 @@ next: } ip = mtod(m, struct ip *); } - if ((ip->ip_sum = in_cksum(m, hlen)) != 0) { + + /* + * RFC1122: packets with a multicast source address are + * not allowed. + */ + if (IN_MULTICAST(ip->ip_src.s_addr)) { + /* XXX stat */ + goto bad; + } + + if (in_cksum(m, hlen) != 0) { ipstat.ips_badsum++; goto bad; } @@ -283,11 +361,19 @@ next: * Convert fields to host representation. */ NTOHS(ip->ip_len); - NTOHS(ip->ip_id); NTOHS(ip->ip_off); len = ip->ip_len; /* + * Check for additional length bogosity + */ + if (len < hlen) + { + ipstat.ips_badlen++; + goto bad; + } + + /* * Check that the amount of data in the buffers * is as at least much as the IP header would have us expect. * Trim mbufs if longer than we expect. @@ -305,9 +391,19 @@ next: m_adj(m, len - m->m_pkthdr.len); } + /* + * Assume that we can create a fast-forward IP flow entry + * based on this packet. + */ + m->m_flags |= M_CANFASTFWD; + #ifdef PFIL_HOOKS /* - * Run through list of hooks for input packets. + * Run through list of hooks for input packets. If there are any + * filters which require that additional packets in the flow are + * not fast-forwarded, they must clear the M_CANFASTFWD flag. + * Note that filters must _never_ set this flag, as another filter + * in the list may have previously cleared it. */ m0 = m; for (pfh = pfil_hook_get(PFIL_IN); pfh; pfh = pfh->pfil_link.tqe_next) @@ -315,7 +411,10 @@ next: rv = pfh->pfil_func(ip, hlen, m->m_pkthdr.rcvif, 0, &m0); if (rv) goto next; - ip = mtod(m = m0, struct ip *); + m = m0; + if (m == NULL) + goto next; + ip = mtod(m, struct ip *); } #endif /* PFIL_HOOKS */ @@ -333,7 +432,8 @@ next: * Check our list of addresses, to see if the packet is for us. */ INADDR_TO_IA(ip->ip_dst, ia); - if (ia != NULL) goto ours; + if (ia != NULL) + goto ours; if (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) { for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first; ifa != NULL; ifa = ifa->ifa_list.tqe_next) { @@ -382,13 +482,11 @@ next: * as expected when ip_mforward() is called from * ip_output().) */ - ip->ip_id = htons(ip->ip_id); if (ip_mforward(m, m->m_pkthdr.rcvif) != 0) { ipstat.ips_cantforward++; m_freem(m); goto next; } - ip->ip_id = ntohs(ip->ip_id); /* * The process-level routing demon needs to receive @@ -439,6 +537,7 @@ ours: * Look for queue of fragments * of this datagram. */ + IPQ_LOCK(); for (fp = ipq.lh_first; fp != NULL; fp = fp->ipq_q.le_next) if (ip->ip_id == fp->ipq_id && in_hosteq(ip->ip_src, fp->ipq_src) && @@ -462,6 +561,7 @@ found: */ if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) { ipstat.ips_badfrags++; + IPQ_UNLOCK(); goto bad; } } @@ -474,29 +574,36 @@ found: */ if (mff || ip->ip_off) { ipstat.ips_fragments++; - MALLOC(ipqe, struct ipqent *, sizeof (struct ipqent), - M_IPQ, M_NOWAIT); + ipqe = pool_get(&ipqent_pool, PR_NOWAIT); if (ipqe == NULL) { ipstat.ips_rcvmemdrop++; + IPQ_UNLOCK(); goto bad; } ipqe->ipqe_mff = mff; ipqe->ipqe_m = m; ipqe->ipqe_ip = ip; m = ip_reass(ipqe, fp); - if (m == 0) + if (m == 0) { + IPQ_UNLOCK(); goto next; + } ipstat.ips_reassembled++; ip = mtod(m, struct ip *); + hlen = ip->ip_hl << 2; + ip->ip_len += hlen; } else if (fp) ip_freef(fp); - } else - ip->ip_len -= hlen; + IPQ_UNLOCK(); + } /* * Switch out to protocol's input routine. */ +#if IFA_STATS + ia->ia_ifa.ifa_data.ifad_inbytes += ip->ip_len; +#endif ipstat.ips_delivered++; (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen); goto next; @@ -523,6 +630,8 @@ ip_reass(ipqe, fp) int hlen = ipqe->ipqe_ip->ip_hl << 2; int i, next; + IPQ_LOCK_CHECK(); + /* * Presence of header sizes in mbufs * would confuse code below. @@ -534,6 +643,17 @@ ip_reass(ipqe, fp) * If first fragment to arrive, create a reassembly queue. */ if (fp == 0) { + /* + * Enforce upper bound on number of fragmented packets + * for which we attempt reassembly; + * If maxfrag is 0, never accept fragments. + * If maxfrag is -1, accept all fragments without limitation. + */ + if (ip_maxfragpackets < 0) + ; + else if (ip_nfragpackets >= ip_maxfragpackets) + goto dropfrag; + ip_nfragpackets++; MALLOC(fp, struct ipq *, sizeof (struct ipq), M_FTABLE, M_NOWAIT); if (fp == NULL) @@ -591,7 +711,7 @@ ip_reass(ipqe, fp) nq = q->ipqe_q.le_next; m_freem(q->ipqe_m); LIST_REMOVE(q, ipqe_q); - FREE(q, M_IPQ); + pool_put(&ipqent_pool, q); } insert: @@ -630,11 +750,11 @@ insert: m->m_next = 0; m_cat(m, t); nq = q->ipqe_q.le_next; - FREE(q, M_IPQ); + pool_put(&ipqent_pool, q); for (q = nq; q != NULL; q = nq) { t = q->ipqe_m; nq = q->ipqe_q.le_next; - FREE(q, M_IPQ); + pool_put(&ipqent_pool, q); m_cat(m, t); } @@ -649,6 +769,7 @@ insert: ip->ip_dst = fp->ipq_dst; LIST_REMOVE(fp, ipq_q); FREE(fp, M_FTABLE); + ip_nfragpackets--; m->m_len += (ip->ip_hl << 2); m->m_data -= (ip->ip_hl << 2); /* some debugging cruft by sklower, below, will go away soon */ @@ -663,7 +784,7 @@ insert: dropfrag: ipstat.ips_fragdropped++; m_freem(m); - FREE(ipqe, M_IPQ); + pool_put(&ipqent_pool, ipqe); return (0); } @@ -677,14 +798,17 @@ ip_freef(fp) { register struct ipqent *q, *p; + IPQ_LOCK_CHECK(); + for (q = fp->ipq_fragq.lh_first; q != NULL; q = p) { p = q->ipqe_q.le_next; m_freem(q->ipqe_m); LIST_REMOVE(q, ipqe_q); - FREE(q, M_IPQ); + pool_put(&ipqent_pool, q); } LIST_REMOVE(fp, ipq_q); FREE(fp, M_FTABLE); + ip_nfragpackets--; } /* @@ -698,6 +822,7 @@ ip_slowtimo() register struct ipq *fp, *nfp; int s = splsoftnet(); + IPQ_LOCK(); for (fp = ipq.lh_first; fp != NULL; fp = nfp) { nfp = fp->ipq_q.le_next; if (--fp->ipq_ttl == 0) { @@ -705,6 +830,21 @@ ip_slowtimo() ip_freef(fp); } } + /* + * If we are over the maximum number of fragments + * (due to the limit being lowered), drain off + * enough to get down to the new limit. + */ + if (ip_maxfragpackets < 0) + ; + else { + while (ip_nfragpackets > ip_maxfragpackets && ipq.lh_first) + ip_freef(ipq.lh_first); + } + IPQ_UNLOCK(); +#ifdef GATEWAY + ipflow_slowtimo(); +#endif splx(s); } @@ -715,10 +855,19 @@ void ip_drain() { + /* + * We may be called from a device's interrupt context. If + * the ipq is already busy, just bail out now. + */ + if (ipq_lock_try() == 0) + return; + while (ipq.lh_first != NULL) { ipstat.ips_fragdropped++; ip_freef(ipq.lh_first); } + + IPQ_UNLOCK(); } /* @@ -733,11 +882,11 @@ ip_dooptions(m) struct mbuf *m; { register struct ip *ip = mtod(m, struct ip *); - register u_char *cp; + register u_char *cp, *cp0; register struct ip_timestamp *ipt; register struct in_ifaddr *ia; int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0; - struct in_addr *sin, dst; + struct in_addr dst; n_time ntime; dst = ip->ip_dst; @@ -796,7 +945,7 @@ ip_dooptions(m) break; } off--; /* 0 origin */ - if (off > optlen - sizeof(struct in_addr)) { + if ((off + sizeof(struct in_addr)) > optlen) { /* * End of source route. Should be for us. */ @@ -838,7 +987,7 @@ ip_dooptions(m) * If no space remains, ignore. */ off--; /* 0 origin */ - if (off > optlen - sizeof(struct in_addr)) + if ((off + sizeof(struct in_addr)) > optlen) break; bcopy((caddr_t)(&ip->ip_dst), (caddr_t)&ipaddr.sin_addr, sizeof(ipaddr.sin_addr)); @@ -867,14 +1016,14 @@ ip_dooptions(m) goto bad; break; } - sin = (struct in_addr *)(cp + ipt->ipt_ptr - 1); + cp0 = (cp + ipt->ipt_ptr - 1); switch (ipt->ipt_flg) { case IPOPT_TS_TSONLY: break; case IPOPT_TS_TSANDADDR: - if (ipt->ipt_ptr + sizeof(n_time) + + if (ipt->ipt_ptr - 1 + sizeof(n_time) + sizeof(struct in_addr) > ipt->ipt_len) goto bad; ipaddr.sin_addr = dst; @@ -882,16 +1031,16 @@ ip_dooptions(m) m->m_pkthdr.rcvif); if (ia == 0) continue; - bcopy((caddr_t)&ia->ia_addr.sin_addr, - (caddr_t)sin, sizeof(struct in_addr)); + bcopy(&ia->ia_addr.sin_addr, + cp0, sizeof(struct in_addr)); ipt->ipt_ptr += sizeof(struct in_addr); break; case IPOPT_TS_PRESPEC: - if (ipt->ipt_ptr + sizeof(n_time) + + if (ipt->ipt_ptr - 1 + sizeof(n_time) + sizeof(struct in_addr) > ipt->ipt_len) goto bad; - bcopy((caddr_t)sin, (caddr_t)&ipaddr.sin_addr, + bcopy(cp0, &ipaddr.sin_addr, sizeof(struct in_addr)); if (ifa_ifwithaddr((SA)&ipaddr) == 0) continue; @@ -902,7 +1051,8 @@ ip_dooptions(m) goto bad; } ntime = iptime(); - bcopy((caddr_t)&ntime, (caddr_t)cp + ipt->ipt_ptr - 1, + cp0 = (u_char *) &ntime; /* XXX GCC BUG */ + bcopy(cp0, (caddr_t)cp + ipt->ipt_ptr - 1, sizeof(n_time)); ipt->ipt_ptr += sizeof(n_time); } @@ -918,7 +1068,6 @@ ip_dooptions(m) } return (0); bad: - ip->ip_len -= ip->ip_hl << 2; /* XXX icmp_error adds in hdr length */ icmp_error(m, type, code, 0, 0); ipstat.ips_badoptions++; return (1); @@ -1061,14 +1210,15 @@ ip_stripoptions(m, mopt) register caddr_t opts; int olen; - olen = (ip->ip_hl<<2) - sizeof (struct ip); + olen = (ip->ip_hl << 2) - sizeof (struct ip); opts = (caddr_t)(ip + 1); i = m->m_len - (sizeof (struct ip) + olen); bcopy(opts + olen, opts, (unsigned)i); m->m_len -= olen; if (m->m_flags & M_PKTHDR) m->m_pkthdr.len -= olen; - ip->ip_hl = sizeof(struct ip) >> 2; + ip->ip_len -= olen; + ip->ip_hl = sizeof (struct ip) >> 2; } int inetctlerrmap[PRC_NCMDS] = { @@ -1110,15 +1260,15 @@ ip_forward(m, srcrt) dest = 0; #ifdef DIAGNOSTIC if (ipprintfs) - printf("forward: src %x dst %x ttl %x\n", - ip->ip_src.s_addr, ip->ip_dst.s_addr, ip->ip_ttl); + printf("forward: src %2.2x dst %2.2x ttl %x\n", + ntohl(ip->ip_src.s_addr), + ntohl(ip->ip_dst.s_addr), ip->ip_ttl); #endif - if (m->m_flags & M_BCAST || in_canforward(ip->ip_dst) == 0) { + if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) { ipstat.ips_cantforward++; m_freem(m); return; } - HTONS(ip->ip_id); if (ip->ip_ttl <= IPTTLDEC) { icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0); return; @@ -1165,16 +1315,20 @@ ip_forward(m, srcrt) if (rt->rt_ifa && (ip->ip_src.s_addr & ifatoia(rt->rt_ifa)->ia_subnetmask) == ifatoia(rt->rt_ifa)->ia_subnet) { - if (rt->rt_flags & RTF_GATEWAY) - dest = satosin(rt->rt_gateway)->sin_addr.s_addr; - else - dest = ip->ip_dst.s_addr; - /* Router requirements says to only send host redirects */ - type = ICMP_REDIRECT; - code = ICMP_REDIRECT_HOST; + if (rt->rt_flags & RTF_GATEWAY) + dest = satosin(rt->rt_gateway)->sin_addr.s_addr; + else + dest = ip->ip_dst.s_addr; + /* + * Router requirements says to only send host + * redirects. + */ + type = ICMP_REDIRECT; + code = ICMP_REDIRECT_HOST; #ifdef DIAGNOSTIC - if (ipprintfs) - printf("redirect (%d) to %x\n", code, (u_int32_t)dest); + if (ipprintfs) + printf("redirect (%d) to %x\n", code, + (u_int32_t)dest); #endif } } @@ -1188,8 +1342,13 @@ ip_forward(m, srcrt) if (type) ipstat.ips_redirectsent++; else { - if (mcopy) + if (mcopy) { +#ifdef GATEWAY + if (mcopy->m_flags & M_CANFASTFWD) + ipflow_create(&ipforward_rt, mcopy); +#endif m_freem(mcopy); + } return; } } @@ -1376,6 +1535,24 @@ ip_sysctl(name, namelen, oldp, oldlenp, rt_timer_queue_change(ip_mtudisc_timeout_q, ip_mtudisc_timeout); return (error); +#ifdef GATEWAY + case IPCTL_MAXFLOWS: + { + int s; + + error = sysctl_int(oldp, oldlenp, newp, newlen, + &ip_maxflows); + s = splsoftnet(); + ipflow_reap(0); + splx(s); + return (error); + } +#endif + + case IPCTL_MAXFRAGPACKETS: + return (sysctl_int(oldp, oldlenp, newp, newlen, + &ip_maxfragpackets)); + default: return (EOPNOTSUPP); }