Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/netinet/ip_input.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/netinet/ip_input.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.334 retrieving revision 1.337.2.3 diff -u -p -r1.334 -r1.337.2.3 --- src/sys/netinet/ip_input.c 2016/07/06 05:27:52 1.334 +++ src/sys/netinet/ip_input.c 2017/01/07 08:56:51 1.337.2.3 @@ -1,4 +1,4 @@ -/* $NetBSD: ip_input.c,v 1.334 2016/07/06 05:27:52 ozaki-r Exp $ */ +/* $NetBSD: ip_input.c,v 1.337.2.3 2017/01/07 08:56:51 pgoyette Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -91,7 +91,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.334 2016/07/06 05:27:52 ozaki-r Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.337.2.3 2017/01/07 08:56:51 pgoyette Exp $"); #ifdef _KERNEL_OPT #include "opt_inet.h" @@ -101,6 +101,7 @@ __KERNEL_RCSID(0, "$NetBSD: ip_input.c,v #include "opt_mrouting.h" #include "opt_mbuftrace.h" #include "opt_inet_csum.h" +#include "opt_net_mpsafe.h" #endif #include "arp.h" @@ -286,7 +287,7 @@ static void ipintr(void *); static void ip_input(struct mbuf *); static void ip_forward(struct mbuf *, int, struct ifnet *); static bool ip_dooptions(struct mbuf *); -static struct in_ifaddr *ip_rtaddr(struct in_addr); +static struct in_ifaddr *ip_rtaddr(struct in_addr, struct psref *); static void sysctl_net_inet_ip_setup(struct sysctllog **); static struct in_ifaddr *ip_match_our_address(struct ifnet *, struct ip *, @@ -294,9 +295,13 @@ static struct in_ifaddr *ip_match_our_ad static struct in_ifaddr *ip_match_our_address_broadcast(struct ifnet *, struct ip *); -/* XXX: Not yet enabled. */ +#ifdef NET_MPSAFE +#define SOFTNET_LOCK() mutex_enter(softnet_lock) +#define SOFTNET_UNLOCK() mutex_exit(softnet_lock) +#else #define SOFTNET_LOCK() KASSERT(mutex_owned(softnet_lock)) #define SOFTNET_UNLOCK() KASSERT(mutex_owned(softnet_lock)) +#endif /* * IP initialization: fill in IP protocol switch table. @@ -330,7 +335,6 @@ ip_init(void) ip_ids = ip_id_init(); ip_id = time_uptime & 0xfffff; - ip_mtudisc_timeout_q = rt_timer_queue_create(ip_mtudisc_timeout); #ifdef GATEWAY ipflow_init(); #endif @@ -381,7 +385,7 @@ ip_match_our_address(struct ifnet *ifp, if ((ia->ia_ifp->if_flags & IFF_UP) != 0) break; else - downmatch++; + (*downmatch)++; } } @@ -394,7 +398,7 @@ ip_match_our_address_broadcast(struct if struct in_ifaddr *ia = NULL; struct ifaddr *ifa; - IFADDR_FOREACH(ifa, ifp) { + IFADDR_READER_FOREACH(ifa, ifp) { if (ifa->ifa_addr->sa_family != AF_INET) continue; ia = ifatoia(ifa); @@ -432,11 +436,15 @@ ipintr(void *arg __unused) KASSERT(cpu_softintr_p()); +#ifndef NET_MPSAFE mutex_enter(softnet_lock); +#endif while ((m = pktq_dequeue(ip_pktq)) != NULL) { ip_input(m); } +#ifndef NET_MPSAFE mutex_exit(softnet_lock); +#endif } /* @@ -447,12 +455,13 @@ static void ip_input(struct mbuf *m) { struct ip *ip = NULL; - struct in_ifaddr *ia; + struct in_ifaddr *ia = NULL; int hlen = 0, len; int downmatch; int srcrt = 0; ifnet_t *ifp; struct psref psref; + int s; KASSERTMSG(cpu_softintr_p(), "ip_input: not in the software " "interrupt handler; synchronization assumptions violated"); @@ -469,9 +478,8 @@ ip_input(struct mbuf *m) * are receiving, can't do anything with incoming packets yet. * Note: we pre-check without locks held. */ - if (!TAILQ_FIRST(&in_ifaddrhead)) { + if (IN_ADDRLIST_READER_EMPTY()) goto out; - } IP_STATINC(IP_STAT_TOTAL); /* @@ -609,10 +617,9 @@ ip_input(struct mbuf *m) struct in_addr odst = ip->ip_dst; bool freed; - SOFTNET_LOCK(); freed = pfil_run_hooks(inet_pfil_hook, &m, ifp, PFIL_IN) != 0; - SOFTNET_UNLOCK(); if (freed || m == NULL) { + m = NULL; goto out; } ip = mtod(m, struct ip *); @@ -642,6 +649,7 @@ ip_input(struct mbuf *m) if ((*altq_input)(m, AF_INET) == 0) { /* Packet dropped by traffic conditioner. */ SOFTNET_UNLOCK(); + m = NULL; goto out; } SOFTNET_UNLOCK(); @@ -655,8 +663,10 @@ ip_input(struct mbuf *m) * to be sent and the original packet to be freed). */ ip_nhops = 0; /* for source routed packets */ - if (hlen > sizeof (struct ip) && ip_dooptions(m)) + if (hlen > sizeof (struct ip) && ip_dooptions(m)) { + m = NULL; goto out; + } /* * Check our list of addresses, to see if the packet is for us. @@ -666,15 +676,21 @@ ip_input(struct mbuf *m) * or IN_IFF_NOTREADY addresses as not mine. */ downmatch = 0; + s = pserialize_read_enter(); ia = ip_match_our_address(ifp, ip, &downmatch); - if (ia != NULL) + if (ia != NULL) { + pserialize_read_exit(s); goto ours; + } if (ifp->if_flags & IFF_BROADCAST) { ia = ip_match_our_address_broadcast(ifp, ip); - if (ia != NULL) + if (ia != NULL) { + pserialize_read_exit(s); goto ours; + } } + pserialize_read_exit(s); if (IN_MULTICAST(ip->ip_dst.s_addr)) { #ifdef MROUTING @@ -811,8 +827,17 @@ ours: * Switch out to protocol's input routine. */ #if IFA_STATS - if (ia && ip) - ia->ia_ifa.ifa_data.ifad_inbytes += ntohs(ip->ip_len); + if (ia && ip) { + struct in_ifaddr *_ia; + /* + * Keep a reference from ip_match_our_address with psref + * is expensive, so explore ia here again. + */ + s = pserialize_read_enter(); + _ia = in_get_ia(ip->ip_dst); + _ia->ia_ifa.ifa_data.ifad_inbytes += ntohs(ip->ip_len); + pserialize_read_exit(s); + } #endif IP_STATINC(IP_STAT_DELIVERED); @@ -874,6 +899,8 @@ ip_dooptions(struct mbuf *m) int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0; struct in_addr dst; n_time ntime; + struct ifaddr *ifa; + int s; dst = ip->ip_dst; cp = (u_char *)(ip + 1); @@ -910,7 +937,8 @@ ip_dooptions(struct mbuf *m) * address is on directly accessible net. */ case IPOPT_LSRR: - case IPOPT_SSRR: + case IPOPT_SSRR: { + struct psref psref; if (ip_allowsrcrt == 0) { type = ICMP_UNREACH; code = ICMP_UNREACH_NET_PROHIB; @@ -925,8 +953,11 @@ ip_dooptions(struct mbuf *m) goto bad; } ipaddr.sin_addr = ip->ip_dst; - ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr))); - if (ia == 0) { + + s = pserialize_read_enter(); + ifa = ifa_ifwithaddr(sintosa(&ipaddr)); + if (ifa == NULL) { + pserialize_read_exit(s); if (opt == IPOPT_SSRR) { type = ICMP_UNREACH; code = ICMP_UNREACH_SRCFAIL; @@ -938,6 +969,8 @@ ip_dooptions(struct mbuf *m) */ break; } + pserialize_read_exit(s); + off--; /* 0 origin */ if ((off + sizeof(struct in_addr)) > optlen) { /* @@ -951,11 +984,17 @@ ip_dooptions(struct mbuf *m) */ memcpy((void *)&ipaddr.sin_addr, (void *)(cp + off), sizeof(ipaddr.sin_addr)); - if (opt == IPOPT_SSRR) - ia = ifatoia(ifa_ifwithladdr(sintosa(&ipaddr))); - else - ia = ip_rtaddr(ipaddr.sin_addr); - if (ia == 0) { + if (opt == IPOPT_SSRR) { + ifa = ifa_ifwithladdr_psref(sintosa(&ipaddr), + &psref); + if (ifa != NULL) + ia = ifatoia(ifa); + else + ia = NULL; + } else { + ia = ip_rtaddr(ipaddr.sin_addr, &psref); + } + if (ia == NULL) { type = ICMP_UNREACH; code = ICMP_UNREACH_SRCFAIL; goto bad; @@ -963,14 +1002,17 @@ ip_dooptions(struct mbuf *m) ip->ip_dst = ipaddr.sin_addr; bcopy((void *)&ia->ia_addr.sin_addr, (void *)(cp + off), sizeof(struct in_addr)); + ia4_release(ia, &psref); cp[IPOPT_OFFSET] += sizeof(struct in_addr); /* * Let ip_intr's mcast routing check handle mcast pkts */ forward = !IN_MULTICAST(ip->ip_dst.s_addr); break; + } - case IPOPT_RR: + case IPOPT_RR: { + struct psref psref; if (optlen < IPOPT_OFFSET + sizeof(*cp)) { code = &cp[IPOPT_OLEN] - (u_char *)ip; goto bad; @@ -991,17 +1033,23 @@ ip_dooptions(struct mbuf *m) * locate outgoing interface; if we're the destination, * use the incoming interface (should be same). */ - if ((ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr)))) - == NULL && - (ia = ip_rtaddr(ipaddr.sin_addr)) == NULL) { - type = ICMP_UNREACH; - code = ICMP_UNREACH_HOST; - goto bad; + ifa = ifa_ifwithaddr_psref(sintosa(&ipaddr), &psref); + if (ifa == NULL) { + ia = ip_rtaddr(ipaddr.sin_addr, &psref); + if (ia == NULL) { + type = ICMP_UNREACH; + code = ICMP_UNREACH_HOST; + goto bad; + } + } else { + ia = ifatoia(ifa); } bcopy((void *)&ia->ia_addr.sin_addr, (void *)(cp + off), sizeof(struct in_addr)); + ia4_release(ia, &psref); cp[IPOPT_OFFSET] += sizeof(struct in_addr); break; + } case IPOPT_TS: code = cp - (u_char *)ip; @@ -1030,7 +1078,7 @@ ip_dooptions(struct mbuf *m) case IPOPT_TS_TSANDADDR: { struct ifnet *rcvif; - int s; + int _s, _ss; if (ipt->ipt_ptr - 1 + sizeof(n_time) + sizeof(struct in_addr) > ipt->ipt_len) { @@ -1039,14 +1087,18 @@ ip_dooptions(struct mbuf *m) goto bad; } ipaddr.sin_addr = dst; - rcvif = m_get_rcvif(m, &s); - ia = ifatoia(ifaof_ifpforaddr(sintosa(&ipaddr), - rcvif)); - m_put_rcvif(rcvif, &s); - if (ia == 0) - continue; + _ss = pserialize_read_enter(); + rcvif = m_get_rcvif(m, &_s); + ifa = ifaof_ifpforaddr(sintosa(&ipaddr), rcvif); + m_put_rcvif(rcvif, &_s); + if (ifa == NULL) { + pserialize_read_exit(_ss); + break; + } + ia = ifatoia(ifa); bcopy(&ia->ia_addr.sin_addr, cp0, sizeof(struct in_addr)); + pserialize_read_exit(_ss); ipt->ipt_ptr += sizeof(struct in_addr); break; } @@ -1060,9 +1112,13 @@ ip_dooptions(struct mbuf *m) } memcpy(&ipaddr.sin_addr, cp0, sizeof(struct in_addr)); - if (ifatoia(ifa_ifwithaddr(sintosa(&ipaddr))) - == NULL) + s = pserialize_read_enter(); + ifa = ifa_ifwithaddr(sintosa(&ipaddr)); + if (ifa == NULL) { + pserialize_read_exit(s); continue; + } + pserialize_read_exit(s); ipt->ipt_ptr += sizeof(struct in_addr); break; @@ -1081,7 +1137,7 @@ ip_dooptions(struct mbuf *m) } if (forward) { struct ifnet *rcvif; - struct psref psref; + struct psref _psref; if (ip_forwsrcrt == 0) { type = ICMP_UNREACH; @@ -1089,14 +1145,14 @@ ip_dooptions(struct mbuf *m) goto bad; } - rcvif = m_get_rcvif_psref(m, &psref); + rcvif = m_get_rcvif_psref(m, &_psref); if (__predict_false(rcvif == NULL)) { type = ICMP_UNREACH; code = ICMP_UNREACH_HOST; goto bad; } ip_forward(m, 1, rcvif); - m_put_rcvif_psref(rcvif, &psref); + m_put_rcvif_psref(rcvif, &_psref); return true; } return false; @@ -1111,7 +1167,7 @@ bad: * return internet address info of interface to be used to get there. */ static struct in_ifaddr * -ip_rtaddr(struct in_addr dst) +ip_rtaddr(struct in_addr dst, struct psref *psref) { struct rtentry *rt; union { @@ -1121,11 +1177,16 @@ ip_rtaddr(struct in_addr dst) sockaddr_in_init(&u.dst4, &dst, 0); - SOFTNET_LOCK(); - rt = rtcache_lookup(&ipforward_rt, &u.dst); - SOFTNET_UNLOCK(); - if (rt == NULL) + ro = percpu_getref(ipforward_rt_percpu); + rt = rtcache_lookup(ro, &u.dst); + if (rt == NULL) { + percpu_putref(ipforward_rt_percpu); return NULL; + } + + ia4_acquire(ifatoia(rt->rt_ifa), psref); + rtcache_unref(rt, ro); + percpu_putref(ipforward_rt_percpu); return ifatoia(rt->rt_ifa); } @@ -1276,19 +1337,18 @@ ip_forward(struct mbuf *m, int srcrt, st return; } - SOFTNET_LOCK(); - if (ip->ip_ttl <= IPTTLDEC) { icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0); - SOFTNET_UNLOCK(); return; } sockaddr_in_init(&u.dst4, &ip->ip_dst, 0); - if ((rt = rtcache_lookup(&ipforward_rt, &u.dst)) == NULL) { + ro = percpu_getref(ipforward_rt_percpu); + rt = rtcache_lookup(ro, &u.dst); + if (rt == NULL) { + percpu_putref(ipforward_rt_percpu); icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, dest, 0); - SOFTNET_UNLOCK(); return; } @@ -1330,6 +1390,7 @@ ip_forward(struct mbuf *m, int srcrt, st code = ICMP_REDIRECT_HOST; } } + rtcache_unref(rt, ro); error = ip_output(m, NULL, &ipforward_rt, (IP_FORWARDING | (ip_directedbcast ? IP_ALLOWBROADCAST : 0)), @@ -1358,13 +1419,13 @@ ip_forward(struct mbuf *m, int srcrt, st m_freem(mcopy); } - SOFTNET_UNLOCK(); + percpu_putref(ipforward_rt_percpu); return; redirect: error: if (mcopy == NULL) { - SOFTNET_UNLOCK(); + percpu_putref(ipforward_rt_percpu); return; } @@ -1387,8 +1448,10 @@ error: type = ICMP_UNREACH; code = ICMP_UNREACH_NEEDFRAG; - if ((rt = rtcache_validate(&ipforward_rt)) != NULL) + if ((rt = rtcache_validate(ro)) != NULL) { destmtu = rt->rt_ifp->if_mtu; + rtcache_unref(rt, ro); + } #ifdef IPSEC if (ipsec_used) (void)ipsec4_forward(mcopy, &destmtu); @@ -1405,11 +1468,11 @@ error: */ if (mcopy) m_freem(mcopy); - SOFTNET_UNLOCK(); + percpu_putref(ipforward_rt_percpu); return; } icmp_error(mcopy, type, code, dest, destmtu); - SOFTNET_UNLOCK(); + percpu_putref(ipforward_rt_percpu); } void @@ -1473,8 +1536,8 @@ ip_savecontrol(struct inpcb *inp, struct if (inpflags & INP_RECVIF) { struct sockaddr_dl sdl; - sockaddr_dl_init(&sdl, sizeof(sdl), ifp ? - ifp->if_index : 0, 0, NULL, 0, NULL, 0); + sockaddr_dl_init(&sdl, sizeof(sdl), ifp->if_index, 0, NULL, 0, + NULL, 0); *mp = sbcreatecontrol(&sdl, sdl.sdl_len, IP_RECVIF, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next;