Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/netinet/raw_ip.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/netinet/raw_ip.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.92 retrieving revision 1.116.2.3 diff -u -p -r1.92 -r1.116.2.3 --- src/sys/netinet/raw_ip.c 2006/09/19 21:42:30 1.92 +++ src/sys/netinet/raw_ip.c 2013/09/23 00:57:53 1.116.2.3 @@ -1,4 +1,4 @@ -/* $NetBSD: raw_ip.c,v 1.92 2006/09/19 21:42:30 elad Exp $ */ +/* $NetBSD: raw_ip.c,v 1.116.2.3 2013/09/23 00:57:53 rmind Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -60,23 +60,25 @@ * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95 */ +/* + * Raw interface to IP protocol. + */ + #include -__KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.92 2006/09/19 21:42:30 elad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.116.2.3 2013/09/23 00:57:53 rmind Exp $"); #include "opt_inet.h" +#include "opt_compat_netbsd.h" #include "opt_ipsec.h" #include "opt_mrouting.h" #include #include -#include #include #include #include #include -#include #include -#include #include #include @@ -86,30 +88,26 @@ __KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1 #include #include #include +#include #include #include #include #include #include -#include - #ifdef IPSEC -#include -#endif /*IPSEC*/ - -#ifdef FAST_IPSEC #include -#include /* XXX ipsecstat namespace */ -#endif /* FAST_IPSEC*/ +#include +#include +#endif -struct inpcbtable rawcbtable; +#ifdef COMPAT_50 +#include +#endif -int rip_pcbnotify(struct inpcbtable *, struct in_addr, - struct in_addr, int, int, void (*)(struct inpcb *, int)); -int rip_bind(struct inpcb *, struct mbuf *); -int rip_connect(struct inpcb *, struct mbuf *); -void rip_disconnect(struct inpcb *); +static inpcbtable_t * rawcbtable __read_mostly; + +static void sysctl_net_inet_raw_setup(struct sysctllog **); /* * Nominal space allocated to a raw ip socket. @@ -117,35 +115,107 @@ void rip_disconnect(struct inpcb *); #define RIPSNDQ 8192 #define RIPRCVQ 8192 -/* - * Raw interface to IP protocol. - */ +static u_long rip_sendspace = RIPSNDQ; +static u_long rip_recvspace = RIPRCVQ; + +struct rip_input_ctx { + struct mbuf * mbuf; + struct ip * ip; + struct sockaddr_in src; + unsigned hlen; + unsigned nfound; +}; + +struct rip_ctlinput_ctx { + struct ip * ip; + struct in_addr addr; + int errno; +}; -/* - * Initialize raw connection block q. - */ void rip_init(void) { - - in_pcbinit(&rawcbtable, 1, 1); + rawcbtable = inpcb_init(1, 1, 0); + sysctl_net_inet_raw_setup(NULL); } /* - * Setup generic address and protocol structures - * for raw_input routine, then pass them along with - * mbuf chain. + * rip_append: pass the received datagram to the process. */ +static void +rip_append(inpcb_t *inp, struct rip_input_ctx *rctx) +{ + struct socket *so = inpcb_get_socket(inp); + int inpflags = inpcb_get_flags(inp); + struct mbuf *n, *opts = NULL; + + /* XXX: Might optimise this, but not with a silly loop! */ + if ((n = m_copypacket(rctx->mbuf, M_DONTWAIT)) == NULL) { + return; + } + + if (inpflags & INP_NOHEADER) { + m_adj(n, rctx->hlen); + } + + if ((inpflags & INP_CONTROLOPTS) != 0 +#ifdef SO_OTIMESTAMP + || (so->so_options & SO_OTIMESTAMP) != 0 +#endif + || (so->so_options & SO_TIMESTAMP) != 0) { + struct ip *ip = rctx->ip; + ip_savecontrol(inp, &opts, ip, n); + } + + if (sbappendaddr(&so->so_rcv, sintosa(&rctx->src), n, opts) == 0) { + /* Should notify about lost packet. */ + if (opts) { + m_freem(opts); + } + m_freem(n); + } else { + sorwakeup(so); + } +} + +static int +rip_pcb_process(inpcb_t *inp, void *arg) +{ + struct rip_input_ctx *rctx = arg; + const struct ip *ip = rctx->ip; + struct ip *inp_ip = in_getiphdr(inp); + struct in_addr laddr, faddr; + + if (inp_ip->ip_p && inp_ip->ip_p != ip->ip_p) { + return 0; + } + inpcb_get_addrs(inp, &laddr, &faddr); + + if (!in_nullhost(laddr) && !in_hosteq(laddr, ip->ip_dst)) { + return 0; + } + if (!in_nullhost(faddr) && !in_hosteq(faddr, ip->ip_src)) { + return 0; + } + +#if defined(IPSEC) + /* Check AH/ESP integrity. */ + if (ipsec4_in_reject_so(rctx->mbuf, inpcb_get_socket(inp))) { + /* Do not inject data into PCB. */ + IPSEC_STATINC(IPSEC_STAT_IN_POLVIO); + return 0; + } +#endif + rip_append(inp, rctx); + rctx->nfound++; + return 0; +} + void rip_input(struct mbuf *m, ...) { - int proto; struct ip *ip = mtod(m, struct ip *); - struct inpcb_hdr *inph; - struct inpcb *inp; - struct inpcb *last = 0; - struct mbuf *opts = 0; - struct sockaddr_in ripsrc; + int error, hlen, proto; va_list ap; va_start(ap, m); @@ -153,123 +223,68 @@ rip_input(struct mbuf *m, ...) proto = va_arg(ap, int); va_end(ap); - ripsrc.sin_family = AF_INET; - ripsrc.sin_len = sizeof(struct sockaddr_in); - ripsrc.sin_addr = ip->ip_src; - ripsrc.sin_port = 0; - bzero((caddr_t)ripsrc.sin_zero, sizeof(ripsrc.sin_zero)); + KASSERTMSG((proto == ip->ip_p), "%s: protocol mismatch", __func__); /* - * XXX Compatibility: programs using raw IP expect ip_len - * XXX to have the header length subtracted, and in host order. - * XXX ip_off is also expected to be host order. + * Compatibility: programs using raw IP expect ip_len field to have + * the header length subtracted. Also, ip_len and ip_off fields are + * expected to be in host order. */ - ip->ip_len = ntohs(ip->ip_len) - (ip->ip_hl << 2); + hlen = ip->ip_hl << 2; + ip->ip_len = ntohs(ip->ip_len) - hlen; NTOHS(ip->ip_off); - CIRCLEQ_FOREACH(inph, &rawcbtable.inpt_queue, inph_queue) { - inp = (struct inpcb *)inph; - if (inp->inp_af != AF_INET) - continue; - if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto) - continue; - if (!in_nullhost(inp->inp_laddr) && - !in_hosteq(inp->inp_laddr, ip->ip_dst)) - continue; - if (!in_nullhost(inp->inp_faddr) && - !in_hosteq(inp->inp_faddr, ip->ip_src)) - continue; - if (last) { - struct mbuf *n; - -#if defined(IPSEC) || defined(FAST_IPSEC) - /* check AH/ESP integrity. */ - if (ipsec4_in_reject_so(m, last->inp_socket)) { - ipsecstat.in_polvio++; - /* do not inject data to pcb */ - } else -#endif /*IPSEC*/ - if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) { - if (last->inp_flags & INP_CONTROLOPTS || - last->inp_socket->so_options & SO_TIMESTAMP) - ip_savecontrol(last, &opts, ip, n); - if (sbappendaddr(&last->inp_socket->so_rcv, - sintosa(&ripsrc), n, opts) == 0) { - /* should notify about lost packet */ - m_freem(n); - if (opts) - m_freem(opts); - } else - sorwakeup(last->inp_socket); - opts = NULL; - } - } - last = inp; + /* Save some context for the iterator. */ + struct rip_input_ctx rctx = { + .mbuf = m, .ip = ip, .hlen = hlen, .nfound = 0 + }; + sockaddr_in_init(&rctx.src, &ip->ip_src, 0); + + /* Scan all raw IP PCBs for matching entries. */ + error = inpcb_foreach(rawcbtable, AF_INET, rip_pcb_process, &rctx); + KASSERT(error == 0); + + /* Done, if found any. */ + if (rctx.nfound) { + return; } -#if defined(IPSEC) || defined(FAST_IPSEC) - /* check AH/ESP integrity. */ - if (last && ipsec4_in_reject_so(m, last->inp_socket)) { - m_freem(m); - ipsecstat.in_polvio++; - ipstat.ips_delivered--; - /* do not inject data to pcb */ - } else -#endif /*IPSEC*/ - if (last) { - if (last->inp_flags & INP_CONTROLOPTS || - last->inp_socket->so_options & SO_TIMESTAMP) - ip_savecontrol(last, &opts, ip, m); - if (sbappendaddr(&last->inp_socket->so_rcv, - sintosa(&ripsrc), m, opts) == 0) { - m_freem(m); - if (opts) - m_freem(opts); - } else - sorwakeup(last->inp_socket); + + if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) { + uint64_t *ips; + + icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0); + ips = IP_STAT_GETREF(); + ips[IP_STAT_NOPROTO]++; + ips[IP_STAT_DELIVERED]--; + IP_STAT_PUTREF(); } else { - if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) { - icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, - 0, 0); - ipstat.ips_noproto++; - ipstat.ips_delivered--; - } else - m_freem(m); + m_freem(m); } - return; } -int -rip_pcbnotify(struct inpcbtable *table, - struct in_addr faddr, struct in_addr laddr, int proto, int errno, - void (*notify)(struct inpcb *, int)) -{ - struct inpcb *inp, *ninp; - int nmatch; - - nmatch = 0; - for (inp = (struct inpcb *)CIRCLEQ_FIRST(&table->inpt_queue); - inp != (struct inpcb *)&table->inpt_queue; - inp = ninp) { - ninp = (struct inpcb *)inp->inp_queue.cqe_next; - if (inp->inp_af != AF_INET) - continue; - if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto) - continue; - if (in_hosteq(inp->inp_faddr, faddr) && - in_hosteq(inp->inp_laddr, laddr)) { - (*notify)(inp, errno); - nmatch++; - } +static int +rip_pcbnotify(inpcb_t *inp, void *arg) +{ + struct rip_ctlinput_ctx *rctx = arg; + const struct ip *ip = rctx->ip; + struct ip *inp_ip = in_getiphdr(inp); + struct in_addr laddr, faddr; + + if (inp_ip->ip_p && inp_ip->ip_p != ip->ip_p) { + return 0; } + inpcb_get_addrs(inp, &laddr, &faddr); - return nmatch; + if (in_hosteq(faddr, rctx->addr) && in_hosteq(laddr, ip->ip_src)) { + inpcb_rtchange(inp, rctx->errno); + } + return 0; } void * -rip_ctlinput(int cmd, struct sockaddr *sa, void *v) +rip_ctlinput(int cmd, const struct sockaddr *sa, void *v) { struct ip *ip = v; - void (*notify)(struct inpcb *, int) = in_rtchange; int errno; if (sa->sa_family != AF_INET || @@ -278,133 +293,164 @@ rip_ctlinput(int cmd, struct sockaddr *s if ((unsigned)cmd >= PRC_NCMDS) return NULL; errno = inetctlerrmap[cmd]; - if (PRC_IS_REDIRECT(cmd)) - notify = in_rtchange, ip = 0; - else if (cmd == PRC_HOSTDEAD) - ip = 0; - else if (errno == 0) + + if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD || ip == NULL) { + inpcb_notifyall(rawcbtable, satocsin(sa)->sin_addr, + errno, inpcb_rtchange); + return NULL; + } else if (errno == 0) { return NULL; - if (ip) { - rip_pcbnotify(&rawcbtable, satosin(sa)->sin_addr, - ip->ip_src, ip->ip_p, errno, notify); - - /* XXX mapped address case */ - } else - in_pcbnotifyall(&rawcbtable, satosin(sa)->sin_addr, errno, - notify); + } + + /* Note: mapped address case. */ + struct rip_ctlinput_ctx rctx = { + .ip = ip, .addr = satocsin(sa)->sin_addr, .errno = errno + }; + (void)inpcb_foreach(rawcbtable, AF_INET, rip_pcbnotify, &rctx); + return NULL; } /* - * Generate IP header and pass packet to ip_output. + * Generate IP header and pass packet to the IP output routine. * Tack on options user may have setup with control call. */ int rip_output(struct mbuf *m, ...) { - struct inpcb *inp; + inpcb_t *inp; + struct socket *so; struct ip *ip; struct mbuf *opts; - int flags; + int flags, inpflags; va_list ap; va_start(ap, m); - inp = va_arg(ap, struct inpcb *); + inp = va_arg(ap, inpcb_t *); va_end(ap); - flags = - (inp->inp_socket->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST - | IP_RETURNMTU; + so = inpcb_get_socket(inp); + KASSERT(solocked(so)); + + flags = (so->so_options & SO_DONTROUTE) | + IP_ALLOWBROADCAST | IP_RETURNMTU; + inpflags = inpcb_get_flags(inp); /* * If the user handed us a complete IP packet, use it. * Otherwise, allocate an mbuf for a header and fill it in. */ - if ((inp->inp_flags & INP_HDRINCL) == 0) { + if ((inpflags & INP_HDRINCL) == 0) { + struct ip *inp_ip = in_getiphdr(inp); + if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) { m_freem(m); - return (EMSGSIZE); + return EMSGSIZE; } M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); - if (!m) - return (ENOBUFS); + if (m == NULL) { + return ENOBUFS; + } ip = mtod(m, struct ip *); ip->ip_tos = 0; ip->ip_off = htons(0); - ip->ip_p = inp->inp_ip.ip_p; + ip->ip_p = inp_ip->ip_p; ip->ip_len = htons(m->m_pkthdr.len); - ip->ip_src = inp->inp_laddr; - ip->ip_dst = inp->inp_faddr; + inpcb_get_addrs(inp, &ip->ip_src, &ip->ip_dst); + ip->ip_ttl = MAXTTL; - opts = inp->inp_options; + opts = inpcb_get_options(inp); } else { if (m->m_pkthdr.len > IP_MAXPACKET) { m_freem(m); - return (EMSGSIZE); + return EMSGSIZE; } ip = mtod(m, struct ip *); /* - * If the mbuf is read-only, we need to allocate - * a new mbuf for the header, since we need to - * modify the header. + * If the mbuf is read-only, we need to allocate a new mbuf + * for the header, since we need to modify the header. */ if (M_READONLY(m)) { - int hlen = ip->ip_hl << 2; + const int hlen = ip->ip_hl << 2; m = m_copyup(m, hlen, (max_linkhdr + 3) & ~3); - if (m == NULL) - return (ENOMEM); /* XXX */ + if (m == NULL) { + return ENOMEM; /* XXX */ + } ip = mtod(m, struct ip *); } - /* XXX userland passes ip_len and ip_off in host order */ + /* + * Applications on raw sockets pass us packets + * in host byte order. + */ if (m->m_pkthdr.len != ip->ip_len) { m_freem(m); return (EINVAL); } HTONS(ip->ip_len); HTONS(ip->ip_off); - if (ip->ip_id == 0) - ip->ip_id = ip_newid(); + if (ip->ip_id || m->m_pkthdr.len < IP_MINFRAGSIZE) { + flags |= IP_NOIPNEWID; + } opts = NULL; - /* XXX prevent ip_output from overwriting header fields */ + + /* + * Note: prevent IP output from overwriting header fields. + */ flags |= IP_RAWOUTPUT; - ipstat.ips_rawout++; + IP_STATINC(IP_STAT_RAWOUT); } - return (ip_output(m, opts, &inp->inp_route, flags, inp->inp_moptions, - inp->inp_socket, &inp->inp_errormtu)); + + return ip_output(m, opts, inpcb_get_route(inp), flags, + inpcb_get_moptions(inp), so); } /* * Raw IP socket option processing. */ int -rip_ctloutput(int op, struct socket *so, int level, int optname, - struct mbuf **m) +rip_ctloutput(int op, struct socket *so, struct sockopt *sopt) { - struct inpcb *inp = sotoinpcb(so); - int error = 0; + inpcb_t *inp = sotoinpcb(so); + int inpflags = inpcb_get_flags(inp); + int error = 0, optval; + + KASSERT(solocked(so)); + + if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_NOHEADER) { + if (op == PRCO_GETOPT) { + optval = (inpflags & INP_NOHEADER) ? 1 : 0; + error = sockopt_set(sopt, &optval, sizeof(optval)); + } else if (op == PRCO_SETOPT) { + error = sockopt_getint(sopt, &optval); + if (error) + goto out; + if (optval) { + inpflags &= ~INP_HDRINCL; + inpflags |= INP_NOHEADER; + } else + inpflags &= ~INP_NOHEADER; + } + goto out; + } - if (level != IPPROTO_IP) { - error = ENOPROTOOPT; - if (op == PRCO_SETOPT && *m != 0) - (void) m_free(*m); - } else switch (op) { + if (sopt->sopt_level != IPPROTO_IP) { + return ip_ctloutput(op, so, sopt); + } + switch (op) { case PRCO_SETOPT: - switch (optname) { + switch (sopt->sopt_name) { case IP_HDRINCL: - if (*m == 0 || (*m)->m_len < sizeof (int)) - error = EINVAL; - else { - if (*mtod(*m, int *)) - inp->inp_flags |= INP_HDRINCL; - else - inp->inp_flags &= ~INP_HDRINCL; - } - if (*m != 0) - (void) m_free(*m); + error = sockopt_getint(sopt, &optval); + if (error) + break; + if (optval) + inpflags |= INP_HDRINCL; + else + inpflags &= ~INP_HDRINCL; break; #ifdef MROUTING @@ -418,23 +464,21 @@ rip_ctloutput(int op, struct socket *so, case MRT_API_CONFIG: case MRT_ADD_BW_UPCALL: case MRT_DEL_BW_UPCALL: - error = ip_mrouter_set(so, optname, m); + error = ip_mrouter_set(so, sopt); break; #endif default: - error = ip_ctloutput(op, so, level, optname, m); + error = ip_ctloutput(op, so, sopt); break; } break; case PRCO_GETOPT: - switch (optname) { + switch (sopt->sopt_name) { case IP_HDRINCL: - *m = m_get(M_WAIT, MT_SOOPTS); - MCLAIM((*m), so->so_mowner); - (*m)->m_len = sizeof (int); - *mtod(*m, int *) = inp->inp_flags & INP_HDRINCL ? 1 : 0; + optval = inpflags & INP_HDRINCL; + error = sockopt_set(sopt, &optval, sizeof(optval)); break; #ifdef MROUTING @@ -442,132 +486,145 @@ rip_ctloutput(int op, struct socket *so, case MRT_ASSERT: case MRT_API_SUPPORT: case MRT_API_CONFIG: - error = ip_mrouter_get(so, optname, m); + error = ip_mrouter_get(so, sopt); break; #endif default: - error = ip_ctloutput(op, so, level, optname, m); + error = ip_ctloutput(op, so, sopt); break; } break; } - return (error); + out: + if (!error) { + inpcb_set_flags(inp, inpflags); + } + return error; } -int -rip_bind(struct inpcb *inp, struct mbuf *nam) +static int +rip_bind(inpcb_t *inp, struct mbuf *nam) { struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); if (nam->m_len != sizeof(*addr)) - return (EINVAL); - if (TAILQ_FIRST(&ifnet) == 0) - return (EADDRNOTAVAIL); - if (addr->sin_family != AF_INET && - addr->sin_family != AF_IMPLINK) - return (EAFNOSUPPORT); - if (!in_nullhost(addr->sin_addr) && - ifa_ifwithaddr(sintosa(addr)) == 0) - return (EADDRNOTAVAIL); - inp->inp_laddr = addr->sin_addr; - return (0); + return EINVAL; + if (!IFNET_FIRST()) + return EADDRNOTAVAIL; + if (addr->sin_family != AF_INET) + return EAFNOSUPPORT; + if (!in_nullhost(addr->sin_addr) && !ifa_ifwithaddr(sintosa(addr))) + return EADDRNOTAVAIL; + + inpcb_set_addrs(inp, &addr->sin_addr, NULL); + return 0; } -int -rip_connect(struct inpcb *inp, struct mbuf *nam) +static int +rip_connect(inpcb_t *inp, struct mbuf *nam) { struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); if (nam->m_len != sizeof(*addr)) - return (EINVAL); - if (TAILQ_FIRST(&ifnet) == 0) - return (EADDRNOTAVAIL); - if (addr->sin_family != AF_INET && - addr->sin_family != AF_IMPLINK) - return (EAFNOSUPPORT); - inp->inp_faddr = addr->sin_addr; - return (0); + return EINVAL; + if (!IFNET_FIRST()) + return EADDRNOTAVAIL; + if (addr->sin_family != AF_INET) + return EAFNOSUPPORT; + + inpcb_set_addrs(inp, NULL, &addr->sin_addr); + return 0; } -void -rip_disconnect(struct inpcb *inp) +static void +rip_disconnect(inpcb_t *inp) { - - inp->inp_faddr = zeroin_addr; + inpcb_set_addrs(inp, NULL, &zeroin_addr); } -u_long rip_sendspace = RIPSNDQ; -u_long rip_recvspace = RIPRCVQ; +static int +rip_attach(struct socket *so, int proto) +{ + inpcb_t *inp; + struct ip *ip; + int error; -/*ARGSUSED*/ -int -rip_usrreq(struct socket *so, int req, - struct mbuf *m, struct mbuf *nam, struct mbuf *control, struct lwp *l) + KASSERT(sotoinpcb(so) == NULL); + sosetlock(so); + + if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { + error = soreserve(so, rip_sendspace, rip_recvspace); + if (error) { + return error; + } + } + + solock(so); + error = inpcb_create(so, rawcbtable); + if (error) { + sounlock(so); + return error; + } + inp = sotoinpcb(so); + ip = in_getiphdr(inp); + ip->ip_p = proto; + sounlock(so); + + return 0; +} + +static void +rip_detach(struct socket *so) { - struct inpcb *inp; - int s; - int error = 0; + inpcb_t *inp; + + KASSERT(solocked(so)); + inp = sotoinpcb(so); + KASSERT(inp != NULL); + #ifdef MROUTING extern struct socket *ip_mrouter; + if (so == ip_mrouter) { + ip_mrouter_done(); + } #endif + inpcb_destroy(inp); +} - if (req == PRU_CONTROL) - return (in_control(so, (long)m, (caddr_t)nam, - (struct ifnet *)control, l)); +int +rip_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, + struct mbuf *control, struct lwp *l) +{ + inpcb_t *inp; + int error = 0; + + KASSERT(req != PRU_ATTACH); + KASSERT(req != PRU_DETACH); + if (req == PRU_CONTROL) { + return in_control(so, (long)m, nam, (ifnet_t *)control, l); + } if (req == PRU_PURGEIF) { - in_pcbpurgeif0(&rawcbtable, (struct ifnet *)control); - in_purgeif((struct ifnet *)control); - in_pcbpurgeif(&rawcbtable, (struct ifnet *)control); - return (0); + int s = splsoftnet(); + mutex_enter(softnet_lock); + inpcb_purgeif0(rawcbtable, (ifnet_t *)control); + in_purgeif((ifnet_t *)control); + inpcb_purgeif(rawcbtable, (ifnet_t *)control); + mutex_exit(softnet_lock); + splx(s); + return 0; } - s = splsoftnet(); + KASSERT(solocked(so)); inp = sotoinpcb(so); -#ifdef DIAGNOSTIC - if (req != PRU_SEND && req != PRU_SENDOOB && control) - panic("rip_usrreq: unexpected control mbuf"); -#endif - if (inp == 0 && req != PRU_ATTACH) { - error = EINVAL; - goto release; + + KASSERT(!control || (req == PRU_SEND || req == PRU_SENDOOB)); + if (inp == NULL) { + return EINVAL; } switch (req) { - - case PRU_ATTACH: - if (inp != 0) { - error = EISCONN; - break; - } - if (l == 0 || (error = kauth_authorize_network(l->l_cred, - KAUTH_NETWORK_SOCKET, - KAUTH_REQ_NETWORK_SOCKET_RAWSOCK, so, NULL, - NULL))) { - error = EACCES; - break; - } - if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { - error = soreserve(so, rip_sendspace, rip_recvspace); - if (error) - break; - } - error = in_pcballoc(so, &rawcbtable); - if (error) - break; - inp = sotoinpcb(so); - inp->inp_ip.ip_p = (long)nam; - break; - - case PRU_DETACH: -#ifdef MROUTING - if (so == ip_mrouter) - ip_mrouter_done(); -#endif - in_pcbdetach(inp); - break; - case PRU_BIND: error = rip_bind(inp, nam); break; @@ -614,36 +671,26 @@ rip_usrreq(struct socket *so, int req, error = EINVAL; break; } - { - if (nam) { - if ((so->so_state & SS_ISCONNECTED) != 0) { - error = EISCONN; - goto die; - } - error = rip_connect(inp, nam); - if (error) { - die: - m_freem(m); - break; - } - } else { - if ((so->so_state & SS_ISCONNECTED) == 0) { - error = ENOTCONN; - goto die; - } + if ((so->so_state & SS_ISCONNECTED) != 0) { + error = nam ? EISCONN : ENOTCONN; + m_freem(m); + break; + } + if (nam && (error = rip_connect(inp, nam)) != 0) { + m_freem(m); + break; } error = rip_output(m, inp); - if (nam) + if (nam) { rip_disconnect(inp); - } + } break; case PRU_SENSE: /* - * stat: don't bother with a blocksize. + * Stat: do not bother with a blocksize. */ - splx(s); - return (0); + return 0; case PRU_RCVOOB: error = EOPNOTSUPP; @@ -656,25 +703,33 @@ rip_usrreq(struct socket *so, int req, break; case PRU_SOCKADDR: - in_setsockaddr(inp, nam); + inpcb_fetch_sockaddr(inp, nam); break; case PRU_PEERADDR: - in_setpeeraddr(inp, nam); + inpcb_fetch_peeraddr(inp, nam); break; default: - panic("rip_usrreq"); + KASSERT(false); } -release: - splx(s); - return (error); + return error; } -SYSCTL_SETUP(sysctl_net_inet_raw_setup, "sysctl net.inet.raw subtree setup") -{ +PR_WRAP_USRREQ(rip_usrreq) +#define rip_usrreq rip_usrreq_wrapper + +const struct pr_usrreqs rip_usrreqs = { + .pr_attach = rip_attach, + .pr_detach = rip_detach, + .pr_generic = rip_usrreq, +}; + +static void +sysctl_net_inet_raw_setup(struct sysctllog **clog) +{ sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_NODE, "net", NULL, @@ -691,12 +746,11 @@ SYSCTL_SETUP(sysctl_net_inet_raw_setup, SYSCTL_DESCR("Raw IPv4 settings"), NULL, 0, NULL, 0, CTL_NET, PF_INET, IPPROTO_RAW, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_STRUCT, "pcblist", SYSCTL_DESCR("Raw IPv4 control block list"), - sysctl_inpcblist, 0, &rawcbtable, 0, + sysctl_inpcblist, 0, rawcbtable, 0, CTL_NET, PF_INET, IPPROTO_RAW, CTL_CREATE, CTL_EOL); }