Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/netinet/raw_ip.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/netinet/raw_ip.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.107.2.2 retrieving revision 1.171 diff -u -p -r1.107.2.2 -r1.171 --- src/sys/netinet/raw_ip.c 2010/03/11 15:04:28 1.107.2.2 +++ src/sys/netinet/raw_ip.c 2018/02/28 11:23:24 1.171 @@ -1,4 +1,4 @@ -/* $NetBSD: raw_ip.c,v 1.107.2.2 2010/03/11 15:04:28 yamt Exp $ */ +/* $NetBSD: raw_ip.c,v 1.171 2018/02/28 11:23:24 maxv Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -60,17 +60,22 @@ * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95 */ +/* + * Raw interface to IP protocol. + */ + #include -__KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.107.2.2 2010/03/11 15:04:28 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.171 2018/02/28 11:23:24 maxv Exp $"); +#ifdef _KERNEL_OPT #include "opt_inet.h" -#include "opt_compat_netbsd.h" #include "opt_ipsec.h" #include "opt_mrouting.h" +#include "opt_net_mpsafe.h" +#endif #include #include -#include #include #include #include @@ -81,7 +86,6 @@ __KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1 #include #include -#include #include #include @@ -94,30 +98,17 @@ __KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1 #include #include -#include - #ifdef IPSEC -#include -#include -#endif /* IPSEC */ - -#ifdef FAST_IPSEC #include #include -#include -#endif /* FAST_IPSEC */ - -#ifdef COMPAT_50 -#include #endif struct inpcbtable rawcbtable; int rip_pcbnotify(struct inpcbtable *, struct in_addr, struct in_addr, int, int, void (*)(struct inpcb *, int)); -int rip_bind(struct inpcb *, struct mbuf *); -int rip_connect(struct inpcb *, struct mbuf *); -void rip_disconnect(struct inpcb *); +static int rip_connect_pcb(struct inpcb *, struct sockaddr_in *); +static void rip_disconnect1(struct inpcb *); static void sysctl_net_inet_raw_setup(struct sysctllog **); @@ -127,6 +118,9 @@ static void sysctl_net_inet_raw_setup(st #define RIPSNDQ 8192 #define RIPRCVQ 8192 +static u_long rip_sendspace = RIPSNDQ; +static u_long rip_recvspace = RIPRCVQ; + /* * Raw interface to IP protocol. */ @@ -149,10 +143,7 @@ rip_sbappendaddr(struct inpcb *last, str if (last->inp_flags & INP_NOHEADER) m_adj(n, hlen); if (last->inp_flags & INP_CONTROLOPTS -#ifdef SO_OTIMESTAMP - || last->inp_socket->so_options & SO_OTIMESTAMP -#endif - || last->inp_socket->so_options & SO_TIMESTAMP) + || SOOPT_TIMESTAMP(last->inp_socket->so_options)) ip_savecontrol(last, &opts, ip, n); if (sbappendaddr(&last->inp_socket->so_rcv, sa, n, opts) == 0) { /* should notify about lost packet */ @@ -196,7 +187,7 @@ rip_input(struct mbuf *m, ...) ip->ip_len = ntohs(ip->ip_len) - hlen; NTOHS(ip->ip_off); - CIRCLEQ_FOREACH(inph, &rawcbtable.inpt_queue, inph_queue) { + TAILQ_FOREACH(inph, &rawcbtable.inpt_queue, inph_queue) { inp = (struct inpcb *)inph; if (inp->inp_af != AF_INET) continue; @@ -210,10 +201,9 @@ rip_input(struct mbuf *m, ...) continue; if (last == NULL) ; -#if defined(IPSEC) || defined(FAST_IPSEC) +#if defined(IPSEC) /* check AH/ESP integrity. */ - else if (ipsec4_in_reject_so(m, last->inp_socket)) { - IPSEC_STATINC(IPSEC_STAT_IN_POLVIO); + else if (ipsec_used && ipsec_in_reject(m, last)) { /* do not inject data to pcb */ } #endif /*IPSEC*/ @@ -224,11 +214,10 @@ rip_input(struct mbuf *m, ...) } last = inp; } -#if defined(IPSEC) || defined(FAST_IPSEC) +#if defined(IPSEC) /* check AH/ESP integrity. */ - if (last != NULL && ipsec4_in_reject_so(m, last->inp_socket)) { + if (ipsec_used && last != NULL && ipsec_in_reject(m, last)) { m_freem(m); - IPSEC_STATINC(IPSEC_STAT_IN_POLVIO); IP_STATDEC(IP_STAT_DELIVERED); /* do not inject data to pcb */ } else @@ -254,14 +243,12 @@ rip_pcbnotify(struct inpcbtable *table, struct in_addr faddr, struct in_addr laddr, int proto, int errno, void (*notify)(struct inpcb *, int)) { - struct inpcb *inp, *ninp; + struct inpcb_hdr *inph, *ninph; int nmatch; nmatch = 0; - for (inp = (struct inpcb *)CIRCLEQ_FIRST(&table->inpt_queue); - inp != (struct inpcb *)&table->inpt_queue; - inp = ninp) { - ninp = (struct inpcb *)inp->inp_queue.cqe_next; + TAILQ_FOREACH_SAFE(inph, &table->inpt_queue, inph_queue, ninph) { + struct inpcb *inp = (struct inpcb *)inph; if (inp->inp_af != AF_INET) continue; if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto) @@ -311,21 +298,30 @@ rip_ctlinput(int cmd, const struct socka * Tack on options user may have setup with control call. */ int -rip_output(struct mbuf *m, ...) +rip_output(struct mbuf *m, struct inpcb *inp, struct mbuf *control, + struct lwp *l) { - struct inpcb *inp; struct ip *ip; struct mbuf *opts; - int flags; - va_list ap; - - va_start(ap, m); - inp = va_arg(ap, struct inpcb *); - va_end(ap); - - flags = - (inp->inp_socket->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST - | IP_RETURNMTU; + struct ip_pktopts pktopts; + kauth_cred_t cred; + int error, flags; + + flags = (inp->inp_socket->so_options & SO_DONTROUTE) | + IP_ALLOWBROADCAST | IP_RETURNMTU; + + if (l == NULL) + cred = NULL; + else + cred = l->l_cred; + + /* Setup IP outgoing packet options */ + memset(&pktopts, 0, sizeof(pktopts)); + error = ip_setpktopts(control, &pktopts, &flags, inp, cred); + if (control != NULL) + m_freem(control); + if (error != 0) + goto release; /* * If the user handed us a complete IP packet, use it. @@ -333,25 +329,27 @@ rip_output(struct mbuf *m, ...) */ if ((inp->inp_flags & INP_HDRINCL) == 0) { if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) { - m_freem(m); - return (EMSGSIZE); + error = EMSGSIZE; + goto release; } M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); - if (!m) - return (ENOBUFS); + if (!m) { + error = ENOBUFS; + goto release; + } ip = mtod(m, struct ip *); ip->ip_tos = 0; ip->ip_off = htons(0); ip->ip_p = inp->inp_ip.ip_p; ip->ip_len = htons(m->m_pkthdr.len); - ip->ip_src = inp->inp_laddr; + ip->ip_src = pktopts.ippo_laddr.sin_addr; ip->ip_dst = inp->inp_faddr; ip->ip_ttl = MAXTTL; opts = inp->inp_options; } else { if (m->m_pkthdr.len > IP_MAXPACKET) { - m_freem(m); - return (EMSGSIZE); + error = EMSGSIZE; + goto release; } ip = mtod(m, struct ip *); @@ -364,15 +362,17 @@ rip_output(struct mbuf *m, ...) int hlen = ip->ip_hl << 2; m = m_copyup(m, hlen, (max_linkhdr + 3) & ~3); - if (m == NULL) - return (ENOMEM); /* XXX */ + if (m == NULL) { + error = ENOMEM; /* XXX */ + goto release; + } ip = mtod(m, struct ip *); } /* XXX userland passes ip_len and ip_off in host order */ if (m->m_pkthdr.len != ip->ip_len) { - m_freem(m); - return (EINVAL); + error = EINVAL; + goto release; } HTONS(ip->ip_len); HTONS(ip->ip_off); @@ -383,8 +383,18 @@ rip_output(struct mbuf *m, ...) flags |= IP_RAWOUTPUT; IP_STATINC(IP_STAT_RAWOUT); } - return (ip_output(m, opts, &inp->inp_route, flags, inp->inp_moptions, - inp->inp_socket, &inp->inp_errormtu)); + + /* + * IP output. Note: if IP_RETURNMTU flag is set, the MTU size + * will be stored in inp_errormtu. + */ + return ip_output(m, opts, &inp->inp_route, flags, pktopts.ippo_imo, + inp); + + release: + if (m != NULL) + m_freem(m); + return error; } /* @@ -477,238 +487,397 @@ rip_ctloutput(int op, struct socket *so, } int -rip_bind(struct inpcb *inp, struct mbuf *nam) +rip_connect_pcb(struct inpcb *inp, struct sockaddr_in *addr) { - struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); - if (nam->m_len != sizeof(*addr)) - return (EINVAL); - if (TAILQ_FIRST(&ifnet) == 0) + if (IFNET_READER_EMPTY()) return (EADDRNOTAVAIL); - if (addr->sin_family != AF_INET && - addr->sin_family != AF_IMPLINK) + if (addr->sin_family != AF_INET) return (EAFNOSUPPORT); - if (!in_nullhost(addr->sin_addr) && - ifa_ifwithaddr(sintosa(addr)) == 0) - return (EADDRNOTAVAIL); - inp->inp_laddr = addr->sin_addr; + inp->inp_faddr = addr->sin_addr; return (0); } -int -rip_connect(struct inpcb *inp, struct mbuf *nam) +static void +rip_disconnect1(struct inpcb *inp) { - struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); - if (nam->m_len != sizeof(*addr)) - return (EINVAL); - if (TAILQ_FIRST(&ifnet) == 0) - return (EADDRNOTAVAIL); - if (addr->sin_family != AF_INET && - addr->sin_family != AF_IMPLINK) - return (EAFNOSUPPORT); - inp->inp_faddr = addr->sin_addr; - return (0); + inp->inp_faddr = zeroin_addr; } -void -rip_disconnect(struct inpcb *inp) +static int +rip_attach(struct socket *so, int proto) { + struct inpcb *inp; + int error; - inp->inp_faddr = zeroin_addr; -} + KASSERT(sotoinpcb(so) == NULL); + sosetlock(so); -u_long rip_sendspace = RIPSNDQ; -u_long rip_recvspace = RIPRCVQ; + if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { + error = soreserve(so, rip_sendspace, rip_recvspace); + if (error) { + return error; + } + } -/*ARGSUSED*/ -int -rip_usrreq(struct socket *so, int req, - struct mbuf *m, struct mbuf *nam, struct mbuf *control, struct lwp *l) + error = in_pcballoc(so, &rawcbtable); + if (error) { + return error; + } + inp = sotoinpcb(so); + inp->inp_ip.ip_p = proto; + KASSERT(solocked(so)); + + return 0; +} + +static void +rip_detach(struct socket *so) { struct inpcb *inp; - int s; - int error = 0; + + KASSERT(solocked(so)); + inp = sotoinpcb(so); + KASSERT(inp != NULL); + #ifdef MROUTING extern struct socket *ip_mrouter; + if (so == ip_mrouter) { + ip_mrouter_done(); + } #endif + in_pcbdetach(inp); +} - if (req == PRU_CONTROL) - return in_control(so, (long)m, nam, (struct ifnet *)control, l); +static int +rip_accept(struct socket *so, struct sockaddr *nam) +{ + KASSERT(solocked(so)); - s = splsoftnet(); + panic("rip_accept"); - if (req == PRU_PURGEIF) { - mutex_enter(softnet_lock); - in_pcbpurgeif0(&rawcbtable, (struct ifnet *)control); - in_purgeif((struct ifnet *)control); - in_pcbpurgeif(&rawcbtable, (struct ifnet *)control); - mutex_exit(softnet_lock); - splx(s); - return (0); - } + return EOPNOTSUPP; +} - inp = sotoinpcb(so); -#ifdef DIAGNOSTIC - if (req != PRU_SEND && req != PRU_SENDOOB && control) - panic("rip_usrreq: unexpected control mbuf"); -#endif - if (inp == NULL && req != PRU_ATTACH) { - error = EINVAL; +static int +rip_bind(struct socket *so, struct sockaddr *nam, struct lwp *l) +{ + struct inpcb *inp = sotoinpcb(so); + struct sockaddr_in *addr = (struct sockaddr_in *)nam; + int error = 0; + int s, ss; + struct ifaddr *ifa; + + KASSERT(solocked(so)); + KASSERT(inp != NULL); + KASSERT(nam != NULL); + + if (addr->sin_len != sizeof(*addr)) + return EINVAL; + + s = splsoftnet(); + if (IFNET_READER_EMPTY()) { + error = EADDRNOTAVAIL; + goto release; + } + if (addr->sin_family != AF_INET) { + error = EAFNOSUPPORT; + goto release; + } + ss = pserialize_read_enter(); + if ((ifa = ifa_ifwithaddr(sintosa(addr))) == NULL && + !in_nullhost(addr->sin_addr)) + { + pserialize_read_exit(ss); + error = EADDRNOTAVAIL; + goto release; + } + if (ifa && (ifatoia(ifa))->ia4_flags & IN6_IFF_DUPLICATED) { + pserialize_read_exit(ss); + error = EADDRNOTAVAIL; goto release; } + pserialize_read_exit(ss); - switch (req) { + inp->inp_laddr = addr->sin_addr; - case PRU_ATTACH: - sosetlock(so); - if (inp != 0) { - error = EISCONN; - break; - } +release: + splx(s); + return error; +} - if (l == NULL) { - error = EACCES; - break; - } +static int +rip_listen(struct socket *so, struct lwp *l) +{ + KASSERT(solocked(so)); - /* XXX: raw socket permissions are checked in socreate() */ + return EOPNOTSUPP; +} - if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { - error = soreserve(so, rip_sendspace, rip_recvspace); - if (error) - break; - } - error = in_pcballoc(so, &rawcbtable); - if (error) - break; - inp = sotoinpcb(so); - inp->inp_ip.ip_p = (long)nam; - break; +static int +rip_connect(struct socket *so, struct sockaddr *nam, struct lwp *l) +{ + struct inpcb *inp = sotoinpcb(so); + int error = 0; + int s; - case PRU_DETACH: -#ifdef MROUTING - if (so == ip_mrouter) - ip_mrouter_done(); -#endif - in_pcbdetach(inp); - break; + KASSERT(solocked(so)); + KASSERT(inp != NULL); + KASSERT(nam != NULL); - case PRU_BIND: - error = rip_bind(inp, nam); - break; + s = splsoftnet(); + error = rip_connect_pcb(inp, (struct sockaddr_in *)nam); + if (! error) + soisconnected(so); + splx(s); - case PRU_LISTEN: - error = EOPNOTSUPP; - break; + return error; +} - case PRU_CONNECT: - error = rip_connect(inp, nam); - if (error) - break; - soisconnected(so); - break; +static int +rip_connect2(struct socket *so, struct socket *so2) +{ + KASSERT(solocked(so)); - case PRU_CONNECT2: - error = EOPNOTSUPP; - break; + return EOPNOTSUPP; +} - case PRU_DISCONNECT: - soisdisconnected(so); - rip_disconnect(inp); - break; +static int +rip_disconnect(struct socket *so) +{ + struct inpcb *inp = sotoinpcb(so); + int s; + + KASSERT(solocked(so)); + KASSERT(inp != NULL); + + s = splsoftnet(); + soisdisconnected(so); + rip_disconnect1(inp); + splx(s); + + return 0; +} + +static int +rip_shutdown(struct socket *so) +{ + int s; + + KASSERT(solocked(so)); /* * Mark the connection as being incapable of further input. */ - case PRU_SHUTDOWN: - socantsendmore(so); - break; + s = splsoftnet(); + socantsendmore(so); + splx(s); - case PRU_RCVD: - error = EOPNOTSUPP; - break; + return 0; +} + +static int +rip_abort(struct socket *so) +{ + KASSERT(solocked(so)); + + panic("rip_abort"); + + return EOPNOTSUPP; +} + +static int +rip_ioctl(struct socket *so, u_long cmd, void *nam, struct ifnet *ifp) +{ + return in_control(so, cmd, nam, ifp); +} + +static int +rip_stat(struct socket *so, struct stat *ub) +{ + KASSERT(solocked(so)); + + /* stat: don't bother with a blocksize. */ + return 0; +} + +static int +rip_peeraddr(struct socket *so, struct sockaddr *nam) +{ + int s; + + KASSERT(solocked(so)); + KASSERT(sotoinpcb(so) != NULL); + KASSERT(nam != NULL); + + s = splsoftnet(); + in_setpeeraddr(sotoinpcb(so), (struct sockaddr_in *)nam); + splx(s); + + return 0; +} + +static int +rip_sockaddr(struct socket *so, struct sockaddr *nam) +{ + int s; + + KASSERT(solocked(so)); + KASSERT(sotoinpcb(so) != NULL); + KASSERT(nam != NULL); + + s = splsoftnet(); + in_setsockaddr(sotoinpcb(so), (struct sockaddr_in *)nam); + splx(s); + + return 0; +} + +static int +rip_rcvd(struct socket *so, int flags, struct lwp *l) +{ + KASSERT(solocked(so)); + + return EOPNOTSUPP; +} + +static int +rip_recvoob(struct socket *so, struct mbuf *m, int flags) +{ + KASSERT(solocked(so)); + + return EOPNOTSUPP; +} + +static int +rip_send(struct socket *so, struct mbuf *m, struct sockaddr *nam, + struct mbuf *control, struct lwp *l) +{ + struct inpcb *inp = sotoinpcb(so); + int error = 0; + int s; + + KASSERT(solocked(so)); + KASSERT(inp != NULL); + KASSERT(m != NULL); /* * Ship a packet out. The appropriate raw output * routine handles any massaging necessary. */ - case PRU_SEND: - if (control && control->m_len) { - m_freem(control); - m_freem(m); - error = EINVAL; - break; + s = splsoftnet(); + if (nam) { + if ((so->so_state & SS_ISCONNECTED) != 0) { + error = EISCONN; + goto die; } - { - if (nam) { - if ((so->so_state & SS_ISCONNECTED) != 0) { - error = EISCONN; - goto die; - } - error = rip_connect(inp, nam); - if (error) { - die: - m_freem(m); - break; - } - } else { - if ((so->so_state & SS_ISCONNECTED) == 0) { - error = ENOTCONN; - goto die; - } + error = rip_connect_pcb(inp, (struct sockaddr_in *)nam); + if (error) + goto die; + } else { + if ((so->so_state & SS_ISCONNECTED) == 0) { + error = ENOTCONN; + goto die; } - error = rip_output(m, inp); - if (nam) - rip_disconnect(inp); } - break; - - case PRU_SENSE: - /* - * stat: don't bother with a blocksize. - */ - splx(s); - return (0); + error = rip_output(m, inp, control, l); + m = NULL; + control = NULL; + if (nam) + rip_disconnect1(inp); + die: + if (m != NULL) + m_freem(m); + if (control != NULL) + m_freem(control); - case PRU_RCVOOB: - error = EOPNOTSUPP; - break; + splx(s); + return error; +} - case PRU_SENDOOB: - m_freem(control); - m_freem(m); - error = EOPNOTSUPP; - break; +static int +rip_sendoob(struct socket *so, struct mbuf *m, struct mbuf *control) +{ + KASSERT(solocked(so)); - case PRU_SOCKADDR: - in_setsockaddr(inp, nam); - break; + m_freem(m); + m_freem(control); - case PRU_PEERADDR: - in_setpeeraddr(inp, nam); - break; + return EOPNOTSUPP; +} - default: - panic("rip_usrreq"); - } +static int +rip_purgeif(struct socket *so, struct ifnet *ifp) +{ + int s; -release: + s = splsoftnet(); + mutex_enter(softnet_lock); + in_pcbpurgeif0(&rawcbtable, ifp); +#ifdef NET_MPSAFE + mutex_exit(softnet_lock); +#endif + in_purgeif(ifp); +#ifdef NET_MPSAFE + mutex_enter(softnet_lock); +#endif + in_pcbpurgeif(&rawcbtable, ifp); + mutex_exit(softnet_lock); splx(s); - return (error); + + return 0; } +PR_WRAP_USRREQS(rip) +#define rip_attach rip_attach_wrapper +#define rip_detach rip_detach_wrapper +#define rip_accept rip_accept_wrapper +#define rip_bind rip_bind_wrapper +#define rip_listen rip_listen_wrapper +#define rip_connect rip_connect_wrapper +#define rip_connect2 rip_connect2_wrapper +#define rip_disconnect rip_disconnect_wrapper +#define rip_shutdown rip_shutdown_wrapper +#define rip_abort rip_abort_wrapper +#define rip_ioctl rip_ioctl_wrapper +#define rip_stat rip_stat_wrapper +#define rip_peeraddr rip_peeraddr_wrapper +#define rip_sockaddr rip_sockaddr_wrapper +#define rip_rcvd rip_rcvd_wrapper +#define rip_recvoob rip_recvoob_wrapper +#define rip_send rip_send_wrapper +#define rip_sendoob rip_sendoob_wrapper +#define rip_purgeif rip_purgeif_wrapper + +const struct pr_usrreqs rip_usrreqs = { + .pr_attach = rip_attach, + .pr_detach = rip_detach, + .pr_accept = rip_accept, + .pr_bind = rip_bind, + .pr_listen = rip_listen, + .pr_connect = rip_connect, + .pr_connect2 = rip_connect2, + .pr_disconnect = rip_disconnect, + .pr_shutdown = rip_shutdown, + .pr_abort = rip_abort, + .pr_ioctl = rip_ioctl, + .pr_stat = rip_stat, + .pr_peeraddr = rip_peeraddr, + .pr_sockaddr = rip_sockaddr, + .pr_rcvd = rip_rcvd, + .pr_recvoob = rip_recvoob, + .pr_send = rip_send, + .pr_sendoob = rip_sendoob, + .pr_purgeif = rip_purgeif, +}; + static void sysctl_net_inet_raw_setup(struct sysctllog **clog) { sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, - CTLTYPE_NODE, "net", NULL, - NULL, 0, NULL, 0, - CTL_NET, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT, CTLTYPE_NODE, "inet", NULL, NULL, 0, NULL, 0, CTL_NET, PF_INET, CTL_EOL);