Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/netinet/raw_ip.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/netinet/raw_ip.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.60.8.2 retrieving revision 1.97.8.1 diff -u -p -r1.60.8.2 -r1.97.8.1 --- src/sys/netinet/raw_ip.c 2002/08/29 00:56:47 1.60.8.2 +++ src/sys/netinet/raw_ip.c 2007/11/06 23:33:51 1.97.8.1 @@ -1,4 +1,4 @@ -/* $NetBSD: raw_ip.c,v 1.60.8.2 2002/08/29 00:56:47 gehenna Exp $ */ +/* $NetBSD: raw_ip.c,v 1.97.8.1 2007/11/06 23:33:51 matt Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -41,11 +41,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -65,12 +61,14 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.60.8.2 2002/08/29 00:56:47 gehenna Exp $"); +__KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.97.8.1 2007/11/06 23:33:51 matt Exp $"); +#include "opt_inet.h" #include "opt_ipsec.h" #include "opt_mrouting.h" #include +#include #include #include #include @@ -79,6 +77,7 @@ __KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1 #include #include #include +#include #include #include @@ -90,6 +89,7 @@ __KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1 #include #include #include +#include #include #include @@ -98,13 +98,18 @@ __KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1 #include #endif /*IPSEC*/ +#ifdef FAST_IPSEC +#include +#include /* XXX ipsecstat namespace */ +#endif /* FAST_IPSEC*/ + struct inpcbtable rawcbtable; -int rip_pcbnotify __P((struct inpcbtable *, struct in_addr, - struct in_addr, int, int, void (*) __P((struct inpcb *, int)))); -int rip_bind __P((struct inpcb *, struct mbuf *)); -int rip_connect __P((struct inpcb *, struct mbuf *)); -void rip_disconnect __P((struct inpcb *)); +int rip_pcbnotify(struct inpcbtable *, struct in_addr, + struct in_addr, int, int, void (*)(struct inpcb *, int)); +int rip_bind(struct inpcb *, struct mbuf *); +int rip_connect(struct inpcb *, struct mbuf *); +void rip_disconnect(struct inpcb *); /* * Nominal space allocated to a raw ip socket. @@ -120,13 +125,29 @@ void rip_disconnect __P((struct inpcb * * Initialize raw connection block q. */ void -rip_init() +rip_init(void) { in_pcbinit(&rawcbtable, 1, 1); } -static struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET }; +static void +rip_sbappendaddr(struct inpcb *last, struct ip *ip, const struct sockaddr *sa, + int hlen, struct mbuf *opts, struct mbuf *n) +{ + if (last->inp_flags & INP_NOHEADER) + m_adj(n, hlen); + if (last->inp_flags & INP_CONTROLOPTS || + last->inp_socket->so_options & SO_TIMESTAMP) + ip_savecontrol(last, &opts, ip, n); + if (sbappendaddr(&last->inp_socket->so_rcv, sa, n, opts) == 0) { + /* should notify about lost packet */ + m_freem(n); + if (opts) + m_freem(opts); + } else + sorwakeup(last->inp_socket); +} /* * Setup generic address and protocol structures @@ -134,42 +155,37 @@ static struct sockaddr_in ripsrc = { siz * mbuf chain. */ void -#if __STDC__ rip_input(struct mbuf *m, ...) -#else -rip_input(m, va_alist) - struct mbuf *m; - va_dcl -#endif { - int off, proto; + int hlen, proto; struct ip *ip = mtod(m, struct ip *); + struct inpcb_hdr *inph; struct inpcb *inp; - struct inpcb *last = 0; - struct mbuf *opts = 0; + struct inpcb *last = NULL; + struct mbuf *n, *opts = NULL; struct sockaddr_in ripsrc; va_list ap; va_start(ap, m); - off = va_arg(ap, int); + (void)va_arg(ap, int); /* ignore value, advance ap */ proto = va_arg(ap, int); va_end(ap); - ripsrc.sin_family = AF_INET; - ripsrc.sin_len = sizeof(struct sockaddr_in); - ripsrc.sin_addr = ip->ip_src; - ripsrc.sin_port = 0; - bzero((caddr_t)ripsrc.sin_zero, sizeof(ripsrc.sin_zero)); + sockaddr_in_init(&ripsrc, &ip->ip_src, 0); /* * XXX Compatibility: programs using raw IP expect ip_len * XXX to have the header length subtracted, and in host order. * XXX ip_off is also expected to be host order. */ - ip->ip_len = ntohs(ip->ip_len) - (ip->ip_hl << 2); + hlen = ip->ip_hl << 2; + ip->ip_len = ntohs(ip->ip_len) - hlen; NTOHS(ip->ip_off); - CIRCLEQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) { + CIRCLEQ_FOREACH(inph, &rawcbtable.inpt_queue, inph_queue) { + inp = (struct inpcb *)inph; + if (inp->inp_af != AF_INET) + continue; if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto) continue; if (!in_nullhost(inp->inp_laddr) && @@ -178,81 +194,58 @@ rip_input(m, va_alist) if (!in_nullhost(inp->inp_faddr) && !in_hosteq(inp->inp_faddr, ip->ip_src)) continue; - if (last) { - struct mbuf *n; - -#ifdef IPSEC - /* check AH/ESP integrity. */ - if (ipsec4_in_reject_so(m, last->inp_socket)) { - ipsecstat.in_polvio++; - /* do not inject data to pcb */ - } else + if (last == NULL) + ; +#if defined(IPSEC) || defined(FAST_IPSEC) + /* check AH/ESP integrity. */ + else if (ipsec4_in_reject_so(m, last->inp_socket)) { + ipsecstat.in_polvio++; + /* do not inject data to pcb */ + } #endif /*IPSEC*/ - if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) { - if (last->inp_flags & INP_CONTROLOPTS || - last->inp_socket->so_options & SO_TIMESTAMP) - ip_savecontrol(last, &opts, ip, n); - if (sbappendaddr(&last->inp_socket->so_rcv, - sintosa(&ripsrc), n, opts) == 0) { - /* should notify about lost packet */ - m_freem(n); - if (opts) - m_freem(opts); - } else - sorwakeup(last->inp_socket); - opts = NULL; - } + else if ((n = m_copypacket(m, M_DONTWAIT)) != NULL) { + rip_sbappendaddr(last, ip, sintosa(&ripsrc), hlen, opts, + n); + opts = NULL; } last = inp; } -#ifdef IPSEC +#if defined(IPSEC) || defined(FAST_IPSEC) /* check AH/ESP integrity. */ - if (last && ipsec4_in_reject_so(m, last->inp_socket)) { + if (last != NULL && ipsec4_in_reject_so(m, last->inp_socket)) { m_freem(m); ipsecstat.in_polvio++; ipstat.ips_delivered--; /* do not inject data to pcb */ } else #endif /*IPSEC*/ - if (last) { - if (last->inp_flags & INP_CONTROLOPTS || - last->inp_socket->so_options & SO_TIMESTAMP) - ip_savecontrol(last, &opts, ip, m); - if (sbappendaddr(&last->inp_socket->so_rcv, - sintosa(&ripsrc), m, opts) == 0) { - m_freem(m); - if (opts) - m_freem(opts); - } else - sorwakeup(last->inp_socket); - } else { - if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) { - icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, - 0, 0); - ipstat.ips_noproto++; - ipstat.ips_delivered--; - } else - m_freem(m); - } + if (last != NULL) + rip_sbappendaddr(last, ip, sintosa(&ripsrc), hlen, opts, m); + else if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) { + icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, + 0, 0); + ipstat.ips_noproto++; + ipstat.ips_delivered--; + } else + m_freem(m); return; } int -rip_pcbnotify(table, faddr, laddr, proto, errno, notify) - struct inpcbtable *table; - struct in_addr faddr, laddr; - int proto; - int errno; - void (*notify) __P((struct inpcb *, int)); +rip_pcbnotify(struct inpcbtable *table, + struct in_addr faddr, struct in_addr laddr, int proto, int errno, + void (*notify)(struct inpcb *, int)) { struct inpcb *inp, *ninp; int nmatch; nmatch = 0; - for (inp = CIRCLEQ_FIRST(&table->inpt_queue); + for (inp = (struct inpcb *)CIRCLEQ_FIRST(&table->inpt_queue); inp != (struct inpcb *)&table->inpt_queue; inp = ninp) { - ninp = inp->inp_queue.cqe_next; + ninp = (struct inpcb *)inp->inp_queue.cqe_next; + if (inp->inp_af != AF_INET) + continue; if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto) continue; if (in_hosteq(inp->inp_faddr, faddr) && @@ -266,13 +259,10 @@ rip_pcbnotify(table, faddr, laddr, proto } void * -rip_ctlinput(cmd, sa, v) - int cmd; - struct sockaddr *sa; - void *v; +rip_ctlinput(int cmd, const struct sockaddr *sa, void *v) { struct ip *ip = v; - void (*notify) __P((struct inpcb *, int)) = in_rtchange; + void (*notify)(struct inpcb *, int) = in_rtchange; int errno; if (sa->sa_family != AF_INET || @@ -288,12 +278,12 @@ rip_ctlinput(cmd, sa, v) else if (errno == 0) return NULL; if (ip) { - rip_pcbnotify(&rawcbtable, satosin(sa)->sin_addr, + rip_pcbnotify(&rawcbtable, satocsin(sa)->sin_addr, ip->ip_src, ip->ip_p, errno, notify); /* XXX mapped address case */ } else - in_pcbnotifyall(&rawcbtable, satosin(sa)->sin_addr, errno, + in_pcbnotifyall(&rawcbtable, satocsin(sa)->sin_addr, errno, notify); return NULL; } @@ -303,13 +293,7 @@ rip_ctlinput(cmd, sa, v) * Tack on options user may have setup with control call. */ int -#if __STDC__ rip_output(struct mbuf *m, ...) -#else -rip_output(m, va_alist) - struct mbuf *m; - va_dcl -#endif { struct inpcb *inp; struct ip *ip; @@ -334,7 +318,9 @@ rip_output(m, va_alist) m_freem(m); return (EMSGSIZE); } - M_PREPEND(m, sizeof(struct ip), M_WAIT); + M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); + if (!m) + return (ENOBUFS); ip = mtod(m, struct ip *); ip->ip_tos = 0; ip->ip_off = htons(0); @@ -350,6 +336,21 @@ rip_output(m, va_alist) return (EMSGSIZE); } ip = mtod(m, struct ip *); + + /* + * If the mbuf is read-only, we need to allocate + * a new mbuf for the header, since we need to + * modify the header. + */ + if (M_READONLY(m)) { + int hlen = ip->ip_hl << 2; + + m = m_copyup(m, hlen, (max_linkhdr + 3) & ~3); + if (m == NULL) + return (ENOMEM); /* XXX */ + ip = mtod(m, struct ip *); + } + /* XXX userland passes ip_len and ip_off in host order */ if (m->m_pkthdr.len != ip->ip_len) { m_freem(m); @@ -358,55 +359,54 @@ rip_output(m, va_alist) HTONS(ip->ip_len); HTONS(ip->ip_off); if (ip->ip_id == 0) - ip->ip_id = htons(ip_id++); + ip->ip_id = ip_newid(); opts = NULL; /* XXX prevent ip_output from overwriting header fields */ flags |= IP_RAWOUTPUT; ipstat.ips_rawout++; } -#ifdef IPSEC - if (ipsec_setsocket(m, inp->inp_socket) != 0) { - m_freem(m); - return ENOBUFS; - } -#endif /*IPSEC*/ return (ip_output(m, opts, &inp->inp_route, flags, inp->inp_moptions, - &inp->inp_errormtu)); + inp->inp_socket, &inp->inp_errormtu)); } /* * Raw IP socket option processing. */ int -rip_ctloutput(op, so, level, optname, m) - int op; - struct socket *so; - int level, optname; - struct mbuf **m; +rip_ctloutput(int op, struct socket *so, int level, int optname, + struct mbuf **m) { struct inpcb *inp = sotoinpcb(so); int error = 0; - if (level != IPPROTO_IP) { - error = ENOPROTOOPT; - if (op == PRCO_SETOPT && *m != 0) - (void) m_free(*m); - } else switch (op) { + if (level == SOL_SOCKET && optname == SO_NOHEADER) { + if (op == PRCO_GETOPT) { + *m = m_intopt(so, + (inp->inp_flags & INP_NOHEADER) ? 1 : 0); + return 0; + } else if (*m == NULL || (*m)->m_len < sizeof(int)) + error = EINVAL; + else if (*mtod(*m, int *)) { + inp->inp_flags &= ~INP_HDRINCL; + inp->inp_flags |= INP_NOHEADER; + } else + inp->inp_flags &= ~INP_NOHEADER; + goto free_m; + } else if (level != IPPROTO_IP) + return ip_ctloutput(op, so, level, optname, m); + + switch (op) { case PRCO_SETOPT: switch (optname) { case IP_HDRINCL: - if (*m == 0 || (*m)->m_len < sizeof (int)) + if (*m == NULL || (*m)->m_len < sizeof(int)) error = EINVAL; - else { - if (*mtod(*m, int *)) - inp->inp_flags |= INP_HDRINCL; - else - inp->inp_flags &= ~INP_HDRINCL; - } - if (*m != 0) - (void) m_free(*m); - break; + else if (*mtod(*m, int *)) + inp->inp_flags |= INP_HDRINCL; + else + inp->inp_flags &= ~INP_HDRINCL; + goto free_m; #ifdef MROUTING case MRT_INIT: @@ -416,6 +416,9 @@ rip_ctloutput(op, so, level, optname, m) case MRT_ADD_MFC: case MRT_DEL_MFC: case MRT_ASSERT: + case MRT_API_CONFIG: + case MRT_ADD_BW_UPCALL: + case MRT_DEL_BW_UPCALL: error = ip_mrouter_set(so, optname, m); break; #endif @@ -429,14 +432,14 @@ rip_ctloutput(op, so, level, optname, m) case PRCO_GETOPT: switch (optname) { case IP_HDRINCL: - *m = m_get(M_WAIT, M_SOOPTS); - (*m)->m_len = sizeof (int); - *mtod(*m, int *) = inp->inp_flags & INP_HDRINCL ? 1 : 0; + *m = m_intopt(so, inp->inp_flags & INP_HDRINCL ? 1 : 0); break; #ifdef MROUTING case MRT_VERSION: case MRT_ASSERT: + case MRT_API_SUPPORT: + case MRT_API_CONFIG: error = ip_mrouter_get(so, optname, m); break; #endif @@ -447,13 +450,15 @@ rip_ctloutput(op, so, level, optname, m) } break; } - return (error); + return error; +free_m: + if (op == PRCO_SETOPT && *m != NULL) + (void)m_free(*m); + return error; } int -rip_bind(inp, nam) - struct inpcb *inp; - struct mbuf *nam; +rip_bind(struct inpcb *inp, struct mbuf *nam) { struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); @@ -472,9 +477,7 @@ rip_bind(inp, nam) } int -rip_connect(inp, nam) - struct inpcb *inp; - struct mbuf *nam; +rip_connect(struct inpcb *inp, struct mbuf *nam) { struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); @@ -490,8 +493,7 @@ rip_connect(inp, nam) } void -rip_disconnect(inp) - struct inpcb *inp; +rip_disconnect(struct inpcb *inp) { inp->inp_faddr = zeroin_addr; @@ -502,11 +504,8 @@ u_long rip_recvspace = RIPRCVQ; /*ARGSUSED*/ int -rip_usrreq(so, req, m, nam, control, p) - struct socket *so; - int req; - struct mbuf *m, *nam, *control; - struct proc *p; +rip_usrreq(struct socket *so, int req, + struct mbuf *m, struct mbuf *nam, struct mbuf *control, struct lwp *l) { struct inpcb *inp; int s; @@ -516,17 +515,19 @@ rip_usrreq(so, req, m, nam, control, p) #endif if (req == PRU_CONTROL) - return (in_control(so, (long)m, (caddr_t)nam, - (struct ifnet *)control, p)); + return (in_control(so, (long)m, (void *)nam, + (struct ifnet *)control, l)); + + s = splsoftnet(); if (req == PRU_PURGEIF) { in_pcbpurgeif0(&rawcbtable, (struct ifnet *)control); in_purgeif((struct ifnet *)control); in_pcbpurgeif(&rawcbtable, (struct ifnet *)control); + splx(s); return (0); } - s = splsoftnet(); inp = sotoinpcb(so); #ifdef DIAGNOSTIC if (req != PRU_SEND && req != PRU_SENDOOB && control) @@ -544,10 +545,14 @@ rip_usrreq(so, req, m, nam, control, p) error = EISCONN; break; } - if (p == 0 || (error = suser(p->p_ucred, &p->p_acflag))) { + + if (l == NULL) { error = EACCES; break; } + + /* XXX: raw socket permissions are checked in socreate() */ + if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { error = soreserve(so, rip_sendspace, rip_recvspace); if (error) @@ -671,3 +676,32 @@ release: splx(s); return (error); } + +SYSCTL_SETUP(sysctl_net_inet_raw_setup, "sysctl net.inet.raw subtree setup") +{ + + sysctl_createv(clog, 0, NULL, NULL, + CTLFLAG_PERMANENT, + CTLTYPE_NODE, "net", NULL, + NULL, 0, NULL, 0, + CTL_NET, CTL_EOL); + sysctl_createv(clog, 0, NULL, NULL, + CTLFLAG_PERMANENT, + CTLTYPE_NODE, "inet", NULL, + NULL, 0, NULL, 0, + CTL_NET, PF_INET, CTL_EOL); + sysctl_createv(clog, 0, NULL, NULL, + CTLFLAG_PERMANENT, + CTLTYPE_NODE, "raw", + SYSCTL_DESCR("Raw IPv4 settings"), + NULL, 0, NULL, 0, + CTL_NET, PF_INET, IPPROTO_RAW, CTL_EOL); + + sysctl_createv(clog, 0, NULL, NULL, + CTLFLAG_PERMANENT, + CTLTYPE_STRUCT, "pcblist", + SYSCTL_DESCR("Raw IPv4 control block list"), + sysctl_inpcblist, 0, &rawcbtable, 0, + CTL_NET, PF_INET, IPPROTO_RAW, + CTL_CREATE, CTL_EOL); +}