Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/netinet/raw_ip.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/netinet/raw_ip.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.94.4.2 retrieving revision 1.120 diff -u -p -r1.94.4.2 -r1.120 --- src/sys/netinet/raw_ip.c 2007/03/12 05:59:38 1.94.4.2 +++ src/sys/netinet/raw_ip.c 2014/05/18 14:46:16 1.120 @@ -1,4 +1,4 @@ -/* $NetBSD: raw_ip.c,v 1.94.4.2 2007/03/12 05:59:38 rmind Exp $ */ +/* $NetBSD: raw_ip.c,v 1.120 2014/05/18 14:46:16 rmind Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -61,9 +61,10 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.94.4.2 2007/03/12 05:59:38 rmind Exp $"); +__KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.120 2014/05/18 14:46:16 rmind Exp $"); #include "opt_inet.h" +#include "opt_compat_netbsd.h" #include "opt_ipsec.h" #include "opt_mrouting.h" @@ -86,22 +87,22 @@ __KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1 #include #include #include +#include #include #include #include #include #include -#include - #ifdef IPSEC -#include -#endif /*IPSEC*/ - -#ifdef FAST_IPSEC #include -#include /* XXX ipsecstat namespace */ -#endif /* FAST_IPSEC*/ +#include +#include +#endif /* IPSEC */ + +#ifdef COMPAT_50 +#include +#endif struct inpcbtable rawcbtable; @@ -111,6 +112,8 @@ int rip_bind(struct inpcb *, struct mbu int rip_connect(struct inpcb *, struct mbuf *); void rip_disconnect(struct inpcb *); +static void sysctl_net_inet_raw_setup(struct sysctllog **); + /* * Nominal space allocated to a raw ip socket. */ @@ -128,9 +131,31 @@ void rip_init(void) { + sysctl_net_inet_raw_setup(NULL); in_pcbinit(&rawcbtable, 1, 1); } +static void +rip_sbappendaddr(struct inpcb *last, struct ip *ip, const struct sockaddr *sa, + int hlen, struct mbuf *opts, struct mbuf *n) +{ + if (last->inp_flags & INP_NOHEADER) + m_adj(n, hlen); + if (last->inp_flags & INP_CONTROLOPTS +#ifdef SO_OTIMESTAMP + || last->inp_socket->so_options & SO_OTIMESTAMP +#endif + || last->inp_socket->so_options & SO_TIMESTAMP) + ip_savecontrol(last, &opts, ip, n); + if (sbappendaddr(&last->inp_socket->so_rcv, sa, n, opts) == 0) { + /* should notify about lost packet */ + m_freem(n); + if (opts) + m_freem(opts); + } else + sorwakeup(last->inp_socket); +} + /* * Setup generic address and protocol structures * for raw_input routine, then pass them along with @@ -139,12 +164,12 @@ rip_init(void) void rip_input(struct mbuf *m, ...) { - int proto; + int hlen, proto; struct ip *ip = mtod(m, struct ip *); struct inpcb_hdr *inph; struct inpcb *inp; - struct inpcb *last = 0; - struct mbuf *opts = 0; + struct inpcb *last = NULL; + struct mbuf *n, *opts = NULL; struct sockaddr_in ripsrc; va_list ap; @@ -153,21 +178,18 @@ rip_input(struct mbuf *m, ...) proto = va_arg(ap, int); va_end(ap); - ripsrc.sin_family = AF_INET; - ripsrc.sin_len = sizeof(struct sockaddr_in); - ripsrc.sin_addr = ip->ip_src; - ripsrc.sin_port = 0; - bzero((void *)ripsrc.sin_zero, sizeof(ripsrc.sin_zero)); + sockaddr_in_init(&ripsrc, &ip->ip_src, 0); /* * XXX Compatibility: programs using raw IP expect ip_len * XXX to have the header length subtracted, and in host order. * XXX ip_off is also expected to be host order. */ - ip->ip_len = ntohs(ip->ip_len) - (ip->ip_hl << 2); + hlen = ip->ip_hl << 2; + ip->ip_len = ntohs(ip->ip_len) - hlen; NTOHS(ip->ip_off); - CIRCLEQ_FOREACH(inph, &rawcbtable.inpt_queue, inph_queue) { + TAILQ_FOREACH(inph, &rawcbtable.inpt_queue, inph_queue) { inp = (struct inpcb *)inph; if (inp->inp_af != AF_INET) continue; @@ -179,62 +201,44 @@ rip_input(struct mbuf *m, ...) if (!in_nullhost(inp->inp_faddr) && !in_hosteq(inp->inp_faddr, ip->ip_src)) continue; - if (last) { - struct mbuf *n; - -#if defined(IPSEC) || defined(FAST_IPSEC) - /* check AH/ESP integrity. */ - if (ipsec4_in_reject_so(m, last->inp_socket)) { - ipsecstat.in_polvio++; - /* do not inject data to pcb */ - } else + if (last == NULL) + ; +#if defined(IPSEC) + /* check AH/ESP integrity. */ + else if (ipsec4_in_reject_so(m, last->inp_socket)) { + IPSEC_STATINC(IPSEC_STAT_IN_POLVIO); + /* do not inject data to pcb */ + } #endif /*IPSEC*/ - if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) { - if (last->inp_flags & INP_CONTROLOPTS || - last->inp_socket->so_options & SO_TIMESTAMP) - ip_savecontrol(last, &opts, ip, n); - if (sbappendaddr(&last->inp_socket->so_rcv, - sintosa(&ripsrc), n, opts) == 0) { - /* should notify about lost packet */ - m_freem(n); - if (opts) - m_freem(opts); - } else - sorwakeup(last->inp_socket); - opts = NULL; - } + else if ((n = m_copypacket(m, M_DONTWAIT)) != NULL) { + rip_sbappendaddr(last, ip, sintosa(&ripsrc), hlen, opts, + n); + opts = NULL; } last = inp; } -#if defined(IPSEC) || defined(FAST_IPSEC) +#if defined(IPSEC) /* check AH/ESP integrity. */ - if (last && ipsec4_in_reject_so(m, last->inp_socket)) { + if (last != NULL && ipsec4_in_reject_so(m, last->inp_socket)) { m_freem(m); - ipsecstat.in_polvio++; - ipstat.ips_delivered--; + IPSEC_STATINC(IPSEC_STAT_IN_POLVIO); + IP_STATDEC(IP_STAT_DELIVERED); /* do not inject data to pcb */ } else #endif /*IPSEC*/ - if (last) { - if (last->inp_flags & INP_CONTROLOPTS || - last->inp_socket->so_options & SO_TIMESTAMP) - ip_savecontrol(last, &opts, ip, m); - if (sbappendaddr(&last->inp_socket->so_rcv, - sintosa(&ripsrc), m, opts) == 0) { - m_freem(m); - if (opts) - m_freem(opts); - } else - sorwakeup(last->inp_socket); - } else { - if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) { - icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, - 0, 0); - ipstat.ips_noproto++; - ipstat.ips_delivered--; - } else - m_freem(m); - } + if (last != NULL) + rip_sbappendaddr(last, ip, sintosa(&ripsrc), hlen, opts, m); + else if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) { + uint64_t *ips; + + icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, + 0, 0); + ips = IP_STAT_GETREF(); + ips[IP_STAT_NOPROTO]++; + ips[IP_STAT_DELIVERED]--; + IP_STAT_PUTREF(); + } else + m_freem(m); return; } @@ -243,14 +247,12 @@ rip_pcbnotify(struct inpcbtable *table, struct in_addr faddr, struct in_addr laddr, int proto, int errno, void (*notify)(struct inpcb *, int)) { - struct inpcb *inp, *ninp; + struct inpcb_hdr *inph, *ninph; int nmatch; nmatch = 0; - for (inp = (struct inpcb *)CIRCLEQ_FIRST(&table->inpt_queue); - inp != (struct inpcb *)&table->inpt_queue; - inp = ninp) { - ninp = (struct inpcb *)inp->inp_queue.cqe_next; + TAILQ_FOREACH_SAFE(inph, &table->inpt_queue, inph_queue, ninph) { + struct inpcb *inp = (struct inpcb *)inph; if (inp->inp_af != AF_INET) continue; if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto) @@ -365,12 +367,12 @@ rip_output(struct mbuf *m, ...) } HTONS(ip->ip_len); HTONS(ip->ip_off); - if (ip->ip_id == 0) - ip->ip_id = ip_newid(); + if (ip->ip_id != 0 || m->m_pkthdr.len < IP_MINFRAGSIZE) + flags |= IP_NOIPNEWID; opts = NULL; /* XXX prevent ip_output from overwriting header fields */ flags |= IP_RAWOUTPUT; - ipstat.ips_rawout++; + IP_STATINC(IP_STAT_RAWOUT); } return (ip_output(m, opts, &inp->inp_route, flags, inp->inp_moptions, inp->inp_socket, &inp->inp_errormtu)); @@ -380,31 +382,42 @@ rip_output(struct mbuf *m, ...) * Raw IP socket option processing. */ int -rip_ctloutput(int op, struct socket *so, int level, int optname, - struct mbuf **m) +rip_ctloutput(int op, struct socket *so, struct sockopt *sopt) { struct inpcb *inp = sotoinpcb(so); int error = 0; + int optval; - if (level != IPPROTO_IP) { - error = ENOPROTOOPT; - if (op == PRCO_SETOPT && *m != 0) - (void) m_free(*m); - } else switch (op) { + if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_NOHEADER) { + if (op == PRCO_GETOPT) { + optval = (inp->inp_flags & INP_NOHEADER) ? 1 : 0; + error = sockopt_set(sopt, &optval, sizeof(optval)); + } else if (op == PRCO_SETOPT) { + error = sockopt_getint(sopt, &optval); + if (error) + goto out; + if (optval) { + inp->inp_flags &= ~INP_HDRINCL; + inp->inp_flags |= INP_NOHEADER; + } else + inp->inp_flags &= ~INP_NOHEADER; + } + goto out; + } else if (sopt->sopt_level != IPPROTO_IP) + return ip_ctloutput(op, so, sopt); + + switch (op) { case PRCO_SETOPT: - switch (optname) { + switch (sopt->sopt_name) { case IP_HDRINCL: - if (*m == 0 || (*m)->m_len < sizeof (int)) - error = EINVAL; - else { - if (*mtod(*m, int *)) - inp->inp_flags |= INP_HDRINCL; - else - inp->inp_flags &= ~INP_HDRINCL; - } - if (*m != 0) - (void) m_free(*m); + error = sockopt_getint(sopt, &optval); + if (error) + break; + if (optval) + inp->inp_flags |= INP_HDRINCL; + else + inp->inp_flags &= ~INP_HDRINCL; break; #ifdef MROUTING @@ -418,23 +431,21 @@ rip_ctloutput(int op, struct socket *so, case MRT_API_CONFIG: case MRT_ADD_BW_UPCALL: case MRT_DEL_BW_UPCALL: - error = ip_mrouter_set(so, optname, m); + error = ip_mrouter_set(so, sopt); break; #endif default: - error = ip_ctloutput(op, so, level, optname, m); + error = ip_ctloutput(op, so, sopt); break; } break; case PRCO_GETOPT: - switch (optname) { + switch (sopt->sopt_name) { case IP_HDRINCL: - *m = m_get(M_WAIT, MT_SOOPTS); - MCLAIM((*m), so->so_mowner); - (*m)->m_len = sizeof (int); - *mtod(*m, int *) = inp->inp_flags & INP_HDRINCL ? 1 : 0; + optval = inp->inp_flags & INP_HDRINCL; + error = sockopt_set(sopt, &optval, sizeof(optval)); break; #ifdef MROUTING @@ -442,17 +453,18 @@ rip_ctloutput(int op, struct socket *so, case MRT_ASSERT: case MRT_API_SUPPORT: case MRT_API_CONFIG: - error = ip_mrouter_get(so, optname, m); + error = ip_mrouter_get(so, sopt); break; #endif default: - error = ip_ctloutput(op, so, level, optname, m); + error = ip_ctloutput(op, so, sopt); break; } break; } - return (error); + out: + return error; } int @@ -462,10 +474,9 @@ rip_bind(struct inpcb *inp, struct mbuf if (nam->m_len != sizeof(*addr)) return (EINVAL); - if (TAILQ_FIRST(&ifnet) == 0) + if (!IFNET_FIRST()) return (EADDRNOTAVAIL); - if (addr->sin_family != AF_INET && - addr->sin_family != AF_IMPLINK) + if (addr->sin_family != AF_INET) return (EAFNOSUPPORT); if (!in_nullhost(addr->sin_addr) && ifa_ifwithaddr(sintosa(addr)) == 0) @@ -481,10 +492,9 @@ rip_connect(struct inpcb *inp, struct mb if (nam->m_len != sizeof(*addr)) return (EINVAL); - if (TAILQ_FIRST(&ifnet) == 0) + if (!IFNET_FIRST()) return (EADDRNOTAVAIL); - if (addr->sin_family != AF_INET && - addr->sin_family != AF_IMPLINK) + if (addr->sin_family != AF_INET) return (EAFNOSUPPORT); inp->inp_faddr = addr->sin_addr; return (0); @@ -513,15 +523,16 @@ rip_usrreq(struct socket *so, int req, #endif if (req == PRU_CONTROL) - return (in_control(so, (long)m, (void *)nam, - (struct ifnet *)control, l)); + return in_control(so, (long)m, nam, (struct ifnet *)control, l); s = splsoftnet(); if (req == PRU_PURGEIF) { + mutex_enter(softnet_lock); in_pcbpurgeif0(&rawcbtable, (struct ifnet *)control); in_purgeif((struct ifnet *)control); in_pcbpurgeif(&rawcbtable, (struct ifnet *)control); + mutex_exit(softnet_lock); splx(s); return (0); } @@ -531,7 +542,7 @@ rip_usrreq(struct socket *so, int req, if (req != PRU_SEND && req != PRU_SENDOOB && control) panic("rip_usrreq: unexpected control mbuf"); #endif - if (inp == 0 && req != PRU_ATTACH) { + if (inp == NULL && req != PRU_ATTACH) { error = EINVAL; goto release; } @@ -539,6 +550,7 @@ rip_usrreq(struct socket *so, int req, switch (req) { case PRU_ATTACH: + sosetlock(so); if (inp != 0) { error = EISCONN; break; @@ -675,16 +687,20 @@ release: return (error); } -SYSCTL_SETUP(sysctl_net_inet_raw_setup, "sysctl net.inet.raw subtree setup") +PR_WRAP_USRREQ(rip_usrreq) + +#define rip_usrreq rip_usrreq_wrapper + +const struct pr_usrreqs rip_usrreqs = { + .pr_generic = rip_usrreq, +}; + +static void +sysctl_net_inet_raw_setup(struct sysctllog **clog) { sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT, - CTLTYPE_NODE, "net", NULL, - NULL, 0, NULL, 0, - CTL_NET, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT, CTLTYPE_NODE, "inet", NULL, NULL, 0, NULL, 0, CTL_NET, PF_INET, CTL_EOL);