Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.215.4.4 retrieving revision 1.260 diff -u -p -r1.215.4.4 -r1.260 --- src/sys/kern/uipc_socket.c 2014/05/18 17:46:08 1.215.4.4 +++ src/sys/kern/uipc_socket.c 2018/03/19 16:31:24 1.260 @@ -1,4 +1,4 @@ -/* $NetBSD: uipc_socket.c,v 1.215.4.4 2014/05/18 17:46:08 rmind Exp $ */ +/* $NetBSD: uipc_socket.c,v 1.260 2018/03/19 16:31:24 roy Exp $ */ /*- * Copyright (c) 2002, 2007, 2008, 2009 The NetBSD Foundation, Inc. @@ -71,14 +71,17 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.215.4.4 2014/05/18 17:46:08 rmind Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.260 2018/03/19 16:31:24 roy Exp $"); +#ifdef _KERNEL_OPT #include "opt_compat_netbsd.h" #include "opt_sock_counters.h" #include "opt_sosend_loan.h" #include "opt_mbuftrace.h" #include "opt_somaxkva.h" #include "opt_multiprocessor.h" /* XXX */ +#include "opt_sctp.h" +#endif #include #include @@ -292,7 +295,8 @@ sopendfree_thread(void *v) for (; m != NULL; m = next) { next = m->m_next; - KASSERT((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0); + KASSERT((~m->m_flags & (M_EXT|M_EXT_PAGES)) == + 0); KASSERT(m->m_ext.ext_refcnt == 0); rv += m->m_ext.ext_size; @@ -431,17 +435,20 @@ socket_listener_cb(kauth_cred_t cred, ka case KAUTH_REQ_NETWORK_SOCKET_OPEN: /* We allow "raw" routing/bluetooth sockets to anyone. */ - if ((u_long)arg1 == PF_ROUTE || (u_long)arg1 == PF_OROUTE - || (u_long)arg1 == PF_BLUETOOTH) { + switch ((u_long)arg1) { + case PF_ROUTE: + case PF_OROUTE: + case PF_BLUETOOTH: + case PF_CAN: result = KAUTH_RESULT_ALLOW; - } else { + break; + default: /* Privileged, let secmodel handle this. */ if ((u_long)arg2 == SOCK_RAW) break; + result = KAUTH_RESULT_ALLOW; + break; } - - result = KAUTH_RESULT_ALLOW; - break; case KAUTH_REQ_NETWORK_SOCKET_CANSEE: @@ -528,8 +535,6 @@ socreate(int dom, struct socket **aso, i so = soget(true); so->so_type = type; so->so_proto = prp; - so->so_refcnt = 1; - so->so_send = sosend; so->so_receive = soreceive; #ifdef MBUFTRACE @@ -542,24 +547,28 @@ socreate(int dom, struct socket **aso, i so->so_cpid = l->l_proc->p_pid; /* - * Lock assigned and taken during pr_attach, unless we share + * Lock assigned and taken during PCB attach, unless we share * the lock with another socket, e.g. socketpair(2) case. */ if (lockso) { lock = lockso->so_lock; so->so_lock = lock; mutex_obj_hold(lock); + mutex_enter(lock); } + /* Attach the PCB (returns with the socket lock held). */ error = (*prp->pr_usrreqs->pr_attach)(so, proto); + KASSERT(solocked(so)); + if (error) { - solock(so); KASSERT(so->so_pcb == NULL); so->so_state |= SS_NOFDREF; sofree(so); return error; } so->so_cred = kauth_cred_dup(l->l_cred); + sounlock(so); *aso = so; return 0; @@ -598,7 +607,7 @@ fsocreate(int domain, struct socket **so if (flags & SOCK_NONBLOCK) { so->so_state |= SS_NBIO; } - fp->f_data = so; + fp->f_socket = so; fd_affix(curproc, fp, fd); if (sop != NULL) { @@ -608,21 +617,6 @@ fsocreate(int domain, struct socket **so return error; } -void -soref(struct socket *so) -{ - atomic_inc_uint(&so->so_refcnt); -} - -void -sounref(struct socket *so) -{ - if (atomic_dec_uint_nv(&so->so_refcnt) > 0) { - return; - } - soput(so); -} - int sofamily(const struct socket *so) { @@ -637,13 +631,16 @@ sofamily(const struct socket *so) } int -sobind(struct socket *so, struct mbuf *nam, struct lwp *l) +sobind(struct socket *so, struct sockaddr *nam, struct lwp *l) { - int error; + int error; solock(so); - error = (*so->so_proto->pr_usrreqs->pr_generic)(so, - PRU_BIND, NULL, nam, NULL, l); + if (nam->sa_family != so->so_proto->pr_domain->dom_family) { + sounlock(so); + return EAFNOSUPPORT; + } + error = (*so->so_proto->pr_usrreqs->pr_bind)(so, nam, l); sounlock(so); return error; } @@ -651,25 +648,30 @@ sobind(struct socket *so, struct mbuf *n int solisten(struct socket *so, int backlog, struct lwp *l) { - int error; + int error; + short oldopt, oldqlimit; solock(so); - if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | + if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) != 0) { sounlock(so); return EINVAL; } - error = (*so->so_proto->pr_usrreqs->pr_generic)(so, - PRU_LISTEN, NULL, NULL, NULL, l); - if (error != 0) { - sounlock(so); - return error; - } + oldopt = so->so_options; + oldqlimit = so->so_qlimit; if (TAILQ_EMPTY(&so->so_q)) so->so_options |= SO_ACCEPTCONN; if (backlog < 0) backlog = 0; so->so_qlimit = min(backlog, somaxconn); + + error = (*so->so_proto->pr_usrreqs->pr_listen)(so, l); + if (error != 0) { + so->so_options = oldopt; + so->so_qlimit = oldqlimit; + sounlock(so); + return error; + } sounlock(so); return 0; } @@ -677,6 +679,7 @@ solisten(struct socket *so, int backlog, void sofree(struct socket *so) { + u_int refs; KASSERT(solocked(so)); @@ -706,13 +709,13 @@ sofree(struct socket *so) KASSERT(!cv_has_waiters(&so->so_rcv.sb_cv)); KASSERT(!cv_has_waiters(&so->so_snd.sb_cv)); sorflush(so); + refs = so->so_aborting; /* XXX */ /* Remove acccept filter if one is present. */ if (so->so_accf != NULL) (void)accept_filt_clear(so); sounlock(so); - - /* Will soput() if the last reference. */ - sounref(so); + if (refs == 0) /* XXX */ + soput(so); } /* @@ -768,6 +771,7 @@ soclose(struct socket *so) } drop: if (so->so_pcb) { + KASSERT(solocked(so)); (*so->so_proto->pr_usrreqs->pr_detach)(so); } discard: @@ -790,24 +794,19 @@ soabort(struct socket *so) KASSERT(solocked(so)); KASSERT(so->so_head == NULL); - soref(so); - error = (*so->so_proto->pr_usrreqs->pr_generic)(so, - PRU_ABORT, NULL, NULL, NULL, NULL); - refs = so->so_refcnt; - sounref(so); - - /* XXX: Fix PRU_ABORT to behave consistently. */ - if (error || refs == 1) { + so->so_aborting++; /* XXX */ + error = (*so->so_proto->pr_usrreqs->pr_abort)(so); + refs = --so->so_aborting; /* XXX */ + if (error || (refs == 0)) { sofree(so); } else { sounlock(so); } - sounref(so); return error; } int -soaccept(struct socket *so, struct mbuf *nam) +soaccept(struct socket *so, struct sockaddr *nam) { int error; @@ -815,11 +814,9 @@ soaccept(struct socket *so, struct mbuf KASSERT((so->so_state & SS_NOFDREF) != 0); so->so_state &= ~SS_NOFDREF; - if ((so->so_state & SS_ISDISCONNECTED) == 0 || (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0) - error = (*so->so_proto->pr_usrreqs->pr_generic)(so, - PRU_ACCEPT, NULL, nam, NULL, NULL); + error = (*so->so_proto->pr_usrreqs->pr_accept)(so, nam); else error = ECONNABORTED; @@ -827,7 +824,7 @@ soaccept(struct socket *so, struct mbuf } int -soconnect(struct socket *so, struct mbuf *nam, struct lwp *l) +soconnect(struct socket *so, struct sockaddr *nam, struct lwp *l) { int error; @@ -843,11 +840,14 @@ soconnect(struct socket *so, struct mbuf */ if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && ((so->so_proto->pr_flags & PR_CONNREQUIRED) || - (error = sodisconnect(so)))) + (error = sodisconnect(so)))) { error = EISCONN; - else - error = (*so->so_proto->pr_usrreqs->pr_generic)(so, - PRU_CONNECT, NULL, nam, NULL, l); + } else { + if (nam->sa_family != so->so_proto->pr_domain->dom_family) { + return EAFNOSUPPORT; + } + error = (*so->so_proto->pr_usrreqs->pr_connect)(so, nam, l); + } return error; } @@ -857,8 +857,7 @@ soconnect2(struct socket *so1, struct so { KASSERT(solocked2(so1, so2)); - return (*so1->so_proto->pr_usrreqs->pr_generic)(so1, - PRU_CONNECT2, NULL, (struct mbuf *)so2, NULL, NULL); + return (*so1->so_proto->pr_usrreqs->pr_connect2)(so1, so2); } int @@ -873,8 +872,7 @@ sodisconnect(struct socket *so) } else if (so->so_state & SS_ISDISCONNECTING) { error = EALREADY; } else { - error = (*so->so_proto->pr_usrreqs->pr_generic)(so, - PRU_DISCONNECT, NULL, NULL, NULL, NULL); + error = (*so->so_proto->pr_usrreqs->pr_disconnect)(so); } return (error); } @@ -898,8 +896,8 @@ sodisconnect(struct socket *so) * Data and control buffers are freed on return. */ int -sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top, - struct mbuf *control, int flags, struct lwp *l) +sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, + struct mbuf *top, struct mbuf *control, int flags, struct lwp *l) { struct mbuf **mp, *m; long space, len, resid, clen, mlen; @@ -956,7 +954,7 @@ sosend(struct socket *so, struct mbuf *a error = ENOTCONN; goto release; } - } else if (addr == 0) { + } else if (addr == NULL) { error = EDESTADDRREQ; goto release; } @@ -1004,7 +1002,7 @@ sosend(struct socket *so, struct mbuf *a m = m_gethdr(M_WAIT, MT_DATA); mlen = MHLEN; m->m_pkthdr.len = 0; - m->m_pkthdr.rcvif = NULL; + m_reset_rcvif(m); } else { m = m_get(M_WAIT, MT_DATA); mlen = MLEN; @@ -1070,9 +1068,13 @@ sosend(struct socket *so, struct mbuf *a so->so_options |= SO_DONTROUTE; if (resid > 0) so->so_state |= SS_MORETOCOME; - error = (*so->so_proto->pr_usrreqs->pr_generic)(so, - (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND, - top, addr, control, curlwp); + if (flags & MSG_OOB) { + error = (*so->so_proto->pr_usrreqs->pr_sendoob)( + so, top, control); + } else { + error = (*so->so_proto->pr_usrreqs->pr_send)(so, + top, addr, control, l); + } if (dontroute) so->so_options &= ~SO_DONTROUTE; if (resid > 0) @@ -1184,8 +1186,7 @@ soreceive(struct socket *so, struct mbuf if (flags & MSG_OOB) { m = m_get(M_WAIT, MT_DATA); solock(so); - error = (*pr->pr_usrreqs->pr_generic)(so, PRU_RCVOOB, m, - (struct mbuf *)(long)(flags & MSG_PEEK), NULL, l); + error = (*pr->pr_usrreqs->pr_recvoob)(so, m, flags & MSG_PEEK); sounlock(so); if (error) goto bad; @@ -1243,8 +1244,7 @@ soreceive(struct socket *so, struct mbuf if (m != NULL) goto dontblock; error = so->so_error; - if ((flags & MSG_PEEK) == 0) - so->so_error = 0; + so->so_error = 0; goto release; } if (so->so_state & SS_CANTRCVMORE) { @@ -1327,12 +1327,35 @@ soreceive(struct socket *so, struct mbuf m->m_next = NULL; m = so->so_rcv.sb_mb; } else { - MFREE(m, so->so_rcv.sb_mb); - m = so->so_rcv.sb_mb; + m = so->so_rcv.sb_mb = m_free(m); } sbsync(&so->so_rcv, nextrecord); } } + if (pr->pr_flags & PR_ADDR_OPT) { + /* + * For SCTP we may be getting a + * whole message OR a partial delivery. + */ + if (m->m_type == MT_SONAME) { + orig_resid = 0; + if (flags & MSG_PEEK) { + if (paddr) + *paddr = m_copy(m, 0, m->m_len); + m = m->m_next; + } else { + sbfree(&so->so_rcv, m); + if (paddr) { + *paddr = m; + so->so_rcv.sb_mb = m->m_next; + m->m_next = 0; + m = so->so_rcv.sb_mb; + } else { + m = so->so_rcv.sb_mb = m_free(m); + } + } + } + } /* * Process one or more MT_CONTROL mbufs present before any data mbufs @@ -1387,7 +1410,7 @@ soreceive(struct socket *so, struct mbuf */ if (dom->dom_dispose != NULL && type == SCM_RIGHTS) { - sounlock(so); + sounlock(so); (*dom->dom_dispose)(cm); solock(so); } @@ -1467,6 +1490,10 @@ soreceive(struct socket *so, struct mbuf if (len == m->m_len - moff) { if (m->m_flags & M_EOR) flags |= MSG_EOR; +#ifdef SCTP + if (m->m_flags & M_NOTIFICATION) + flags |= MSG_NOTIFICATION; +#endif /* SCTP */ if (flags & MSG_PEEK) { m = m->m_next; moff = 0; @@ -1479,8 +1506,7 @@ soreceive(struct socket *so, struct mbuf so->so_rcv.sb_mb = m = m->m_next; *mp = NULL; } else { - MFREE(m, so->so_rcv.sb_mb); - m = so->so_rcv.sb_mb; + m = so->so_rcv.sb_mb = m_free(m); } /* * If m != NULL, we also know that @@ -1552,8 +1578,7 @@ soreceive(struct socket *so, struct mbuf * get it filled again. */ if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb) - (*pr->pr_usrreqs->pr_generic)(so, PRU_RCVD, - NULL, (struct mbuf *)(long)flags, NULL, l); + (*pr->pr_usrreqs->pr_rcvd)(so, flags, l); SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2"); SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2"); if (wakeup_state & SS_RESTARTSYS) @@ -1594,8 +1619,7 @@ soreceive(struct socket *so, struct mbuf SBLASTRECORDCHK(&so->so_rcv, "soreceive 4"); SBLASTMBUFCHK(&so->so_rcv, "soreceive 4"); if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) - (*pr->pr_usrreqs->pr_generic)(so, PRU_RCVD, NULL, - (struct mbuf *)(long)flags, NULL, l); + (*pr->pr_usrreqs->pr_rcvd)(so, flags, l); } if (orig_resid == uio->uio_resid && orig_resid && (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { @@ -1629,8 +1653,7 @@ soshutdown(struct socket *so, int how) error = 0; } if (how == SHUT_WR || how == SHUT_RDWR) - error = (*pr->pr_usrreqs->pr_generic)(so, - PRU_SHUTDOWN, NULL, NULL, NULL, NULL); + error = (*pr->pr_usrreqs->pr_shutdown)(so); return error; } @@ -1701,22 +1724,22 @@ sosetopt1(struct socket *so, const struc KASSERT(solocked(so)); break; - case SO_LINGER: - error = sockopt_get(sopt, &l, sizeof(l)); + case SO_LINGER: + error = sockopt_get(sopt, &l, sizeof(l)); solock(so); - if (error) - break; - if (l.l_linger < 0 || l.l_linger > USHRT_MAX || - l.l_linger > (INT_MAX / hz)) { + if (error) + break; + if (l.l_linger < 0 || l.l_linger > USHRT_MAX || + l.l_linger > (INT_MAX / hz)) { error = EDOM; break; } - so->so_linger = l.l_linger; - if (l.l_onoff) - so->so_options |= SO_LINGER; - else - so->so_options &= ~SO_LINGER; - break; + so->so_linger = l.l_linger; + if (l.l_onoff) + so->so_options |= SO_LINGER; + else + so->so_options &= ~SO_LINGER; + break; case SO_DEBUG: case SO_KEEPALIVE: @@ -1895,7 +1918,7 @@ so_setsockopt(struct lwp *l, struct sock return error; } - + /* * internal get SOL_SOCKET options */ @@ -2072,7 +2095,7 @@ sockopt_destroy(struct sockopt *sopt) /* * set sockopt value * - value is copied into sockopt - * - memory is allocated when necessary, will not sleep + * - memory is allocated when necessary, will not sleep */ int sockopt_set(struct sockopt *sopt, const void *buf, size_t len) @@ -2085,8 +2108,12 @@ sockopt_set(struct sockopt *sopt, const return error; } - KASSERT(sopt->sopt_size == len); + if (sopt->sopt_size < len) + return EINVAL; + memcpy(sopt->sopt_data, buf, len); + sopt->sopt_retsize = len; + return 0; } @@ -2145,9 +2172,12 @@ sockopt_setmbuf(struct sockopt *sopt, st return error; } - KASSERT(sopt->sopt_size == len); + if (sopt->sopt_size < len) + return EINVAL; + m_copydata(m, 0, len, sopt->sopt_data); m_freem(m); + sopt->sopt_retsize = len; return 0; } @@ -2197,7 +2227,7 @@ filt_sordetach(struct knote *kn) { struct socket *so; - so = ((file_t *)kn->kn_obj)->f_data; + so = ((file_t *)kn->kn_obj)->f_socket; solock(so); SLIST_REMOVE(&so->so_rcv.sb_sel.sel_klist, kn, knote, kn_selnext); if (SLIST_EMPTY(&so->so_rcv.sb_sel.sel_klist)) @@ -2212,7 +2242,7 @@ filt_soread(struct knote *kn, long hint) struct socket *so; int rv; - so = ((file_t *)kn->kn_obj)->f_data; + so = ((file_t *)kn->kn_obj)->f_socket; if (hint != NOTE_SUBMIT) solock(so); kn->kn_data = so->so_rcv.sb_cc; @@ -2224,7 +2254,7 @@ filt_soread(struct knote *kn, long hint) rv = 1; else if (kn->kn_sfflags & NOTE_LOWAT) rv = (kn->kn_data >= kn->kn_sdata); - else + else rv = (kn->kn_data >= so->so_rcv.sb_lowat); if (hint != NOTE_SUBMIT) sounlock(so); @@ -2236,7 +2266,7 @@ filt_sowdetach(struct knote *kn) { struct socket *so; - so = ((file_t *)kn->kn_obj)->f_data; + so = ((file_t *)kn->kn_obj)->f_socket; solock(so); SLIST_REMOVE(&so->so_snd.sb_sel.sel_klist, kn, knote, kn_selnext); if (SLIST_EMPTY(&so->so_snd.sb_sel.sel_klist)) @@ -2251,7 +2281,7 @@ filt_sowrite(struct knote *kn, long hint struct socket *so; int rv; - so = ((file_t *)kn->kn_obj)->f_data; + so = ((file_t *)kn->kn_obj)->f_socket; if (hint != NOTE_SUBMIT) solock(so); kn->kn_data = sbspace(&so->so_snd); @@ -2280,7 +2310,7 @@ filt_solisten(struct knote *kn, long hin struct socket *so; int rv; - so = ((file_t *)kn->kn_obj)->f_data; + so = ((file_t *)kn->kn_obj)->f_socket; /* * Set kn_data to number of incoming connections, not @@ -2295,12 +2325,26 @@ filt_solisten(struct knote *kn, long hin return rv; } -static const struct filterops solisten_filtops = - { 1, NULL, filt_sordetach, filt_solisten }; -static const struct filterops soread_filtops = - { 1, NULL, filt_sordetach, filt_soread }; -static const struct filterops sowrite_filtops = - { 1, NULL, filt_sowdetach, filt_sowrite }; +static const struct filterops solisten_filtops = { + .f_isfd = 1, + .f_attach = NULL, + .f_detach = filt_sordetach, + .f_event = filt_solisten, +}; + +static const struct filterops soread_filtops = { + .f_isfd = 1, + .f_attach = NULL, + .f_detach = filt_sordetach, + .f_event = filt_soread, +}; + +static const struct filterops sowrite_filtops = { + .f_isfd = 1, + .f_attach = NULL, + .f_detach = filt_sowdetach, + .f_event = filt_sowrite, +}; int soo_kqfilter(struct file *fp, struct knote *kn) @@ -2308,7 +2352,7 @@ soo_kqfilter(struct file *fp, struct kno struct socket *so; struct sockbuf *sb; - so = ((file_t *)kn->kn_obj)->f_data; + so = ((file_t *)kn->kn_obj)->f_socket; solock(so); switch (kn->kn_filter) { case EVFILT_READ: @@ -2386,6 +2430,33 @@ sopoll(struct socket *so, int events) return revents; } +struct mbuf ** +sbsavetimestamp(int opt, struct mbuf *m, struct mbuf **mp) +{ + struct timeval tv; + microtime(&tv); + +#ifdef SO_OTIMESTAMP + if (opt & SO_OTIMESTAMP) { + struct timeval50 tv50; + + timeval_to_timeval50(&tv, &tv50); + *mp = sbcreatecontrol(&tv50, sizeof(tv50), + SCM_OTIMESTAMP, SOL_SOCKET); + if (*mp) + mp = &(*mp)->m_next; + } else +#endif + + if (opt & SO_TIMESTAMP) { + *mp = sbcreatecontrol(&tv, sizeof(tv), + SCM_TIMESTAMP, SOL_SOCKET); + if (*mp) + mp = &(*mp)->m_next; + } + return mp; +} + #include