Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.196 retrieving revision 1.212 diff -u -p -r1.196 -r1.212 --- src/sys/kern/uipc_socket.c 2009/12/20 09:36:06 1.196 +++ src/sys/kern/uipc_socket.c 2012/10/08 19:20:45 1.212 @@ -1,4 +1,4 @@ -/* $NetBSD: uipc_socket.c,v 1.196 2009/12/20 09:36:06 dsl Exp $ */ +/* $NetBSD: uipc_socket.c,v 1.212 2012/10/08 19:20:45 pooka Exp $ */ /*- * Copyright (c) 2002, 2007, 2008, 2009 The NetBSD Foundation, Inc. @@ -63,7 +63,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.196 2009/12/20 09:36:06 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.212 2012/10/08 19:20:45 pooka Exp $"); #include "opt_compat_netbsd.h" #include "opt_sock_counters.h" @@ -92,13 +92,16 @@ __KERNEL_RCSID(0, "$NetBSD: uipc_socket. #include #include #include +#include #ifdef COMPAT_50 #include #include #endif -#include +#include +#include +#include MALLOC_DEFINE(M_SOOPTS, "soopts", "socket options"); MALLOC_DEFINE(M_SONAME, "soname", "socket name"); @@ -133,8 +136,6 @@ EVCNT_ATTACH_STATIC(sosend_kvalimit); #endif /* SOSEND_COUNTERS */ -static struct callback_entry sokva_reclaimerentry; - #if defined(SOSEND_NO_LOAN) || defined(MULTIPROCESSOR) int sock_loan_thresh = -1; #else @@ -142,7 +143,7 @@ int sock_loan_thresh = 4096; #endif static kmutex_t so_pendfree_lock; -static struct mbuf *so_pendfree; +static struct mbuf *so_pendfree = NULL; #ifndef SOMAXKVA #define SOMAXKVA (16 * 1024 * 1024) @@ -155,10 +156,11 @@ static kauth_listener_t socket_listener; #define SOCK_LOAN_CHUNK 65536 -static size_t sodopendfree(void); -static size_t sodopendfreel(void); +static void sopendfree_thread(void *); +static kcondvar_t pendfree_thread_cv; +static lwp_t *sopendfree_lwp; -static void sysctl_kern_somaxkva_setup(void); +static void sysctl_kern_socket_setup(void); static struct sysctllog *socket_sysctllog; static vsize_t @@ -168,21 +170,6 @@ sokvareserve(struct socket *so, vsize_t mutex_enter(&so_pendfree_lock); while (socurkva + len > somaxkva) { - size_t freed; - - /* - * try to do pendfree. - */ - - freed = sodopendfreel(); - - /* - * if some kva was freed, try again. - */ - - if (freed) - continue; - SOSEND_COUNTER_INCR(&sosend_kvalimit); error = cv_wait_sig(&socurkva_cv, &so_pendfree_lock); if (error) { @@ -210,7 +197,7 @@ sokvaunreserve(vsize_t len) */ vaddr_t -sokvaalloc(vsize_t len, struct socket *so) +sokvaalloc(vaddr_t sva, vsize_t len, struct socket *so) { vaddr_t lva; @@ -225,7 +212,8 @@ sokvaalloc(vsize_t len, struct socket *s * allocate kva. */ - lva = uvm_km_alloc(kernel_map, len, 0, UVM_KMF_VAONLY | UVM_KMF_WAITVA); + lva = uvm_km_alloc(kernel_map, len, atop(sva) & uvmexp.colormask, + UVM_KMF_COLORMATCH | UVM_KMF_VAONLY | UVM_KMF_WAITVA); if (lva == 0) { sokvaunreserve(len); return (0); @@ -275,56 +263,45 @@ sodoloanfree(struct vm_page **pgs, void sokvafree(sva, len); } -static size_t -sodopendfree(void) -{ - size_t rv; - - if (__predict_true(so_pendfree == NULL)) - return 0; - - mutex_enter(&so_pendfree_lock); - rv = sodopendfreel(); - mutex_exit(&so_pendfree_lock); - - return rv; -} - /* - * sodopendfreel: free mbufs on "pendfree" list. + * sopendfree_thread: free mbufs on "pendfree" list. * unlock and relock so_pendfree_lock when freeing mbufs. - * - * => called with so_pendfree_lock held. */ -static size_t -sodopendfreel(void) +static void +sopendfree_thread(void *v) { struct mbuf *m, *next; - size_t rv = 0; - - KASSERT(mutex_owned(&so_pendfree_lock)); + size_t rv; - while (so_pendfree != NULL) { - m = so_pendfree; - so_pendfree = NULL; - mutex_exit(&so_pendfree_lock); + mutex_enter(&so_pendfree_lock); - for (; m != NULL; m = next) { - next = m->m_next; - KASSERT((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0); - KASSERT(m->m_ext.ext_refcnt == 0); + for (;;) { + rv = 0; + while (so_pendfree != NULL) { + m = so_pendfree; + so_pendfree = NULL; + mutex_exit(&so_pendfree_lock); + + for (; m != NULL; m = next) { + next = m->m_next; + KASSERT((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0); + KASSERT(m->m_ext.ext_refcnt == 0); + + rv += m->m_ext.ext_size; + sodoloanfree(m->m_ext.ext_pgs, m->m_ext.ext_buf, + m->m_ext.ext_size); + pool_cache_put(mb_cache, m); + } - rv += m->m_ext.ext_size; - sodoloanfree(m->m_ext.ext_pgs, m->m_ext.ext_buf, - m->m_ext.ext_size); - pool_cache_put(mb_cache, m); + mutex_enter(&so_pendfree_lock); } - - mutex_enter(&so_pendfree_lock); + if (rv) + cv_broadcast(&socurkva_cv); + cv_wait(&pendfree_thread_cv, &so_pendfree_lock); } - - return (rv); + panic("sopendfree_thread"); + /* NOTREACHED */ } void @@ -343,7 +320,7 @@ soloanfree(struct mbuf *m, void *buf, si mutex_enter(&so_pendfree_lock); m->m_next = so_pendfree; so_pendfree = m; - cv_broadcast(&socurkva_cv); + cv_signal(&pendfree_thread_cv); mutex_exit(&so_pendfree_lock); } @@ -373,7 +350,7 @@ sosend_loan(struct socket *so, struct ui KASSERT(npgs <= M_EXT_MAXPAGES); - lva = sokvaalloc(len, so); + lva = sokvaalloc(sva, len, so); if (lva == 0) return 0; @@ -406,20 +383,6 @@ sosend_loan(struct socket *so, struct ui return (space); } -static int -sokva_reclaim_callback(struct callback_entry *ce, void *obj, void *arg) -{ - - KASSERT(ce == &sokva_reclaimerentry); - KASSERT(obj == NULL); - - sodopendfree(); - if (!vm_map_starved_p(kernel_map)) { - return CALLBACK_CHAIN_ABORT; - } - return CALLBACK_CHAIN_CONTINUE; -} - struct mbuf * getsombuf(struct socket *so, int type) { @@ -452,10 +415,8 @@ socket_listener_cb(kauth_cred_t cred, ka case KAUTH_REQ_NETWORK_SOCKET_DROP: { /* Normal users can only drop their own connections. */ struct socket *so = (struct socket *)arg1; - uid_t sockuid = so->so_uidinfo->ui_uid; - if (sockuid == kauth_cred_getuid(cred) || - sockuid == kauth_cred_geteuid(cred)) + if (proc_uidmatch(cred, so->so_cred)) result = KAUTH_RESULT_ALLOW; break; @@ -463,9 +424,10 @@ socket_listener_cb(kauth_cred_t cred, ka case KAUTH_REQ_NETWORK_SOCKET_OPEN: /* We allow "raw" routing/bluetooth sockets to anyone. */ - if ((u_long)arg1 == PF_ROUTE || (u_long)arg1 == PF_BLUETOOTH) + if ((u_long)arg1 == PF_ROUTE || (u_long)arg1 == PF_OROUTE + || (u_long)arg1 == PF_BLUETOOTH) { result = KAUTH_RESULT_ALLOW; - else { + } else { /* Privileged, let secmodel handle this. */ if ((u_long)arg2 == SOCK_RAW) break; @@ -491,24 +453,31 @@ void soinit(void) { - sysctl_kern_somaxkva_setup(); + sysctl_kern_socket_setup(); mutex_init(&so_pendfree_lock, MUTEX_DEFAULT, IPL_VM); softnet_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); cv_init(&socurkva_cv, "sokva"); + cv_init(&pendfree_thread_cv, "sopendfr"); soinit2(); /* Set the initial adjusted socket buffer size. */ if (sb_max_set(sb_max)) panic("bad initial sb_max value: %lu", sb_max); - callback_register(&vm_map_to_kernel(kernel_map)->vmk_reclaim_callback, - &sokva_reclaimerentry, NULL, sokva_reclaim_callback); - socket_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK, socket_listener_cb, NULL); } +void +soinit1(void) +{ + int error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, + sopendfree_thread, NULL, &sopendfree_lwp, "sopendfree"); + if (error) + panic("soinit1 %d", error); +} + /* * Socket operation routines. * These routines are called by the routines in @@ -561,10 +530,8 @@ socreate(int dom, struct socket **aso, i so->so_snd.sb_mowner = &prp->pr_domain->dom_mowner; so->so_mowner = &prp->pr_domain->dom_mowner; #endif - /* so->so_cred = kauth_cred_dup(l->l_cred); */ uid = kauth_cred_geteuid(l->l_cred); so->so_uidinfo = uid_find(uid); - so->so_egid = kauth_cred_getegid(l->l_cred); so->so_cpid = l->l_proc->p_pid; if (lockso != NULL) { /* Caller wants us to share a lock. */ @@ -583,6 +550,7 @@ socreate(int dom, struct socket **aso, i sofree(so); return error; } + so->so_cred = kauth_cred_dup(l->l_cred); sounlock(so); *aso = so; return 0; @@ -598,10 +566,14 @@ fsocreate(int domain, struct socket **so struct socket *so; struct file *fp; int fd, error; + int flags = type & SOCK_FLAGS_MASK; + type &= ~SOCK_FLAGS_MASK; if ((error = fd_allocfile(&fp, &fd)) != 0) - return (error); - fp->f_flag = FREAD|FWRITE; + return error; + fd_set_exclose(l, fd, (flags & SOCK_CLOEXEC) != 0); + fp->f_flag = FREAD|FWRITE|((flags & SOCK_NONBLOCK) ? FNONBLOCK : 0)| + ((flags & SOCK_NOSIGPIPE) ? FNOSIGPIPE : 0); fp->f_type = DTYPE_SOCKET; fp->f_ops = &socketops; error = socreate(domain, &so, type, protocol, l, NULL); @@ -650,7 +622,7 @@ solisten(struct socket *so, int backlog, if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) != 0) { sounlock(so); - return (EOPNOTSUPP); + return (EINVAL); } error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL, l); @@ -704,7 +676,6 @@ sofree(struct socket *so) /* Remove acccept filter if one is present. */ if (so->so_accf != NULL) (void)accept_filt_clear(so); - /* kauth_cred_free(so->so_cred); */ sounlock(so); if (refs == 0) /* XXX */ soput(so); @@ -754,7 +725,8 @@ soclose(struct socket *so) goto drop; } if (so->so_options & SO_LINGER) { - if ((so->so_state & SS_ISDISCONNECTING) && so->so_nbio) + if ((so->so_state & (SS_ISDISCONNECTING|SS_NBIO)) == + (SS_ISDISCONNECTING|SS_NBIO)) goto drop; while (so->so_state & SS_ISCONNECTED) { error = sowait(so, true, so->so_linger * hz); @@ -773,6 +745,7 @@ soclose(struct socket *so) discard: if (so->so_state & SS_NOFDREF) panic("soclose: NOFDREF"); + kauth_cred_free(so->so_cred); so->so_state |= SS_NOFDREF; sofree(so); return (error); @@ -875,7 +848,6 @@ sodisconnect(struct socket *so) error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, NULL, NULL, NULL, NULL); } - sodopendfree(); return (error); } @@ -908,7 +880,6 @@ sosend(struct socket *so, struct mbuf *a short wakeup_state = 0; p = l->l_proc; - sodopendfree(); clen = 0; /* @@ -975,7 +946,7 @@ sosend(struct socket *so, struct mbuf *a } if (space < resid + clen && (atomic || space < so->so_snd.sb_lowat || space < clen)) { - if (so->so_nbio) { + if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO)) { error = EWOULDBLOCK; goto release; } @@ -1025,7 +996,7 @@ sosend(struct socket *so, struct mbuf *a } if (resid >= MINCLSIZE && space >= MCLBYTES) { SOSEND_COUNTER_INCR(&sosend_copy_big); - m_clget(m, M_WAIT); + m_clget(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) goto nopages; mlen = MCLBYTES; @@ -1161,7 +1132,8 @@ soreceive(struct socket *so, struct mbuf { struct lwp *l = curlwp; struct mbuf *m, **mp, *mt; - int atomic, flags, len, error, s, offset, moff, type, orig_resid; + size_t len, offset, moff, orig_resid; + int atomic, flags, error, s, type; const struct protosw *pr; struct mbuf *nextrecord; int mbuf_removed = 0; @@ -1184,9 +1156,6 @@ soreceive(struct socket *so, struct mbuf else flags = 0; - if ((flags & MSG_DONTWAIT) == 0) - sodopendfree(); - if (flags & MSG_OOB) { m = m_get(M_WAIT, MT_DATA); solock(so); @@ -1197,7 +1166,7 @@ soreceive(struct socket *so, struct mbuf goto bad; do { error = uiomove(mtod(m, void *), - (int) min(uio->uio_resid, m->m_len), uio); + MIN(uio->uio_resid, m->m_len), uio); m = m_free(m); } while (uio->uio_resid > 0 && error == 0 && m); bad: @@ -1274,7 +1243,8 @@ soreceive(struct socket *so, struct mbuf } if (uio->uio_resid == 0) goto release; - if (so->so_nbio || (flags & MSG_DONTWAIT)) { + if ((so->so_state & SS_NBIO) || + (flags & (MSG_DONTWAIT|MSG_NBIO))) { error = EWOULDBLOCK; goto release; } @@ -1379,7 +1349,9 @@ soreceive(struct socket *so, struct mbuf type == SCM_RIGHTS) { sounlock(so); splx(s); - error = (*dom->dom_externalize)(cm, l); + error = (*dom->dom_externalize)(cm, l, + (flags & MSG_CMSG_CLOEXEC) ? + O_CLOEXEC : 0); s = splsoftnet(); solock(so); } @@ -1448,7 +1420,7 @@ soreceive(struct socket *so, struct mbuf SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove"); sounlock(so); splx(s); - error = uiomove(mtod(m, char *) + moff, (int)len, uio); + error = uiomove(mtod(m, char *) + moff, len, uio); s = splsoftnet(); solock(so); if (error != 0) { @@ -1732,6 +1704,7 @@ sosetopt1(struct socket *so, const struc case SO_REUSEPORT: case SO_OOBINLINE: case SO_TIMESTAMP: + case SO_NOSIGPIPE: #ifdef SO_OTIMESTAMP case SO_OTIMESTAMP: #endif @@ -1932,6 +1905,7 @@ sogetopt1(struct socket *so, struct sock case SO_BROADCAST: case SO_OOBINLINE: case SO_TIMESTAMP: + case SO_NOSIGPIPE: #ifdef SO_OTIMESTAMP case SO_OTIMESTAMP: #endif @@ -2392,6 +2366,7 @@ sopoll(struct socket *so, int events) #include static int sysctl_kern_somaxkva(SYSCTLFN_PROTO); +static int sysctl_kern_sbmax(SYSCTLFN_PROTO); /* * sysctl helper routine for kern.somaxkva. ensures that the given @@ -2422,8 +2397,32 @@ sysctl_kern_somaxkva(SYSCTLFN_ARGS) return (error); } +/* + * sysctl helper routine for kern.sbmax. Basically just ensures that + * any new value is not too small. + */ +static int +sysctl_kern_sbmax(SYSCTLFN_ARGS) +{ + int error, new_sbmax; + struct sysctlnode node; + + new_sbmax = sb_max; + node = *rnode; + node.sysctl_data = &new_sbmax; + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + if (error || newp == NULL) + return (error); + + KERNEL_LOCK(1, NULL); + error = sb_max_set(new_sbmax); + KERNEL_UNLOCK_ONE(NULL); + + return (error); +} + static void -sysctl_kern_somaxkva_setup(void) +sysctl_kern_socket_setup(void) { KASSERT(socket_sysctllog == NULL); @@ -2440,4 +2439,11 @@ sysctl_kern_somaxkva_setup(void) "used for socket buffers"), sysctl_kern_somaxkva, 0, NULL, 0, CTL_KERN, KERN_SOMAXKVA, CTL_EOL); + + sysctl_createv(&socket_sysctllog, 0, NULL, NULL, + CTLFLAG_PERMANENT|CTLFLAG_READWRITE, + CTLTYPE_INT, "sbmax", + SYSCTL_DESCR("Maximum socket buffer size"), + sysctl_kern_sbmax, 0, NULL, 0, + CTL_KERN, KERN_SBMAX, CTL_EOL); }