Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.115.2.3 retrieving revision 1.134.2.2 diff -u -p -r1.115.2.3 -r1.134.2.2 --- src/sys/kern/uipc_socket.c 2005/12/31 11:34:26 1.115.2.3 +++ src/sys/kern/uipc_socket.c 2007/04/10 13:26:42 1.134.2.2 @@ -1,7 +1,7 @@ -/* $NetBSD: uipc_socket.c,v 1.115.2.3 2005/12/31 11:34:26 yamt Exp $ */ +/* $NetBSD: uipc_socket.c,v 1.134.2.2 2007/04/10 13:26:42 ad Exp $ */ /*- - * Copyright (c) 2002 The NetBSD Foundation, Inc. + * Copyright (c) 2002, 2007 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -68,7 +68,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.115.2.3 2005/12/31 11:34:26 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.134.2.2 2007/04/10 13:26:42 ad Exp $"); #include "opt_sock_counters.h" #include "opt_sosend_loan.h" @@ -91,10 +91,14 @@ __KERNEL_RCSID(0, "$NetBSD: uipc_socket. #include #include #include +#include +#include +#include #include -POOL_INIT(socket_pool, sizeof(struct socket), 0, 0, 0, "sockpl", NULL); +POOL_INIT(socket_pool, sizeof(struct socket), 0, 0, 0, "sockpl", NULL, + IPL_SOFTNET); MALLOC_DEFINE(M_SOOPTS, "soopts", "socket options"); MALLOC_DEFINE(M_SONAME, "soname", "socket name"); @@ -126,23 +130,15 @@ EVCNT_ATTACH_STATIC(sosend_kvalimit); #endif /* SOSEND_COUNTERS */ -void -soinit(void) -{ - - /* Set the initial adjusted socket buffer size. */ - if (sb_max_set(sb_max)) - panic("bad initial sb_max value: %lu", sb_max); - -} +static struct callback_entry sokva_reclaimerentry; #ifdef SOSEND_NO_LOAN -int use_sosend_loan = 0; +int sock_loan_thresh = -1; #else -int use_sosend_loan = 1; +int sock_loan_thresh = 4096; #endif -static struct simplelock so_pendfree_slock = SIMPLELOCK_INITIALIZER; +static kmutex_t so_pendfree_lock; static struct mbuf *so_pendfree; #ifndef SOMAXKVA @@ -150,22 +146,19 @@ static struct mbuf *so_pendfree; #endif int somaxkva = SOMAXKVA; static int socurkva; -static int sokvawaiters; +static kcondvar_t socurkva_cv; -#define SOCK_LOAN_THRESH 4096 #define SOCK_LOAN_CHUNK 65536 -static size_t sodopendfree(struct socket *); -static size_t sodopendfreel(struct socket *); +static size_t sodopendfree(void); +static size_t sodopendfreel(void); static vsize_t sokvareserve(struct socket *so, vsize_t len) { - int s; int error; - s = splvm(); - simple_lock(&so_pendfree_slock); + mutex_enter(&so_pendfree_lock); while (socurkva + len > somaxkva) { size_t freed; @@ -173,7 +166,7 @@ sokvareserve(struct socket *so, vsize_t * try to do pendfree. */ - freed = sodopendfreel(so); + freed = sodopendfreel(); /* * if some kva was freed, try again. @@ -183,33 +176,25 @@ sokvareserve(struct socket *so, vsize_t continue; SOSEND_COUNTER_INCR(&sosend_kvalimit); - sokvawaiters++; - error = ltsleep(&socurkva, PVM | PCATCH, "sokva", 0, - &so_pendfree_slock); - sokvawaiters--; + error = cv_wait_sig(&socurkva_cv, &so_pendfree_lock); if (error) { len = 0; break; } } socurkva += len; - simple_unlock(&so_pendfree_slock); - splx(s); + mutex_exit(&so_pendfree_lock); return len; } static void sokvaunreserve(vsize_t len) { - int s; - s = splvm(); - simple_lock(&so_pendfree_slock); + mutex_enter(&so_pendfree_lock); socurkva -= len; - if (sokvawaiters) - wakeup(&socurkva); - simple_unlock(&so_pendfree_slock); - splx(s); + cv_broadcast(&socurkva_cv); + mutex_exit(&so_pendfree_lock); } /* @@ -263,7 +248,7 @@ sokvafree(vaddr_t sva, vsize_t len) } static void -sodoloanfree(struct vm_page **pgs, caddr_t buf, size_t size) +sodoloanfree(struct vm_page **pgs, void *buf, size_t size) { vaddr_t va, sva, eva; vsize_t len; @@ -279,7 +264,7 @@ sodoloanfree(struct vm_page **pgs, caddr pgs = alloca(npgs * sizeof(*pgs)); for (i = 0, va = sva; va < eva; i++, va += PAGE_SIZE) { - if (pmap_extract(pmap_kernel(), va, &pa) == FALSE) + if (pmap_extract(pmap_kernel(), va, &pa) == false) panic("sodoloanfree: va 0x%lx not mapped", va); pgs[i] = PHYS_TO_VM_PAGE(pa); } @@ -292,45 +277,37 @@ sodoloanfree(struct vm_page **pgs, caddr } static size_t -sodopendfree(struct socket *so) +sodopendfree() { - int s; size_t rv; - s = splvm(); - simple_lock(&so_pendfree_slock); - rv = sodopendfreel(so); - simple_unlock(&so_pendfree_slock); - splx(s); + mutex_enter(&so_pendfree_lock); + rv = sodopendfreel(); + mutex_exit(&so_pendfree_lock); return rv; } /* * sodopendfreel: free mbufs on "pendfree" list. - * unlock and relock so_pendfree_slock when freeing mbufs. + * unlock and relock so_pendfree_lock when freeing mbufs. * - * => called with so_pendfree_slock held. - * => called at splvm. + * => called with so_pendfree_lock held. */ static size_t -sodopendfreel(struct socket *so) +sodopendfreel() { + struct mbuf *m, *next; size_t rv = 0; + int s; - LOCK_ASSERT(simple_lock_held(&so_pendfree_slock)); - - for (;;) { - struct mbuf *m; - struct mbuf *next; + KASSERT(mutex_owned(&so_pendfree_lock)); + while (so_pendfree != NULL) { m = so_pendfree; - if (m == NULL) - break; so_pendfree = NULL; - simple_unlock(&so_pendfree_slock); - /* XXX splx */ + mutex_exit(&so_pendfree_lock); for (; m != NULL; m = next) { next = m->m_next; @@ -339,20 +316,20 @@ sodopendfreel(struct socket *so) sodoloanfree((m->m_flags & M_EXT_PAGES) ? m->m_ext.ext_pgs : NULL, m->m_ext.ext_buf, m->m_ext.ext_size); + s = splvm(); pool_cache_put(&mbpool_cache, m); + splx(s); } - /* XXX splvm */ - simple_lock(&so_pendfree_slock); + mutex_enter(&so_pendfree_lock); } return (rv); } void -soloanfree(struct mbuf *m, caddr_t buf, size_t size, void *arg) +soloanfree(struct mbuf *m, void *buf, size_t size, void *arg) { - int s; if (m == NULL) { @@ -371,14 +348,11 @@ soloanfree(struct mbuf *m, caddr_t buf, * because we need to put kva back to kernel_map. */ - s = splvm(); - simple_lock(&so_pendfree_slock); + mutex_enter(&so_pendfree_lock); m->m_next = so_pendfree; so_pendfree = m; - if (sokvawaiters) - wakeup(&socurkva); - simple_unlock(&so_pendfree_slock); - splx(s); + cv_broadcast(&socurkva_cv); + mutex_exit(&so_pendfree_lock); } static long @@ -390,7 +364,7 @@ sosend_loan(struct socket *so, struct ui vaddr_t lva, va; int npgs, i, error; - if (VMSPACE_IS_KERNEL(uio->uio_vmspace)) + if (VMSPACE_IS_KERNEL_P(uio->uio_vmspace)) return (0); if (iov->iov_len < (size_t) space) @@ -424,12 +398,12 @@ sosend_loan(struct socket *so, struct ui lva += (vaddr_t) iov->iov_base & PAGE_MASK; - MEXTADD(m, (caddr_t) lva, space, M_MBUF, soloanfree, so); + MEXTADD(m, (void *) lva, space, M_MBUF, soloanfree, so); m->m_flags |= M_EXT_PAGES | M_EXT_ROMAP; uio->uio_resid -= space; /* uio_offset not updated, not set/used for write(2) */ - uio->uio_iov->iov_base = (caddr_t) uio->uio_iov->iov_base + space; + uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + space; uio->uio_iov->iov_len -= space; if (uio->uio_iov->iov_len == 0) { uio->uio_iov++; @@ -439,6 +413,35 @@ sosend_loan(struct socket *so, struct ui return (space); } +static int +sokva_reclaim_callback(struct callback_entry *ce, void *obj, void *arg) +{ + + KASSERT(ce == &sokva_reclaimerentry); + KASSERT(obj == NULL); + + sodopendfree(); + if (!vm_map_starved_p(kernel_map)) { + return CALLBACK_CHAIN_ABORT; + } + return CALLBACK_CHAIN_CONTINUE; +} + +void +soinit(void) +{ + + mutex_init(&so_pendfree_lock, MUTEX_DRIVER, IPL_VM); + cv_init(&socurkva_cv, "sokva"); + + /* Set the initial adjusted socket buffer size. */ + if (sb_max_set(sb_max)) + panic("bad initial sb_max value: %lu", sb_max); + + callback_register(&vm_map_to_kernel(kernel_map)->vmk_reclaim_callback, + &sokva_reclaimerentry, NULL, sokva_reclaim_callback); +} + /* * Socket operation routines. * These routines are called by the routines in @@ -455,17 +458,32 @@ socreate(int dom, struct socket **aso, i uid_t uid; int error, s; + error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET, + KAUTH_REQ_NETWORK_SOCKET_OPEN, KAUTH_ARG(dom), KAUTH_ARG(type), + KAUTH_ARG(proto)); + if (error) + return (error); + if (proto) prp = pffindproto(dom, proto, type); else prp = pffindtype(dom, type); - if (prp == 0 || prp->pr_usrreq == 0) + if (prp == 0) { + /* no support for domain */ + if (pffinddomain(dom) == 0) + return (EAFNOSUPPORT); + /* no support for socket type */ + if (proto == 0 && type != 0) + return (EPROTOTYPE); + return (EPROTONOSUPPORT); + } + if (prp->pr_usrreq == 0) return (EPROTONOSUPPORT); if (prp->pr_type != type) return (EPROTOTYPE); s = splsoftnet(); so = pool_get(&socket_pool, PR_WAITOK); - memset((caddr_t)so, 0, sizeof(*so)); + memset((void *)so, 0, sizeof(*so)); TAILQ_INIT(&so->so_q0); TAILQ_INIT(&so->so_q); so->so_type = type; @@ -477,11 +495,7 @@ socreate(int dom, struct socket **aso, i so->so_snd.sb_mowner = &prp->pr_domain->dom_mowner; so->so_mowner = &prp->pr_domain->dom_mowner; #endif - if (l != NULL) { - uid = l->l_proc->p_ucred->cr_uid; - } else { - uid = 0; - } + uid = kauth_cred_geteuid(l->l_cred); so->so_uidinfo = uid_find(uid); error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0, (struct mbuf *)(long)proto, (struct mbuf *)0, l); @@ -591,7 +605,7 @@ soclose(struct socket *so) (so->so_state & SS_NBIO)) goto drop; while (so->so_state & SS_ISCONNECTED) { - error = tsleep((caddr_t)&so->so_timeo, + error = tsleep((void *)&so->so_timeo, PSOCK | PCATCH, netcls, so->so_linger * hz); if (error) @@ -705,7 +719,7 @@ sodisconnect(struct socket *so) (struct lwp *)0); bad: splx(s); - sodopendfree(so); + sodopendfree(); return (error); } @@ -737,7 +751,7 @@ sosend(struct socket *so, struct mbuf *a int error, s, dontroute, atomic; p = l->l_proc; - sodopendfree(so); + sodopendfree(); clen = 0; atomic = sosendallatonce(so) || top; @@ -825,9 +839,9 @@ sosend(struct socket *so, struct mbuf *a mlen = MLEN; } MCLAIM(m, so->so_snd.sb_mowner); - if (use_sosend_loan && - uio->uio_iov->iov_len >= SOCK_LOAN_THRESH && - space >= SOCK_LOAN_THRESH && + if (sock_loan_thresh >= 0 && + uio->uio_iov->iov_len >= sock_loan_thresh && + space >= sock_loan_thresh && (len = sosend_loan(so, uio, m, space)) != 0) { SOSEND_COUNTER_INCR(&sosend_loan_big); @@ -859,7 +873,7 @@ sosend(struct socket *so, struct mbuf *a if (atomic && top == 0 && len < mlen) MH_ALIGN(m, len); } - error = uiomove(mtod(m, caddr_t), (int)len, + error = uiomove(mtod(m, void *), (int)len, uio); have_data: resid = uio->uio_resid; @@ -955,7 +969,7 @@ soreceive(struct socket *so, struct mbuf flags = 0; if ((flags & MSG_DONTWAIT) == 0) - sodopendfree(so); + sodopendfree(); if (flags & MSG_OOB) { m = m_get(M_WAIT, MT_DATA); @@ -965,7 +979,7 @@ soreceive(struct socket *so, struct mbuf if (error) goto bad; do { - error = uiomove(mtod(m, caddr_t), + error = uiomove(mtod(m, void *), (int) min(uio->uio_resid, m->m_len), uio); m = m_free(m); } while (uio->uio_resid && error == 0 && m); @@ -1179,7 +1193,7 @@ soreceive(struct socket *so, struct mbuf SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove"); SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove"); splx(s); - error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); + error = uiomove(mtod(m, char *) + moff, (int)len, uio); s = splsoftnet(); if (error) { /* @@ -1390,6 +1404,7 @@ sosetopt(struct socket *so, int level, i { int error; struct mbuf *m; + struct linger *l; error = 0; m = m0; @@ -1406,13 +1421,18 @@ sosetopt(struct socket *so, int level, i error = EINVAL; goto bad; } - if (mtod(m, struct linger *)->l_linger < 0 || - mtod(m, struct linger *)->l_linger > (INT_MAX / hz)) { + l = mtod(m, struct linger *); + if (l->l_linger < 0 || l->l_linger > USHRT_MAX || + l->l_linger > (INT_MAX / hz)) { error = EDOM; goto bad; } - so->so_linger = mtod(m, struct linger *)->l_linger; - /* fall thru... */ + so->so_linger = l->l_linger; + if (l->l_onoff) + so->so_options |= SO_LINGER; + else + so->so_options &= ~SO_LINGER; + break; case SO_DEBUG: case SO_KEEPALIVE: @@ -1552,7 +1572,7 @@ sogetopt(struct socket *so, int level, i case SO_LINGER: m->m_len = sizeof(struct linger); mtod(m, struct linger *)->l_onoff = - so->so_options & SO_LINGER; + (so->so_options & SO_LINGER) ? 1 : 0; mtod(m, struct linger *)->l_linger = so->so_linger; break; @@ -1565,7 +1585,7 @@ sogetopt(struct socket *so, int level, i case SO_BROADCAST: case SO_OOBINLINE: case SO_TIMESTAMP: - *mtod(m, int *) = so->so_options & optname; + *mtod(m, int *) = (so->so_options & optname) ? 1 : 0; break; case SO_TYPE: @@ -1755,7 +1775,6 @@ sysctl_kern_somaxkva(SYSCTLFN_ARGS) { int error, new_somaxkva; struct sysctlnode node; - int s; new_somaxkva = somaxkva; node = *rnode; @@ -1767,12 +1786,10 @@ sysctl_kern_somaxkva(SYSCTLFN_ARGS) if (new_somaxkva < (16 * 1024 * 1024)) /* sanity */ return (EINVAL); - s = splvm(); - simple_lock(&so_pendfree_slock); + mutex_enter(&so_pendfree_lock); somaxkva = new_somaxkva; - wakeup(&socurkva); - simple_unlock(&so_pendfree_slock); - splx(s); + cv_broadcast(&socurkva_cv); + mutex_exit(&so_pendfree_lock); return (error); }