version 1.79, 2003/04/09 18:38:03 |
version 1.100, 2004/04/25 16:42:41 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright |
* 2. Redistributions in binary form must reproduce the above copyright |
* notice, this list of conditions and the following disclaimer in the |
* notice, this list of conditions and the following disclaimer in the |
* documentation and/or other materials provided with the distribution. |
* documentation and/or other materials provided with the distribution. |
* 3. All advertising materials mentioning features or use of this software |
* 3. Neither the name of the University nor the names of its contributors |
* must display the following acknowledgement: |
|
* This product includes software developed by the University of |
|
* California, Berkeley and its contributors. |
|
* 4. Neither the name of the University nor the names of its contributors |
|
* may be used to endorse or promote products derived from this software |
* may be used to endorse or promote products derived from this software |
* without specific prior written permission. |
* without specific prior written permission. |
* |
* |
Line 76 __KERNEL_RCSID(0, "$NetBSD$"); |
|
Line 72 __KERNEL_RCSID(0, "$NetBSD$"); |
|
|
|
#include "opt_sock_counters.h" |
#include "opt_sock_counters.h" |
#include "opt_sosend_loan.h" |
#include "opt_sosend_loan.h" |
|
#include "opt_mbuftrace.h" |
|
#include "opt_somaxkva.h" |
|
|
#include <sys/param.h> |
#include <sys/param.h> |
#include <sys/systm.h> |
#include <sys/systm.h> |
Line 92 __KERNEL_RCSID(0, "$NetBSD$"); |
|
Line 90 __KERNEL_RCSID(0, "$NetBSD$"); |
|
#include <sys/resourcevar.h> |
#include <sys/resourcevar.h> |
#include <sys/pool.h> |
#include <sys/pool.h> |
#include <sys/event.h> |
#include <sys/event.h> |
|
#include <sys/poll.h> |
|
|
#include <uvm/uvm.h> |
#include <uvm/uvm.h> |
|
|
struct pool socket_pool; |
POOL_INIT(socket_pool, sizeof(struct socket), 0, 0, 0, "sockpl", NULL); |
|
|
MALLOC_DEFINE(M_SOOPTS, "soopts", "socket options"); |
MALLOC_DEFINE(M_SOOPTS, "soopts", "socket options"); |
MALLOC_DEFINE(M_SONAME, "soname", "socket name"); |
MALLOC_DEFINE(M_SONAME, "soname", "socket name"); |
|
|
soinit(void) |
soinit(void) |
{ |
{ |
|
|
pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0, |
/* Set the initial adjusted socket buffer size. */ |
"sockpl", NULL); |
if (sb_max_set(sb_max)) |
|
panic("bad initial sb_max value: %lu\n", sb_max); |
|
|
#ifdef SOSEND_COUNTERS |
#ifdef SOSEND_COUNTERS |
evcnt_attach_static(&sosend_loan_big); |
evcnt_attach_static(&sosend_loan_big); |
Line 144 int use_sosend_loan = 0; |
|
Line 144 int use_sosend_loan = 0; |
|
int use_sosend_loan = 1; |
int use_sosend_loan = 1; |
#endif |
#endif |
|
|
|
struct simplelock so_pendfree_slock = SIMPLELOCK_INITIALIZER; |
struct mbuf *so_pendfree; |
struct mbuf *so_pendfree; |
|
|
int somaxkva = 16 * 1024 * 1024; |
#ifndef SOMAXKVA |
|
#define SOMAXKVA (16 * 1024 * 1024) |
|
#endif |
|
int somaxkva = SOMAXKVA; |
int socurkva; |
int socurkva; |
int sokvawaiters; |
int sokvawaiters; |
|
|
#define SOCK_LOAN_THRESH 4096 |
#define SOCK_LOAN_THRESH 4096 |
#define SOCK_LOAN_CHUNK 65536 |
#define SOCK_LOAN_CHUNK 65536 |
|
|
|
static size_t sodopendfree(struct socket *); |
|
static size_t sodopendfreel(struct socket *); |
|
static __inline vsize_t sokvareserve(struct socket *, vsize_t); |
|
static __inline void sokvaunreserve(vsize_t); |
|
|
|
static __inline vsize_t |
|
sokvareserve(struct socket *so, vsize_t len) |
|
{ |
|
int s; |
|
int error; |
|
|
|
s = splvm(); |
|
simple_lock(&so_pendfree_slock); |
|
while (socurkva + len > somaxkva) { |
|
size_t freed; |
|
|
|
/* |
|
* try to do pendfree. |
|
*/ |
|
|
|
freed = sodopendfreel(so); |
|
|
|
/* |
|
* if some kva was freed, try again. |
|
*/ |
|
|
|
if (freed) |
|
continue; |
|
|
|
SOSEND_COUNTER_INCR(&sosend_kvalimit); |
|
sokvawaiters++; |
|
error = ltsleep(&socurkva, PVM | PCATCH, "sokva", 0, |
|
&so_pendfree_slock); |
|
sokvawaiters--; |
|
if (error) { |
|
len = 0; |
|
break; |
|
} |
|
} |
|
socurkva += len; |
|
simple_unlock(&so_pendfree_slock); |
|
splx(s); |
|
return len; |
|
} |
|
|
|
static __inline void |
|
sokvaunreserve(vsize_t len) |
|
{ |
|
int s; |
|
|
|
s = splvm(); |
|
simple_lock(&so_pendfree_slock); |
|
socurkva -= len; |
|
if (sokvawaiters) |
|
wakeup(&socurkva); |
|
simple_unlock(&so_pendfree_slock); |
|
splx(s); |
|
} |
|
|
|
/* |
|
* sokvaalloc: allocate kva for loan. |
|
*/ |
|
|
|
vaddr_t |
|
sokvaalloc(vsize_t len, struct socket *so) |
|
{ |
|
vaddr_t lva; |
|
|
|
/* |
|
* reserve kva. |
|
*/ |
|
|
|
if (sokvareserve(so, len) == 0) |
|
return 0; |
|
|
|
/* |
|
* allocate kva. |
|
*/ |
|
|
|
lva = uvm_km_valloc_wait(kernel_map, len); |
|
if (lva == 0) { |
|
sokvaunreserve(len); |
|
return (0); |
|
} |
|
|
|
return lva; |
|
} |
|
|
|
/* |
|
* sokvafree: free kva for loan. |
|
*/ |
|
|
|
void |
|
sokvafree(vaddr_t sva, vsize_t len) |
|
{ |
|
|
|
/* |
|
* free kva. |
|
*/ |
|
|
|
uvm_km_free(kernel_map, sva, len); |
|
|
|
/* |
|
* unreserve kva. |
|
*/ |
|
|
|
sokvaunreserve(len); |
|
} |
|
|
static void |
static void |
sodoloanfree(struct vm_page **pgs, caddr_t buf, size_t size) |
sodoloanfree(struct vm_page **pgs, caddr_t buf, size_t size) |
{ |
{ |
Line 179 sodoloanfree(struct vm_page **pgs, caddr |
|
Line 292 sodoloanfree(struct vm_page **pgs, caddr |
|
pmap_kremove(sva, len); |
pmap_kremove(sva, len); |
pmap_update(pmap_kernel()); |
pmap_update(pmap_kernel()); |
uvm_unloan(pgs, npgs, UVM_LOAN_TOPAGE); |
uvm_unloan(pgs, npgs, UVM_LOAN_TOPAGE); |
uvm_km_free(kernel_map, sva, len); |
sokvafree(sva, len); |
socurkva -= len; |
|
if (sokvawaiters) |
|
wakeup(&socurkva); |
|
} |
} |
|
|
static size_t |
static size_t |
sodopendfree(struct socket *so) |
sodopendfree(struct socket *so) |
{ |
{ |
struct mbuf *m; |
|
size_t rv = 0; |
|
int s; |
int s; |
|
size_t rv; |
|
|
s = splvm(); |
s = splvm(); |
|
simple_lock(&so_pendfree_slock); |
|
rv = sodopendfreel(so); |
|
simple_unlock(&so_pendfree_slock); |
|
splx(s); |
|
|
for (;;) { |
return rv; |
m = so_pendfree; |
} |
if (m == NULL) |
|
break; |
|
so_pendfree = m->m_next; |
|
splx(s); |
|
|
|
rv += m->m_ext.ext_size; |
/* |
sodoloanfree((m->m_flags & M_EXT_PAGES) ? |
* sodopendfreel: free mbufs on "pendfree" list. |
m->m_ext.ext_pgs : NULL, m->m_ext.ext_buf, |
* unlock and relock so_pendfree_slock when freeing mbufs. |
m->m_ext.ext_size); |
* |
s = splvm(); |
* => called with so_pendfree_slock held. |
pool_cache_put(&mbpool_cache, m); |
* => called at splvm. |
} |
*/ |
|
|
|
static size_t |
|
sodopendfreel(struct socket *so) |
|
{ |
|
size_t rv = 0; |
|
|
|
LOCK_ASSERT(simple_lock_held(&so_pendfree_slock)); |
|
|
for (;;) { |
for (;;) { |
m = so->so_pendfree; |
struct mbuf *m; |
|
struct mbuf *next; |
|
|
|
m = so_pendfree; |
if (m == NULL) |
if (m == NULL) |
break; |
break; |
so->so_pendfree = m->m_next; |
so_pendfree = NULL; |
splx(s); |
simple_unlock(&so_pendfree_slock); |
|
/* XXX splx */ |
|
|
|
for (; m != NULL; m = next) { |
|
next = m->m_next; |
|
|
|
rv += m->m_ext.ext_size; |
|
sodoloanfree((m->m_flags & M_EXT_PAGES) ? |
|
m->m_ext.ext_pgs : NULL, m->m_ext.ext_buf, |
|
m->m_ext.ext_size); |
|
pool_cache_put(&mbpool_cache, m); |
|
} |
|
|
rv += m->m_ext.ext_size; |
/* XXX splvm */ |
sodoloanfree((m->m_flags & M_EXT_PAGES) ? |
simple_lock(&so_pendfree_slock); |
m->m_ext.ext_pgs : NULL, m->m_ext.ext_buf, |
|
m->m_ext.ext_size); |
|
s = splvm(); |
|
pool_cache_put(&mbpool_cache, m); |
|
} |
} |
|
|
splx(s); |
|
return (rv); |
return (rv); |
} |
} |
|
|
static void |
void |
soloanfree(struct mbuf *m, caddr_t buf, size_t size, void *arg) |
soloanfree(struct mbuf *m, caddr_t buf, size_t size, void *arg) |
{ |
{ |
struct socket *so = arg; |
|
int s; |
int s; |
|
|
if (m == NULL) { |
if (m == NULL) { |
|
|
|
/* |
|
* called from MEXTREMOVE. |
|
*/ |
|
|
sodoloanfree(NULL, buf, size); |
sodoloanfree(NULL, buf, size); |
return; |
return; |
} |
} |
|
|
|
/* |
|
* postpone freeing mbuf. |
|
* |
|
* we can't do it in interrupt context |
|
* because we need to put kva back to kernel_map. |
|
*/ |
|
|
s = splvm(); |
s = splvm(); |
m->m_next = so->so_pendfree; |
simple_lock(&so_pendfree_slock); |
so->so_pendfree = m; |
m->m_next = so_pendfree; |
splx(s); |
so_pendfree = m; |
if (sokvawaiters) |
if (sokvawaiters) |
wakeup(&socurkva); |
wakeup(&socurkva); |
|
simple_unlock(&so_pendfree_slock); |
|
splx(s); |
} |
} |
|
|
static long |
static long |
Line 254 sosend_loan(struct socket *so, struct ui |
|
Line 392 sosend_loan(struct socket *so, struct ui |
|
vaddr_t sva, eva; |
vaddr_t sva, eva; |
vsize_t len; |
vsize_t len; |
vaddr_t lva, va; |
vaddr_t lva, va; |
int npgs, s, i, error; |
int npgs, i, error; |
|
|
if (uio->uio_segflg != UIO_USERSPACE) |
if (uio->uio_segflg != UIO_USERSPACE) |
return (0); |
return (0); |
Line 272 sosend_loan(struct socket *so, struct ui |
|
Line 410 sosend_loan(struct socket *so, struct ui |
|
/* XXX KDASSERT */ |
/* XXX KDASSERT */ |
KASSERT(npgs <= M_EXT_MAXPAGES); |
KASSERT(npgs <= M_EXT_MAXPAGES); |
|
|
while (socurkva + len > somaxkva) { |
lva = sokvaalloc(len, so); |
if (sodopendfree(so)) |
|
continue; |
|
SOSEND_COUNTER_INCR(&sosend_kvalimit); |
|
s = splvm(); |
|
sokvawaiters++; |
|
(void) tsleep(&socurkva, PVM, "sokva", 0); |
|
sokvawaiters--; |
|
splx(s); |
|
} |
|
|
|
lva = uvm_km_valloc_wait(kernel_map, len); |
|
if (lva == 0) |
if (lva == 0) |
return (0); |
return 0; |
socurkva += len; |
|
|
|
error = uvm_loan(&uio->uio_procp->p_vmspace->vm_map, sva, len, |
error = uvm_loan(&uio->uio_procp->p_vmspace->vm_map, sva, len, |
m->m_ext.ext_pgs, UVM_LOAN_TOPAGE); |
m->m_ext.ext_pgs, UVM_LOAN_TOPAGE); |
if (error) { |
if (error) { |
uvm_km_free(kernel_map, lva, len); |
sokvafree(lva, len); |
socurkva -= len; |
|
return (0); |
return (0); |
} |
} |
|
|
|
|
socreate(int dom, struct socket **aso, int type, int proto) |
socreate(int dom, struct socket **aso, int type, int proto) |
{ |
{ |
struct proc *p; |
struct proc *p; |
struct protosw *prp; |
const struct protosw *prp; |
struct socket *so; |
struct socket *so; |
int error, s; |
int error, s; |
|
|
Line 359 socreate(int dom, struct socket **aso, i |
|
Line 484 socreate(int dom, struct socket **aso, i |
|
#endif |
#endif |
if (p != 0) |
if (p != 0) |
so->so_uid = p->p_ucred->cr_uid; |
so->so_uid = p->p_ucred->cr_uid; |
|
else |
|
so->so_uid = UID_MAX; |
error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0, |
error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0, |
(struct mbuf *)(long)proto, (struct mbuf *)0, p); |
(struct mbuf *)(long)proto, (struct mbuf *)0, p); |
if (error) { |
if (error) { |
Line 408 solisten(struct socket *so, int backlog) |
|
Line 535 solisten(struct socket *so, int backlog) |
|
void |
void |
sofree(struct socket *so) |
sofree(struct socket *so) |
{ |
{ |
struct mbuf *m; |
|
|
|
if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) |
if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) |
return; |
return; |
Line 421 sofree(struct socket *so) |
|
Line 547 sofree(struct socket *so) |
|
if (!soqremque(so, 0)) |
if (!soqremque(so, 0)) |
return; |
return; |
} |
} |
sbrelease(&so->so_snd); |
if (so->so_rcv.sb_hiwat) |
|
(void)chgsbsize(so->so_uid, &so->so_rcv.sb_hiwat, 0, |
|
RLIM_INFINITY); |
|
if (so->so_snd.sb_hiwat) |
|
(void)chgsbsize(so->so_uid, &so->so_snd.sb_hiwat, 0, |
|
RLIM_INFINITY); |
|
sbrelease(&so->so_snd, so); |
sorflush(so); |
sorflush(so); |
while ((m = so->so_pendfree) != NULL) { |
|
so->so_pendfree = m->m_next; |
|
m->m_next = so_pendfree; |
|
so_pendfree = m; |
|
} |
|
pool_put(&socket_pool, so); |
pool_put(&socket_pool, so); |
} |
} |
|
|
Line 669 sosend(struct socket *so, struct mbuf *a |
|
Line 796 sosend(struct socket *so, struct mbuf *a |
|
if ((atomic && resid > so->so_snd.sb_hiwat) || |
if ((atomic && resid > so->so_snd.sb_hiwat) || |
clen > so->so_snd.sb_hiwat) |
clen > so->so_snd.sb_hiwat) |
snderr(EMSGSIZE); |
snderr(EMSGSIZE); |
if (space < resid + clen && uio && |
if (space < resid + clen && |
(atomic || space < so->so_snd.sb_lowat || space < clen)) { |
(atomic || space < so->so_snd.sb_lowat || space < clen)) { |
if (so->so_state & SS_NBIO) |
if (so->so_state & SS_NBIO) |
snderr(EWOULDBLOCK); |
snderr(EWOULDBLOCK); |
Line 812 soreceive(struct socket *so, struct mbuf |
|
Line 939 soreceive(struct socket *so, struct mbuf |
|
{ |
{ |
struct mbuf *m, **mp; |
struct mbuf *m, **mp; |
int flags, len, error, s, offset, moff, type, orig_resid; |
int flags, len, error, s, offset, moff, type, orig_resid; |
struct protosw *pr; |
const struct protosw *pr; |
struct mbuf *nextrecord; |
struct mbuf *nextrecord; |
int mbuf_removed = 0; |
int mbuf_removed = 0; |
|
|
Line 1215 soreceive(struct socket *so, struct mbuf |
|
Line 1342 soreceive(struct socket *so, struct mbuf |
|
int |
int |
soshutdown(struct socket *so, int how) |
soshutdown(struct socket *so, int how) |
{ |
{ |
struct protosw *pr; |
const struct protosw *pr; |
|
|
pr = so->so_proto; |
pr = so->so_proto; |
if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR)) |
if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR)) |
|
|
sorflush(struct socket *so) |
sorflush(struct socket *so) |
{ |
{ |
struct sockbuf *sb, asb; |
struct sockbuf *sb, asb; |
struct protosw *pr; |
const struct protosw *pr; |
int s; |
int s; |
|
|
sb = &so->so_rcv; |
sb = &so->so_rcv; |
Line 1244 sorflush(struct socket *so) |
|
Line 1371 sorflush(struct socket *so) |
|
socantrcvmore(so); |
socantrcvmore(so); |
sbunlock(sb); |
sbunlock(sb); |
asb = *sb; |
asb = *sb; |
memset((caddr_t)sb, 0, sizeof(*sb)); |
/* |
|
* Clear most of the sockbuf structure, but leave some of the |
|
* fields valid. |
|
*/ |
|
memset(&sb->sb_startzero, 0, |
|
sizeof(*sb) - offsetof(struct sockbuf, sb_startzero)); |
splx(s); |
splx(s); |
if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) |
if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) |
(*pr->pr_domain->dom_dispose)(asb.sb_mb); |
(*pr->pr_domain->dom_dispose)(asb.sb_mb); |
sbrelease(&asb); |
sbrelease(&asb, so); |
} |
} |
|
|
int |
int |
Line 1322 sosetopt(struct socket *so, int level, i |
|
Line 1454 sosetopt(struct socket *so, int level, i |
|
case SO_RCVBUF: |
case SO_RCVBUF: |
if (sbreserve(optname == SO_SNDBUF ? |
if (sbreserve(optname == SO_SNDBUF ? |
&so->so_snd : &so->so_rcv, |
&so->so_snd : &so->so_rcv, |
(u_long) optval) == 0) { |
(u_long) optval, so) == 0) { |
error = ENOBUFS; |
error = ENOBUFS; |
goto bad; |
goto bad; |
} |
} |
Line 1479 sogetopt(struct socket *so, int level, i |
|
Line 1611 sogetopt(struct socket *so, int level, i |
|
void |
void |
sohasoutofband(struct socket *so) |
sohasoutofband(struct socket *so) |
{ |
{ |
struct proc *p; |
fownsignal(so->so_pgid, SIGURG, POLL_PRI, POLLPRI|POLLRDBAND, so); |
|
|
if (so->so_pgid < 0) |
|
gsignal(-so->so_pgid, SIGURG); |
|
else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) |
|
psignal(p, SIGURG); |
|
selwakeup(&so->so_rcv.sb_sel); |
selwakeup(&so->so_rcv.sb_sel); |
} |
} |
|
|
Line 1603 soo_kqfilter(struct file *fp, struct kno |
|
Line 1730 soo_kqfilter(struct file *fp, struct kno |
|
return (0); |
return (0); |
} |
} |
|
|
|
#include <sys/sysctl.h> |
|
|
|
static int sysctl_kern_somaxkva(SYSCTLFN_PROTO); |
|
|
|
/* |
|
* sysctl helper routine for kern.somaxkva. ensures that the given |
|
* value is not too small. |
|
* (XXX should we maybe make sure it's not too large as well?) |
|
*/ |
|
static int |
|
sysctl_kern_somaxkva(SYSCTLFN_ARGS) |
|
{ |
|
int error, new_somaxkva; |
|
struct sysctlnode node; |
|
int s; |
|
|
|
new_somaxkva = somaxkva; |
|
node = *rnode; |
|
node.sysctl_data = &new_somaxkva; |
|
error = sysctl_lookup(SYSCTLFN_CALL(&node)); |
|
if (error || newp == NULL) |
|
return (error); |
|
|
|
if (new_somaxkva < (16 * 1024 * 1024)) /* sanity */ |
|
return (EINVAL); |
|
|
|
s = splvm(); |
|
simple_lock(&so_pendfree_slock); |
|
somaxkva = new_somaxkva; |
|
wakeup(&socurkva); |
|
simple_unlock(&so_pendfree_slock); |
|
splx(s); |
|
|
|
return (error); |
|
} |
|
|
|
/*
 * Set up the kern.somaxkva sysctl: one sysctl_createv() call for the
 * "kern" node, then one for the read-write integer "somaxkva" beneath
 * it, which is handled by sysctl_kern_somaxkva().
 * NOTE(review): the return values of sysctl_createv() are not checked.
 */
SYSCTL_SETUP(sysctl_kern_somaxkva_setup, "sysctl kern.somaxkva setup")
{

	/* the "kern" node. */
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "kern", NULL,
	    NULL, 0, NULL, 0,
	    CTL_KERN, CTL_EOL);

	/* kern.somaxkva: int, read-write, routed through the helper. */
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "somaxkva", NULL,
	    sysctl_kern_somaxkva, 0, NULL, 0,
	    CTL_KERN, KERN_SOMAXKVA, CTL_EOL);
}