version 1.160.2.3, 2009/09/16 13:38:01 |
version 1.177.4.4, 2011/08/08 19:45:57 |
|
|
#include <sys/cdefs.h> |
#include <sys/cdefs.h> |
__KERNEL_RCSID(0, "$NetBSD$"); |
__KERNEL_RCSID(0, "$NetBSD$"); |
|
|
#include "opt_compat_netbsd.h" |
|
#include "opt_sock_counters.h" |
#include "opt_sock_counters.h" |
#include "opt_sosend_loan.h" |
#include "opt_sosend_loan.h" |
#include "opt_mbuftrace.h" |
#include "opt_mbuftrace.h" |
Line 92 __KERNEL_RCSID(0, "$NetBSD$"); |
|
Line 91 __KERNEL_RCSID(0, "$NetBSD$"); |
|
#include <sys/kauth.h> |
#include <sys/kauth.h> |
#include <sys/mutex.h> |
#include <sys/mutex.h> |
#include <sys/condvar.h> |
#include <sys/condvar.h> |
|
#include <sys/kthread.h> |
#ifdef COMPAT_50 |
|
#include <compat/sys/time.h> |
|
#include <compat/sys/socket.h> |
|
#endif |
|
|
|
#include <uvm/uvm.h> |
#include <uvm/uvm.h> |
|
|
Line 142 int sock_loan_thresh = 4096; |
|
Line 137 int sock_loan_thresh = 4096; |
|
#endif |
#endif |
|
|
static kmutex_t so_pendfree_lock; |
static kmutex_t so_pendfree_lock; |
static struct mbuf *so_pendfree; |
static struct mbuf *so_pendfree = NULL; |
|
|
#ifndef SOMAXKVA |
#ifndef SOMAXKVA |
#define SOMAXKVA (16 * 1024 * 1024) |
#define SOMAXKVA (16 * 1024 * 1024) |
Line 153 static kcondvar_t socurkva_cv; |
|
Line 148 static kcondvar_t socurkva_cv; |
|
|
|
#define SOCK_LOAN_CHUNK 65536 |
#define SOCK_LOAN_CHUNK 65536 |
|
|
static size_t sodopendfree(void); |
static void sopendfree_thread(void *); |
static size_t sodopendfreel(void); |
static kcondvar_t pendfree_thread_cv; |
|
static lwp_t *sopendfree_lwp; |
static void sysctl_kern_somaxkva_setup(void); |
|
static struct sysctllog *socket_sysctllog; |
|
|
|
static vsize_t |
static vsize_t |
sokvareserve(struct socket *so, vsize_t len) |
sokvareserve(struct socket *so, vsize_t len) |
Line 166 sokvareserve(struct socket *so, vsize_t |
|
Line 159 sokvareserve(struct socket *so, vsize_t |
|
|
|
mutex_enter(&so_pendfree_lock); |
mutex_enter(&so_pendfree_lock); |
while (socurkva + len > somaxkva) { |
while (socurkva + len > somaxkva) { |
size_t freed; |
|
|
|
/* |
|
* try to do pendfree. |
|
*/ |
|
|
|
freed = sodopendfreel(); |
|
|
|
/* |
|
* if some kva was freed, try again. |
|
*/ |
|
|
|
if (freed) |
|
continue; |
|
|
|
SOSEND_COUNTER_INCR(&sosend_kvalimit); |
SOSEND_COUNTER_INCR(&sosend_kvalimit); |
error = cv_wait_sig(&socurkva_cv, &so_pendfree_lock); |
error = cv_wait_sig(&socurkva_cv, &so_pendfree_lock); |
if (error) { |
if (error) { |
Line 273 sodoloanfree(struct vm_page **pgs, void |
|
Line 251 sodoloanfree(struct vm_page **pgs, void |
|
sokvafree(sva, len); |
sokvafree(sva, len); |
} |
} |
|
|
static size_t |
|
sodopendfree(void) |
|
{ |
|
size_t rv; |
|
|
|
if (__predict_true(so_pendfree == NULL)) |
|
return 0; |
|
|
|
mutex_enter(&so_pendfree_lock); |
|
rv = sodopendfreel(); |
|
mutex_exit(&so_pendfree_lock); |
|
|
|
return rv; |
|
} |
|
|
|
/* |
/* |
* sodopendfreel: free mbufs on "pendfree" list. |
* sopendfree_thread: free mbufs on "pendfree" list. |
* unlock and relock so_pendfree_lock when freeing mbufs. |
* unlock and relock so_pendfree_lock when freeing mbufs. |
* |
|
* => called with so_pendfree_lock held. |
|
*/ |
*/ |
|
|
static size_t |
static void |
sodopendfreel(void) |
sopendfree_thread(void *v) |
{ |
{ |
struct mbuf *m, *next; |
struct mbuf *m, *next; |
size_t rv = 0; |
size_t rv; |
|
|
KASSERT(mutex_owned(&so_pendfree_lock)); |
|
|
|
while (so_pendfree != NULL) { |
mutex_enter(&so_pendfree_lock); |
m = so_pendfree; |
|
so_pendfree = NULL; |
|
mutex_exit(&so_pendfree_lock); |
|
|
|
for (; m != NULL; m = next) { |
for (;;) { |
next = m->m_next; |
rv = 0; |
KASSERT((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0); |
while (so_pendfree != NULL) { |
KASSERT(m->m_ext.ext_refcnt == 0); |
m = so_pendfree; |
|
so_pendfree = NULL; |
|
mutex_exit(&so_pendfree_lock); |
|
|
|
for (; m != NULL; m = next) { |
|
next = m->m_next; |
|
KASSERT((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0); |
|
KASSERT(m->m_ext.ext_refcnt == 0); |
|
|
|
rv += m->m_ext.ext_size; |
|
sodoloanfree(m->m_ext.ext_pgs, m->m_ext.ext_buf, |
|
m->m_ext.ext_size); |
|
pool_cache_put(mb_cache, m); |
|
} |
|
|
rv += m->m_ext.ext_size; |
mutex_enter(&so_pendfree_lock); |
sodoloanfree(m->m_ext.ext_pgs, m->m_ext.ext_buf, |
|
m->m_ext.ext_size); |
|
pool_cache_put(mb_cache, m); |
|
} |
} |
|
if (rv) |
mutex_enter(&so_pendfree_lock); |
cv_broadcast(&socurkva_cv); |
|
cv_wait(&pendfree_thread_cv, &so_pendfree_lock); |
} |
} |
|
panic("sopendfree_thread"); |
return (rv); |
/* NOTREACHED */ |
} |
} |
|
|
void |
void |
Line 341 soloanfree(struct mbuf *m, void *buf, si |
|
Line 308 soloanfree(struct mbuf *m, void *buf, si |
|
mutex_enter(&so_pendfree_lock); |
mutex_enter(&so_pendfree_lock); |
m->m_next = so_pendfree; |
m->m_next = so_pendfree; |
so_pendfree = m; |
so_pendfree = m; |
cv_broadcast(&socurkva_cv); |
cv_signal(&pendfree_thread_cv); |
mutex_exit(&so_pendfree_lock); |
mutex_exit(&so_pendfree_lock); |
} |
} |
|
|
Line 411 sokva_reclaim_callback(struct callback_e |
|
Line 378 sokva_reclaim_callback(struct callback_e |
|
KASSERT(ce == &sokva_reclaimerentry); |
KASSERT(ce == &sokva_reclaimerentry); |
KASSERT(obj == NULL); |
KASSERT(obj == NULL); |
|
|
sodopendfree(); |
|
if (!vm_map_starved_p(kernel_map)) { |
if (!vm_map_starved_p(kernel_map)) { |
return CALLBACK_CHAIN_ABORT; |
return CALLBACK_CHAIN_ABORT; |
} |
} |
Line 429 getsombuf(struct socket *so, int type) |
|
Line 395 getsombuf(struct socket *so, int type) |
|
} |
} |
|
|
void |
void |
soinit(void) |
soinit() |
{ |
{ |
|
|
sysctl_kern_somaxkva_setup(); |
|
|
|
mutex_init(&so_pendfree_lock, MUTEX_DEFAULT, IPL_VM); |
mutex_init(&so_pendfree_lock, MUTEX_DEFAULT, IPL_VM); |
softnet_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); |
softnet_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); |
cv_init(&socurkva_cv, "sokva"); |
cv_init(&socurkva_cv, "sokva"); |
|
cv_init(&pendfree_thread_cv, "sopendfr"); |
soinit2(); |
soinit2(); |
|
|
|
|
/* Set the initial adjusted socket buffer size. */ |
/* Set the initial adjusted socket buffer size. */ |
if (sb_max_set(sb_max)) |
if (sb_max_set(sb_max)) |
panic("bad initial sb_max value: %lu", sb_max); |
panic("bad initial sb_max value: %lu", sb_max); |
|
|
&sokva_reclaimerentry, NULL, sokva_reclaim_callback); |
&sokva_reclaimerentry, NULL, sokva_reclaim_callback); |
} |
} |
|
|
|
void |
|
soinit1(void) |
|
{ |
|
int error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, |
|
sopendfree_thread, NULL, &sopendfree_lwp, "sopendfree"); |
|
if (error) |
|
panic("soinit1 %d", error); |
|
} |
|
|
/* |
/* |
* Socket operation routines. |
* Socket operation routines. |
* These routines are called by the routines in |
* These routines are called by the routines in |
Line 555 fsocreate(int domain, struct socket **so |
|
Line 529 fsocreate(int domain, struct socket **so |
|
} |
} |
|
|
int |
int |
sofamily(const struct socket *so) |
|
{ |
|
const struct protosw *pr; |
|
const struct domain *dom; |
|
|
|
if ((pr = so->so_proto) == NULL) |
|
return AF_UNSPEC; |
|
if ((dom = pr->pr_domain) == NULL) |
|
return AF_UNSPEC; |
|
return dom->dom_family; |
|
} |
|
|
|
int |
|
sobind(struct socket *so, struct mbuf *nam, struct lwp *l) |
sobind(struct socket *so, struct mbuf *nam, struct lwp *l) |
{ |
{ |
int error; |
int error; |
Line 811 sodisconnect(struct socket *so) |
|
Line 772 sodisconnect(struct socket *so) |
|
error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, |
error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, |
NULL, NULL, NULL, NULL); |
NULL, NULL, NULL, NULL); |
} |
} |
sodopendfree(); |
|
return (error); |
return (error); |
} |
} |
|
|
Line 843 sosend(struct socket *so, struct mbuf *a |
|
Line 803 sosend(struct socket *so, struct mbuf *a |
|
int error, s, dontroute, atomic; |
int error, s, dontroute, atomic; |
|
|
p = l->l_proc; |
p = l->l_proc; |
sodopendfree(); |
|
clen = 0; |
clen = 0; |
|
|
/* |
/* |
Line 1112 soreceive(struct socket *so, struct mbuf |
|
Line 1071 soreceive(struct socket *so, struct mbuf |
|
else |
else |
flags = 0; |
flags = 0; |
|
|
if ((flags & MSG_DONTWAIT) == 0) |
|
sodopendfree(); |
|
|
|
if (flags & MSG_OOB) { |
if (flags & MSG_OOB) { |
m = m_get(M_WAIT, MT_DATA); |
m = m_get(M_WAIT, MT_DATA); |
solock(so); |
solock(so); |
Line 1609 sorflush(struct socket *so) |
|
Line 1565 sorflush(struct socket *so) |
|
static int |
static int |
sosetopt1(struct socket *so, const struct sockopt *sopt) |
sosetopt1(struct socket *so, const struct sockopt *sopt) |
{ |
{ |
int error = EINVAL, optval, opt; |
int error, optval; |
struct linger l; |
struct linger l; |
struct timeval tv; |
struct timeval tv; |
|
|
switch ((opt = sopt->sopt_name)) { |
switch (sopt->sopt_name) { |
|
|
case SO_ACCEPTFILTER: |
case SO_ACCEPTFILTER: |
error = accept_filt_setopt(so, sopt); |
error = accept_filt_setopt(so, sopt); |
Line 1646 sosetopt1(struct socket *so, const struc |
|
Line 1602 sosetopt1(struct socket *so, const struc |
|
case SO_REUSEPORT: |
case SO_REUSEPORT: |
case SO_OOBINLINE: |
case SO_OOBINLINE: |
case SO_TIMESTAMP: |
case SO_TIMESTAMP: |
#ifdef SO_OTIMESTAMP |
|
case SO_OTIMESTAMP: |
|
#endif |
|
error = sockopt_getint(sopt, &optval); |
error = sockopt_getint(sopt, &optval); |
solock(so); |
solock(so); |
if (error) |
if (error) |
break; |
break; |
if (optval) |
if (optval) |
so->so_options |= opt; |
so->so_options |= sopt->sopt_name; |
else |
else |
so->so_options &= ~opt; |
so->so_options &= ~sopt->sopt_name; |
break; |
break; |
|
|
case SO_SNDBUF: |
case SO_SNDBUF: |
Line 1677 sosetopt1(struct socket *so, const struc |
|
Line 1630 sosetopt1(struct socket *so, const struc |
|
break; |
break; |
} |
} |
|
|
switch (opt) { |
switch (sopt->sopt_name) { |
case SO_SNDBUF: |
case SO_SNDBUF: |
if (sbreserve(&so->so_snd, (u_long)optval, so) == 0) { |
if (sbreserve(&so->so_snd, (u_long)optval, so) == 0) { |
error = ENOBUFS; |
error = ENOBUFS; |
Line 1714 sosetopt1(struct socket *so, const struc |
|
Line 1667 sosetopt1(struct socket *so, const struc |
|
} |
} |
break; |
break; |
|
|
#ifdef COMPAT_50 |
|
case SO_OSNDTIMEO: |
|
case SO_ORCVTIMEO: { |
|
struct timeval50 otv; |
|
error = sockopt_get(sopt, &otv, sizeof(otv)); |
|
if (error) { |
|
solock(so); |
|
break; |
|
} |
|
timeval50_to_timeval(&otv, &tv); |
|
opt = opt == SO_OSNDTIMEO ? SO_SNDTIMEO : SO_RCVTIMEO; |
|
error = 0; |
|
/*FALLTHROUGH*/ |
|
} |
|
#endif /* COMPAT_50 */ |
|
|
|
case SO_SNDTIMEO: |
case SO_SNDTIMEO: |
case SO_RCVTIMEO: |
case SO_RCVTIMEO: |
if (error) |
error = sockopt_get(sopt, &tv, sizeof(tv)); |
error = sockopt_get(sopt, &tv, sizeof(tv)); |
|
solock(so); |
solock(so); |
if (error) |
if (error) |
break; |
break; |
Line 1747 sosetopt1(struct socket *so, const struc |
|
Line 1683 sosetopt1(struct socket *so, const struc |
|
if (optval == 0 && tv.tv_usec != 0) |
if (optval == 0 && tv.tv_usec != 0) |
optval = 1; |
optval = 1; |
|
|
switch (opt) { |
switch (sopt->sopt_name) { |
case SO_SNDTIMEO: |
case SO_SNDTIMEO: |
so->so_snd.sb_timeo = optval; |
so->so_snd.sb_timeo = optval; |
break; |
break; |
Line 1820 so_setsockopt(struct lwp *l, struct sock |
|
Line 1756 so_setsockopt(struct lwp *l, struct sock |
|
static int |
static int |
sogetopt1(struct socket *so, struct sockopt *sopt) |
sogetopt1(struct socket *so, struct sockopt *sopt) |
{ |
{ |
int error, optval, opt; |
int error, optval; |
struct linger l; |
struct linger l; |
struct timeval tv; |
struct timeval tv; |
|
|
switch ((opt = sopt->sopt_name)) { |
switch (sopt->sopt_name) { |
|
|
case SO_ACCEPTFILTER: |
case SO_ACCEPTFILTER: |
error = accept_filt_getopt(so, sopt); |
error = accept_filt_getopt(so, sopt); |
Line 1846 sogetopt1(struct socket *so, struct sock |
|
Line 1782 sogetopt1(struct socket *so, struct sock |
|
case SO_BROADCAST: |
case SO_BROADCAST: |
case SO_OOBINLINE: |
case SO_OOBINLINE: |
case SO_TIMESTAMP: |
case SO_TIMESTAMP: |
#ifdef SO_OTIMESTAMP |
error = sockopt_setint(sopt, |
case SO_OTIMESTAMP: |
(so->so_options & sopt->sopt_name) ? 1 : 0); |
#endif |
|
error = sockopt_setint(sopt, (so->so_options & opt) ? 1 : 0); |
|
break; |
break; |
|
|
case SO_TYPE: |
case SO_TYPE: |
Line 1877 sogetopt1(struct socket *so, struct sock |
|
Line 1811 sogetopt1(struct socket *so, struct sock |
|
error = sockopt_setint(sopt, so->so_rcv.sb_lowat); |
error = sockopt_setint(sopt, so->so_rcv.sb_lowat); |
break; |
break; |
|
|
#ifdef COMPAT_50 |
|
case SO_OSNDTIMEO: |
|
case SO_ORCVTIMEO: { |
|
struct timeval50 otv; |
|
|
|
optval = (opt == SO_OSNDTIMEO ? |
|
so->so_snd.sb_timeo : so->so_rcv.sb_timeo); |
|
|
|
otv.tv_sec = optval / hz; |
|
otv.tv_usec = (optval % hz) * tick; |
|
|
|
error = sockopt_set(sopt, &otv, sizeof(otv)); |
|
break; |
|
} |
|
#endif /* COMPAT_50 */ |
|
|
|
case SO_SNDTIMEO: |
case SO_SNDTIMEO: |
case SO_RCVTIMEO: |
case SO_RCVTIMEO: |
optval = (opt == SO_SNDTIMEO ? |
optval = (sopt->sopt_name == SO_SNDTIMEO ? |
so->so_snd.sb_timeo : so->so_rcv.sb_timeo); |
so->so_snd.sb_timeo : so->so_rcv.sb_timeo); |
|
|
tv.tv_sec = optval / hz; |
tv.tv_sec = optval / hz; |
Line 2336 sysctl_kern_somaxkva(SYSCTLFN_ARGS) |
|
Line 2254 sysctl_kern_somaxkva(SYSCTLFN_ARGS) |
|
return (error); |
return (error); |
} |
} |
|
|
static void |
SYSCTL_SETUP(sysctl_kern_somaxkva_setup, "sysctl kern.somaxkva setup") |
sysctl_kern_somaxkva_setup(void) |
|
{ |
{ |
|
|
KASSERT(socket_sysctllog == NULL); |
sysctl_createv(clog, 0, NULL, NULL, |
sysctl_createv(&socket_sysctllog, 0, NULL, NULL, |
|
CTLFLAG_PERMANENT, |
CTLFLAG_PERMANENT, |
CTLTYPE_NODE, "kern", NULL, |
CTLTYPE_NODE, "kern", NULL, |
NULL, 0, NULL, 0, |
NULL, 0, NULL, 0, |
CTL_KERN, CTL_EOL); |
CTL_KERN, CTL_EOL); |
|
|
sysctl_createv(&socket_sysctllog, 0, NULL, NULL, |
sysctl_createv(clog, 0, NULL, NULL, |
CTLFLAG_PERMANENT|CTLFLAG_READWRITE, |
CTLFLAG_PERMANENT|CTLFLAG_READWRITE, |
CTLTYPE_INT, "somaxkva", |
CTLTYPE_INT, "somaxkva", |
SYSCTL_DESCR("Maximum amount of kernel memory to be " |
SYSCTL_DESCR("Maximum amount of kernel memory to be " |