version 1.177.4.3, 2009/05/03 13:18:55 |
version 1.200, 2009/12/30 22:12:12 |
|
|
#include <sys/cdefs.h> |
#include <sys/cdefs.h> |
__KERNEL_RCSID(0, "$NetBSD$"); |
__KERNEL_RCSID(0, "$NetBSD$"); |
|
|
|
#include "opt_compat_netbsd.h" |
#include "opt_sock_counters.h" |
#include "opt_sock_counters.h" |
#include "opt_sosend_loan.h" |
#include "opt_sosend_loan.h" |
#include "opt_mbuftrace.h" |
#include "opt_mbuftrace.h" |
Line 92 __KERNEL_RCSID(0, "$NetBSD$"); |
|
Line 93 __KERNEL_RCSID(0, "$NetBSD$"); |
|
#include <sys/mutex.h> |
#include <sys/mutex.h> |
#include <sys/condvar.h> |
#include <sys/condvar.h> |
|
|
|
#ifdef COMPAT_50 |
|
#include <compat/sys/time.h> |
|
#include <compat/sys/socket.h> |
|
#endif |
|
|
#include <uvm/uvm.h> |
#include <uvm/uvm.h> |
|
|
MALLOC_DEFINE(M_SOOPTS, "soopts", "socket options"); |
MALLOC_DEFINE(M_SOOPTS, "soopts", "socket options"); |
Line 145 int somaxkva = SOMAXKVA; |
|
Line 151 int somaxkva = SOMAXKVA; |
|
static int socurkva; |
static int socurkva; |
static kcondvar_t socurkva_cv; |
static kcondvar_t socurkva_cv; |
|
|
|
static kauth_listener_t socket_listener; |
|
|
#define SOCK_LOAN_CHUNK 65536 |
#define SOCK_LOAN_CHUNK 65536 |
|
|
static size_t sodopendfree(void); |
static size_t sodopendfree(void); |
static size_t sodopendfreel(void); |
static size_t sodopendfreel(void); |
|
|
|
static void sysctl_kern_somaxkva_setup(void); |
|
static struct sysctllog *socket_sysctllog; |
|
|
static vsize_t |
static vsize_t |
sokvareserve(struct socket *so, vsize_t len) |
sokvareserve(struct socket *so, vsize_t len) |
{ |
{ |
Line 375 sosend_loan(struct socket *so, struct ui |
|
Line 386 sosend_loan(struct socket *so, struct ui |
|
|
|
for (i = 0, va = lva; i < npgs; i++, va += PAGE_SIZE) |
for (i = 0, va = lva; i < npgs; i++, va += PAGE_SIZE) |
pmap_kenter_pa(va, VM_PAGE_TO_PHYS(m->m_ext.ext_pgs[i]), |
pmap_kenter_pa(va, VM_PAGE_TO_PHYS(m->m_ext.ext_pgs[i]), |
VM_PROT_READ); |
VM_PROT_READ, 0); |
pmap_update(pmap_kernel()); |
pmap_update(pmap_kernel()); |
|
|
lva += (vaddr_t) iov->iov_base & PAGE_MASK; |
lva += (vaddr_t) iov->iov_base & PAGE_MASK; |
Line 419 getsombuf(struct socket *so, int type) |
|
Line 430 getsombuf(struct socket *so, int type) |
|
return m; |
return m; |
} |
} |
|
|
|
static int |
|
socket_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, |
|
void *arg0, void *arg1, void *arg2, void *arg3) |
|
{ |
|
int result; |
|
enum kauth_network_req req; |
|
|
|
result = KAUTH_RESULT_DEFER; |
|
req = (enum kauth_network_req)arg0; |
|
|
|
if ((action != KAUTH_NETWORK_SOCKET) && |
|
(action != KAUTH_NETWORK_BIND)) |
|
return result; |
|
|
|
switch (req) { |
|
case KAUTH_REQ_NETWORK_BIND_PORT: |
|
result = KAUTH_RESULT_ALLOW; |
|
break; |
|
|
|
case KAUTH_REQ_NETWORK_SOCKET_DROP: { |
|
/* Normal users can only drop their own connections. */ |
|
struct socket *so = (struct socket *)arg1; |
|
|
|
if (proc_uidmatch(cred, so->so_cred)) |
|
result = KAUTH_RESULT_ALLOW; |
|
|
|
break; |
|
} |
|
|
|
case KAUTH_REQ_NETWORK_SOCKET_OPEN: |
|
/* We allow "raw" routing/bluetooth sockets to anyone. */ |
|
if ((u_long)arg1 == PF_ROUTE || (u_long)arg1 == PF_BLUETOOTH) |
|
result = KAUTH_RESULT_ALLOW; |
|
else { |
|
/* Privileged, let secmodel handle this. */ |
|
if ((u_long)arg2 == SOCK_RAW) |
|
break; |
|
} |
|
|
|
result = KAUTH_RESULT_ALLOW; |
|
|
|
break; |
|
|
|
case KAUTH_REQ_NETWORK_SOCKET_CANSEE: |
|
result = KAUTH_RESULT_ALLOW; |
|
|
|
break; |
|
|
|
default: |
|
break; |
|
} |
|
|
|
return result; |
|
} |
|
|
void |
void |
soinit(void) |
soinit(void) |
{ |
{ |
|
|
|
sysctl_kern_somaxkva_setup(); |
|
|
mutex_init(&so_pendfree_lock, MUTEX_DEFAULT, IPL_VM); |
mutex_init(&so_pendfree_lock, MUTEX_DEFAULT, IPL_VM); |
softnet_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); |
softnet_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); |
cv_init(&socurkva_cv, "sokva"); |
cv_init(&socurkva_cv, "sokva"); |
|
|
|
|
callback_register(&vm_map_to_kernel(kernel_map)->vmk_reclaim_callback, |
callback_register(&vm_map_to_kernel(kernel_map)->vmk_reclaim_callback, |
&sokva_reclaimerentry, NULL, sokva_reclaim_callback); |
&sokva_reclaimerentry, NULL, sokva_reclaim_callback); |
|
|
|
socket_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK, |
|
socket_listener_cb, NULL); |
} |
} |
|
|
/* |
/* |
Line 490 socreate(int dom, struct socket **aso, i |
|
Line 561 socreate(int dom, struct socket **aso, i |
|
#endif |
#endif |
uid = kauth_cred_geteuid(l->l_cred); |
uid = kauth_cred_geteuid(l->l_cred); |
so->so_uidinfo = uid_find(uid); |
so->so_uidinfo = uid_find(uid); |
so->so_egid = kauth_cred_getegid(l->l_cred); |
|
so->so_cpid = l->l_proc->p_pid; |
so->so_cpid = l->l_proc->p_pid; |
if (lockso != NULL) { |
if (lockso != NULL) { |
/* Caller wants us to share a lock. */ |
/* Caller wants us to share a lock. */ |
Line 509 socreate(int dom, struct socket **aso, i |
|
Line 579 socreate(int dom, struct socket **aso, i |
|
sofree(so); |
sofree(so); |
return error; |
return error; |
} |
} |
|
so->so_cred = kauth_cred_dup(l->l_cred); |
sounlock(so); |
sounlock(so); |
*aso = so; |
*aso = so; |
return 0; |
return 0; |
Line 544 fsocreate(int domain, struct socket **so |
|
Line 615 fsocreate(int domain, struct socket **so |
|
} |
} |
|
|
int |
int |
|
sofamily(const struct socket *so) |
|
{ |
|
const struct protosw *pr; |
|
const struct domain *dom; |
|
|
|
if ((pr = so->so_proto) == NULL) |
|
return AF_UNSPEC; |
|
if ((dom = pr->pr_domain) == NULL) |
|
return AF_UNSPEC; |
|
return dom->dom_family; |
|
} |
|
|
|
int |
sobind(struct socket *so, struct mbuf *nam, struct lwp *l) |
sobind(struct socket *so, struct mbuf *nam, struct lwp *l) |
{ |
{ |
int error; |
int error; |
Line 685 soclose(struct socket *so) |
|
Line 769 soclose(struct socket *so) |
|
discard: |
discard: |
if (so->so_state & SS_NOFDREF) |
if (so->so_state & SS_NOFDREF) |
panic("soclose: NOFDREF"); |
panic("soclose: NOFDREF"); |
|
kauth_cred_free(so->so_cred); |
so->so_state |= SS_NOFDREF; |
so->so_state |= SS_NOFDREF; |
sofree(so); |
sofree(so); |
return (error); |
return (error); |
Line 817 sosend(struct socket *so, struct mbuf *a |
|
Line 902 sosend(struct socket *so, struct mbuf *a |
|
struct proc *p; |
struct proc *p; |
long space, len, resid, clen, mlen; |
long space, len, resid, clen, mlen; |
int error, s, dontroute, atomic; |
int error, s, dontroute, atomic; |
|
short wakeup_state = 0; |
|
|
p = l->l_proc; |
p = l->l_proc; |
sodopendfree(); |
sodopendfree(); |
Line 891 sosend(struct socket *so, struct mbuf *a |
|
Line 977 sosend(struct socket *so, struct mbuf *a |
|
goto release; |
goto release; |
} |
} |
sbunlock(&so->so_snd); |
sbunlock(&so->so_snd); |
|
if (wakeup_state & SS_RESTARTSYS) { |
|
error = ERESTART; |
|
goto out; |
|
} |
error = sbwait(&so->so_snd); |
error = sbwait(&so->so_snd); |
if (error) |
if (error) |
goto out; |
goto out; |
|
wakeup_state = so->so_state; |
goto restart; |
goto restart; |
} |
} |
|
wakeup_state = 0; |
mp = &top; |
mp = &top; |
space -= clen; |
space -= clen; |
do { |
do { |
Line 1071 soreceive(struct socket *so, struct mbuf |
|
Line 1163 soreceive(struct socket *so, struct mbuf |
|
struct mbuf *nextrecord; |
struct mbuf *nextrecord; |
int mbuf_removed = 0; |
int mbuf_removed = 0; |
const struct domain *dom; |
const struct domain *dom; |
|
short wakeup_state = 0; |
|
|
pr = so->so_proto; |
pr = so->so_proto; |
atomic = pr->pr_flags & PR_ATOMIC; |
atomic = pr->pr_flags & PR_ATOMIC; |
Line 1185 soreceive(struct socket *so, struct mbuf |
|
Line 1278 soreceive(struct socket *so, struct mbuf |
|
SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1"); |
SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1"); |
SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1"); |
SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1"); |
sbunlock(&so->so_rcv); |
sbunlock(&so->so_rcv); |
error = sbwait(&so->so_rcv); |
if (wakeup_state & SS_RESTARTSYS) |
|
error = ERESTART; |
|
else |
|
error = sbwait(&so->so_rcv); |
if (error != 0) { |
if (error != 0) { |
sounlock(so); |
sounlock(so); |
splx(s); |
splx(s); |
return error; |
return error; |
} |
} |
|
wakeup_state = so->so_state; |
goto restart; |
goto restart; |
} |
} |
dontblock: |
dontblock: |
Line 1329 soreceive(struct socket *so, struct mbuf |
|
Line 1426 soreceive(struct socket *so, struct mbuf |
|
panic("receive 3"); |
panic("receive 3"); |
#endif |
#endif |
so->so_state &= ~SS_RCVATMARK; |
so->so_state &= ~SS_RCVATMARK; |
|
wakeup_state = 0; |
len = uio->uio_resid; |
len = uio->uio_resid; |
if (so->so_oobmark && len > so->so_oobmark - offset) |
if (so->so_oobmark && len > so->so_oobmark - offset) |
len = so->so_oobmark - offset; |
len = so->so_oobmark - offset; |
Line 1461 soreceive(struct socket *so, struct mbuf |
|
Line 1559 soreceive(struct socket *so, struct mbuf |
|
NULL, (struct mbuf *)(long)flags, NULL, l); |
NULL, (struct mbuf *)(long)flags, NULL, l); |
SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2"); |
SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2"); |
SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2"); |
SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2"); |
error = sbwait(&so->so_rcv); |
if (wakeup_state & SS_RESTARTSYS) |
|
error = ERESTART; |
|
else |
|
error = sbwait(&so->so_rcv); |
if (error != 0) { |
if (error != 0) { |
sbunlock(&so->so_rcv); |
sbunlock(&so->so_rcv); |
sounlock(so); |
sounlock(so); |
Line 1470 soreceive(struct socket *so, struct mbuf |
|
Line 1571 soreceive(struct socket *so, struct mbuf |
|
} |
} |
if ((m = so->so_rcv.sb_mb) != NULL) |
if ((m = so->so_rcv.sb_mb) != NULL) |
nextrecord = m->m_nextpkt; |
nextrecord = m->m_nextpkt; |
|
wakeup_state = so->so_state; |
} |
} |
} |
} |
|
|
Line 1536 soshutdown(struct socket *so, int how) |
|
Line 1638 soshutdown(struct socket *so, int how) |
|
return error; |
return error; |
} |
} |
|
|
int |
void |
sodrain(struct socket *so) |
sorestart(struct socket *so) |
{ |
{ |
int error; |
/* |
|
* An application has called close() on an fd on which another |
|
* of its threads has called a socket system call. |
|
* Mark this and wake everyone up, and code that would block again |
|
* instead returns ERESTART. |
|
* On system call re-entry the fd is validated and EBADF returned. |
|
* Any other fd will block again on the 2nd syscall. |
|
*/ |
solock(so); |
solock(so); |
so->so_state |= SS_ISDRAINING; |
so->so_state |= SS_RESTARTSYS; |
cv_broadcast(&so->so_cv); |
cv_broadcast(&so->so_cv); |
error = soshutdown(so, SHUT_RDWR); |
cv_broadcast(&so->so_snd.sb_cv); |
|
cv_broadcast(&so->so_rcv.sb_cv); |
sounlock(so); |
sounlock(so); |
|
|
return error; |
|
} |
} |
|
|
void |
void |
Line 1585 sorflush(struct socket *so) |
|
Line 1692 sorflush(struct socket *so) |
|
static int |
static int |
sosetopt1(struct socket *so, const struct sockopt *sopt) |
sosetopt1(struct socket *so, const struct sockopt *sopt) |
{ |
{ |
int error, optval; |
int error = EINVAL, optval, opt; |
struct linger l; |
struct linger l; |
struct timeval tv; |
struct timeval tv; |
|
|
switch (sopt->sopt_name) { |
switch ((opt = sopt->sopt_name)) { |
|
|
case SO_ACCEPTFILTER: |
case SO_ACCEPTFILTER: |
error = accept_filt_setopt(so, sopt); |
error = accept_filt_setopt(so, sopt); |
Line 1622 sosetopt1(struct socket *so, const struc |
|
Line 1729 sosetopt1(struct socket *so, const struc |
|
case SO_REUSEPORT: |
case SO_REUSEPORT: |
case SO_OOBINLINE: |
case SO_OOBINLINE: |
case SO_TIMESTAMP: |
case SO_TIMESTAMP: |
|
#ifdef SO_OTIMESTAMP |
|
case SO_OTIMESTAMP: |
|
#endif |
error = sockopt_getint(sopt, &optval); |
error = sockopt_getint(sopt, &optval); |
solock(so); |
solock(so); |
if (error) |
if (error) |
break; |
break; |
if (optval) |
if (optval) |
so->so_options |= sopt->sopt_name; |
so->so_options |= opt; |
else |
else |
so->so_options &= ~sopt->sopt_name; |
so->so_options &= ~opt; |
break; |
break; |
|
|
case SO_SNDBUF: |
case SO_SNDBUF: |
Line 1650 sosetopt1(struct socket *so, const struc |
|
Line 1760 sosetopt1(struct socket *so, const struc |
|
break; |
break; |
} |
} |
|
|
switch (sopt->sopt_name) { |
switch (opt) { |
case SO_SNDBUF: |
case SO_SNDBUF: |
if (sbreserve(&so->so_snd, (u_long)optval, so) == 0) { |
if (sbreserve(&so->so_snd, (u_long)optval, so) == 0) { |
error = ENOBUFS; |
error = ENOBUFS; |
Line 1687 sosetopt1(struct socket *so, const struc |
|
Line 1797 sosetopt1(struct socket *so, const struc |
|
} |
} |
break; |
break; |
|
|
|
#ifdef COMPAT_50 |
|
case SO_OSNDTIMEO: |
|
case SO_ORCVTIMEO: { |
|
struct timeval50 otv; |
|
error = sockopt_get(sopt, &otv, sizeof(otv)); |
|
if (error) { |
|
solock(so); |
|
break; |
|
} |
|
timeval50_to_timeval(&otv, &tv); |
|
opt = opt == SO_OSNDTIMEO ? SO_SNDTIMEO : SO_RCVTIMEO; |
|
error = 0; |
|
/*FALLTHROUGH*/ |
|
} |
|
#endif /* COMPAT_50 */ |
|
|
case SO_SNDTIMEO: |
case SO_SNDTIMEO: |
case SO_RCVTIMEO: |
case SO_RCVTIMEO: |
error = sockopt_get(sopt, &tv, sizeof(tv)); |
if (error) |
|
error = sockopt_get(sopt, &tv, sizeof(tv)); |
solock(so); |
solock(so); |
if (error) |
if (error) |
break; |
break; |
Line 1703 sosetopt1(struct socket *so, const struc |
|
Line 1830 sosetopt1(struct socket *so, const struc |
|
if (optval == 0 && tv.tv_usec != 0) |
if (optval == 0 && tv.tv_usec != 0) |
optval = 1; |
optval = 1; |
|
|
switch (sopt->sopt_name) { |
switch (opt) { |
case SO_SNDTIMEO: |
case SO_SNDTIMEO: |
so->so_snd.sb_timeo = optval; |
so->so_snd.sb_timeo = optval; |
break; |
break; |
Line 1776 so_setsockopt(struct lwp *l, struct sock |
|
Line 1903 so_setsockopt(struct lwp *l, struct sock |
|
static int |
static int |
sogetopt1(struct socket *so, struct sockopt *sopt) |
sogetopt1(struct socket *so, struct sockopt *sopt) |
{ |
{ |
int error, optval; |
int error, optval, opt; |
struct linger l; |
struct linger l; |
struct timeval tv; |
struct timeval tv; |
|
|
switch (sopt->sopt_name) { |
switch ((opt = sopt->sopt_name)) { |
|
|
case SO_ACCEPTFILTER: |
case SO_ACCEPTFILTER: |
error = accept_filt_getopt(so, sopt); |
error = accept_filt_getopt(so, sopt); |
Line 1802 sogetopt1(struct socket *so, struct sock |
|
Line 1929 sogetopt1(struct socket *so, struct sock |
|
case SO_BROADCAST: |
case SO_BROADCAST: |
case SO_OOBINLINE: |
case SO_OOBINLINE: |
case SO_TIMESTAMP: |
case SO_TIMESTAMP: |
error = sockopt_setint(sopt, |
#ifdef SO_OTIMESTAMP |
(so->so_options & sopt->sopt_name) ? 1 : 0); |
case SO_OTIMESTAMP: |
|
#endif |
|
error = sockopt_setint(sopt, (so->so_options & opt) ? 1 : 0); |
break; |
break; |
|
|
case SO_TYPE: |
case SO_TYPE: |
Line 1831 sogetopt1(struct socket *so, struct sock |
|
Line 1960 sogetopt1(struct socket *so, struct sock |
|
error = sockopt_setint(sopt, so->so_rcv.sb_lowat); |
error = sockopt_setint(sopt, so->so_rcv.sb_lowat); |
break; |
break; |
|
|
|
#ifdef COMPAT_50 |
|
case SO_OSNDTIMEO: |
|
case SO_ORCVTIMEO: { |
|
struct timeval50 otv; |
|
|
|
optval = (opt == SO_OSNDTIMEO ? |
|
so->so_snd.sb_timeo : so->so_rcv.sb_timeo); |
|
|
|
otv.tv_sec = optval / hz; |
|
otv.tv_usec = (optval % hz) * tick; |
|
|
|
error = sockopt_set(sopt, &otv, sizeof(otv)); |
|
break; |
|
} |
|
#endif /* COMPAT_50 */ |
|
|
case SO_SNDTIMEO: |
case SO_SNDTIMEO: |
case SO_RCVTIMEO: |
case SO_RCVTIMEO: |
optval = (sopt->sopt_name == SO_SNDTIMEO ? |
optval = (opt == SO_SNDTIMEO ? |
so->so_snd.sb_timeo : so->so_rcv.sb_timeo); |
so->so_snd.sb_timeo : so->so_rcv.sb_timeo); |
|
|
tv.tv_sec = optval / hz; |
tv.tv_sec = optval / hz; |
Line 2274 sysctl_kern_somaxkva(SYSCTLFN_ARGS) |
|
Line 2419 sysctl_kern_somaxkva(SYSCTLFN_ARGS) |
|
return (error); |
return (error); |
} |
} |
|
|
SYSCTL_SETUP(sysctl_kern_somaxkva_setup, "sysctl kern.somaxkva setup") |
static void |
|
sysctl_kern_somaxkva_setup(void) |
{ |
{ |
|
|
sysctl_createv(clog, 0, NULL, NULL, |
KASSERT(socket_sysctllog == NULL); |
|
sysctl_createv(&socket_sysctllog, 0, NULL, NULL, |
CTLFLAG_PERMANENT, |
CTLFLAG_PERMANENT, |
CTLTYPE_NODE, "kern", NULL, |
CTLTYPE_NODE, "kern", NULL, |
NULL, 0, NULL, 0, |
NULL, 0, NULL, 0, |
CTL_KERN, CTL_EOL); |
CTL_KERN, CTL_EOL); |
|
|
sysctl_createv(clog, 0, NULL, NULL, |
sysctl_createv(&socket_sysctllog, 0, NULL, NULL, |
CTLFLAG_PERMANENT|CTLFLAG_READWRITE, |
CTLFLAG_PERMANENT|CTLFLAG_READWRITE, |
CTLTYPE_INT, "somaxkva", |
CTLTYPE_INT, "somaxkva", |
SYSCTL_DESCR("Maximum amount of kernel memory to be " |
SYSCTL_DESCR("Maximum amount of kernel memory to be " |