version 1.178, 2008/12/07 20:58:46 |
version 1.202.2.1, 2011/02/08 16:20:00 |
|
|
/* $NetBSD$ */ |
/* $NetBSD$ */ |
|
|
/*- |
/*- |
* Copyright (c) 2002, 2007, 2008 The NetBSD Foundation, Inc. |
* Copyright (c) 2002, 2007, 2008, 2009 The NetBSD Foundation, Inc. |
* All rights reserved. |
* All rights reserved. |
* |
* |
* This code is derived from software contributed to The NetBSD Foundation |
* This code is derived from software contributed to The NetBSD Foundation |
* by Jason R. Thorpe of Wasabi Systems, Inc. |
* by Jason R. Thorpe of Wasabi Systems, Inc, and by Andrew Doran. |
* |
* |
* Redistribution and use in source and binary forms, with or without |
* Redistribution and use in source and binary forms, with or without |
* modification, are permitted provided that the following conditions |
* modification, are permitted provided that the following conditions |
|
|
#include <sys/cdefs.h> |
#include <sys/cdefs.h> |
__KERNEL_RCSID(0, "$NetBSD$"); |
__KERNEL_RCSID(0, "$NetBSD$"); |
|
|
|
#include "opt_compat_netbsd.h" |
#include "opt_sock_counters.h" |
#include "opt_sock_counters.h" |
#include "opt_sosend_loan.h" |
#include "opt_sosend_loan.h" |
#include "opt_mbuftrace.h" |
#include "opt_mbuftrace.h" |
Line 92 __KERNEL_RCSID(0, "$NetBSD$"); |
|
Line 93 __KERNEL_RCSID(0, "$NetBSD$"); |
|
#include <sys/mutex.h> |
#include <sys/mutex.h> |
#include <sys/condvar.h> |
#include <sys/condvar.h> |
|
|
#include <uvm/uvm.h> |
#ifdef COMPAT_50 |
|
#include <compat/sys/time.h> |
|
#include <compat/sys/socket.h> |
|
#endif |
|
|
|
#include <uvm/uvm_extern.h> |
|
#include <uvm/uvm_loan.h> |
|
#include <uvm/uvm_page.h> |
|
|
MALLOC_DEFINE(M_SOOPTS, "soopts", "socket options"); |
MALLOC_DEFINE(M_SOOPTS, "soopts", "socket options"); |
MALLOC_DEFINE(M_SONAME, "soname", "socket name"); |
MALLOC_DEFINE(M_SONAME, "soname", "socket name"); |
Line 145 int somaxkva = SOMAXKVA; |
|
Line 153 int somaxkva = SOMAXKVA; |
|
static int socurkva; |
static int socurkva; |
static kcondvar_t socurkva_cv; |
static kcondvar_t socurkva_cv; |
|
|
|
static kauth_listener_t socket_listener; |
|
|
#define SOCK_LOAN_CHUNK 65536 |
#define SOCK_LOAN_CHUNK 65536 |
|
|
static size_t sodopendfree(void); |
static size_t sodopendfree(void); |
Line 378 sosend_loan(struct socket *so, struct ui |
|
Line 388 sosend_loan(struct socket *so, struct ui |
|
|
|
for (i = 0, va = lva; i < npgs; i++, va += PAGE_SIZE) |
for (i = 0, va = lva; i < npgs; i++, va += PAGE_SIZE) |
pmap_kenter_pa(va, VM_PAGE_TO_PHYS(m->m_ext.ext_pgs[i]), |
pmap_kenter_pa(va, VM_PAGE_TO_PHYS(m->m_ext.ext_pgs[i]), |
VM_PROT_READ); |
VM_PROT_READ, 0); |
pmap_update(pmap_kernel()); |
pmap_update(pmap_kernel()); |
|
|
lva += (vaddr_t) iov->iov_base & PAGE_MASK; |
lva += (vaddr_t) iov->iov_base & PAGE_MASK; |
Line 422 getsombuf(struct socket *so, int type) |
|
Line 432 getsombuf(struct socket *so, int type) |
|
return m; |
return m; |
} |
} |
|
|
|
static int |
|
socket_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, |
|
void *arg0, void *arg1, void *arg2, void *arg3) |
|
{ |
|
int result; |
|
enum kauth_network_req req; |
|
|
|
result = KAUTH_RESULT_DEFER; |
|
req = (enum kauth_network_req)arg0; |
|
|
|
if ((action != KAUTH_NETWORK_SOCKET) && |
|
(action != KAUTH_NETWORK_BIND)) |
|
return result; |
|
|
|
switch (req) { |
|
case KAUTH_REQ_NETWORK_BIND_PORT: |
|
result = KAUTH_RESULT_ALLOW; |
|
break; |
|
|
|
case KAUTH_REQ_NETWORK_SOCKET_DROP: { |
|
/* Normal users can only drop their own connections. */ |
|
struct socket *so = (struct socket *)arg1; |
|
|
|
if (proc_uidmatch(cred, so->so_cred)) |
|
result = KAUTH_RESULT_ALLOW; |
|
|
|
break; |
|
} |
|
|
|
case KAUTH_REQ_NETWORK_SOCKET_OPEN: |
|
/* We allow "raw" routing/bluetooth sockets to anyone. */ |
|
if ((u_long)arg1 == PF_ROUTE || (u_long)arg1 == PF_OROUTE |
|
|| (u_long)arg1 == PF_BLUETOOTH) { |
|
result = KAUTH_RESULT_ALLOW; |
|
} else { |
|
/* Privileged, let secmodel handle this. */ |
|
if ((u_long)arg2 == SOCK_RAW) |
|
break; |
|
} |
|
|
|
result = KAUTH_RESULT_ALLOW; |
|
|
|
break; |
|
|
|
case KAUTH_REQ_NETWORK_SOCKET_CANSEE: |
|
result = KAUTH_RESULT_ALLOW; |
|
|
|
break; |
|
|
|
default: |
|
break; |
|
} |
|
|
|
return result; |
|
} |
|
|
void |
void |
soinit(void) |
soinit(void) |
{ |
{ |
|
|
|
|
callback_register(&vm_map_to_kernel(kernel_map)->vmk_reclaim_callback, |
callback_register(&vm_map_to_kernel(kernel_map)->vmk_reclaim_callback, |
&sokva_reclaimerentry, NULL, sokva_reclaim_callback); |
&sokva_reclaimerentry, NULL, sokva_reclaim_callback); |
|
|
|
socket_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK, |
|
socket_listener_cb, NULL); |
} |
} |
|
|
/* |
/* |
Line 495 socreate(int dom, struct socket **aso, i |
|
Line 564 socreate(int dom, struct socket **aso, i |
|
#endif |
#endif |
uid = kauth_cred_geteuid(l->l_cred); |
uid = kauth_cred_geteuid(l->l_cred); |
so->so_uidinfo = uid_find(uid); |
so->so_uidinfo = uid_find(uid); |
so->so_egid = kauth_cred_getegid(l->l_cred); |
|
so->so_cpid = l->l_proc->p_pid; |
so->so_cpid = l->l_proc->p_pid; |
if (lockso != NULL) { |
if (lockso != NULL) { |
/* Caller wants us to share a lock. */ |
/* Caller wants us to share a lock. */ |
Line 514 socreate(int dom, struct socket **aso, i |
|
Line 582 socreate(int dom, struct socket **aso, i |
|
sofree(so); |
sofree(so); |
return error; |
return error; |
} |
} |
|
so->so_cred = kauth_cred_dup(l->l_cred); |
sounlock(so); |
sounlock(so); |
*aso = so; |
*aso = so; |
return 0; |
return 0; |
Line 549 fsocreate(int domain, struct socket **so |
|
Line 618 fsocreate(int domain, struct socket **so |
|
} |
} |
|
|
int |
int |
|
sofamily(const struct socket *so) |
|
{ |
|
const struct protosw *pr; |
|
const struct domain *dom; |
|
|
|
if ((pr = so->so_proto) == NULL) |
|
return AF_UNSPEC; |
|
if ((dom = pr->pr_domain) == NULL) |
|
return AF_UNSPEC; |
|
return dom->dom_family; |
|
} |
|
|
|
int |
sobind(struct socket *so, struct mbuf *nam, struct lwp *l) |
sobind(struct socket *so, struct mbuf *nam, struct lwp *l) |
{ |
{ |
int error; |
int error; |
Line 674 soclose(struct socket *so) |
|
Line 756 soclose(struct socket *so) |
|
if ((so->so_state & SS_ISDISCONNECTING) && so->so_nbio) |
if ((so->so_state & SS_ISDISCONNECTING) && so->so_nbio) |
goto drop; |
goto drop; |
while (so->so_state & SS_ISCONNECTED) { |
while (so->so_state & SS_ISCONNECTED) { |
error = sowait(so, so->so_linger * hz); |
error = sowait(so, true, so->so_linger * hz); |
if (error) |
if (error) |
break; |
break; |
} |
} |
Line 690 soclose(struct socket *so) |
|
Line 772 soclose(struct socket *so) |
|
discard: |
discard: |
if (so->so_state & SS_NOFDREF) |
if (so->so_state & SS_NOFDREF) |
panic("soclose: NOFDREF"); |
panic("soclose: NOFDREF"); |
|
kauth_cred_free(so->so_cred); |
so->so_state |= SS_NOFDREF; |
so->so_state |= SS_NOFDREF; |
sofree(so); |
sofree(so); |
return (error); |
return (error); |
Line 822 sosend(struct socket *so, struct mbuf *a |
|
Line 905 sosend(struct socket *so, struct mbuf *a |
|
struct proc *p; |
struct proc *p; |
long space, len, resid, clen, mlen; |
long space, len, resid, clen, mlen; |
int error, s, dontroute, atomic; |
int error, s, dontroute, atomic; |
|
short wakeup_state = 0; |
|
|
p = l->l_proc; |
p = l->l_proc; |
sodopendfree(); |
sodopendfree(); |
Line 896 sosend(struct socket *so, struct mbuf *a |
|
Line 980 sosend(struct socket *so, struct mbuf *a |
|
goto release; |
goto release; |
} |
} |
sbunlock(&so->so_snd); |
sbunlock(&so->so_snd); |
|
if (wakeup_state & SS_RESTARTSYS) { |
|
error = ERESTART; |
|
goto out; |
|
} |
error = sbwait(&so->so_snd); |
error = sbwait(&so->so_snd); |
if (error) |
if (error) |
goto out; |
goto out; |
|
wakeup_state = so->so_state; |
goto restart; |
goto restart; |
} |
} |
|
wakeup_state = 0; |
mp = ⊤ |
mp = ⊤ |
space -= clen; |
space -= clen; |
do { |
do { |
Line 935 sosend(struct socket *so, struct mbuf *a |
|
Line 1025 sosend(struct socket *so, struct mbuf *a |
|
} |
} |
if (resid >= MINCLSIZE && space >= MCLBYTES) { |
if (resid >= MINCLSIZE && space >= MCLBYTES) { |
SOSEND_COUNTER_INCR(&sosend_copy_big); |
SOSEND_COUNTER_INCR(&sosend_copy_big); |
m_clget(m, M_WAIT); |
m_clget(m, M_DONTWAIT); |
if ((m->m_flags & M_EXT) == 0) |
if ((m->m_flags & M_EXT) == 0) |
goto nopages; |
goto nopages; |
mlen = MCLBYTES; |
mlen = MCLBYTES; |
Line 1076 soreceive(struct socket *so, struct mbuf |
|
Line 1166 soreceive(struct socket *so, struct mbuf |
|
struct mbuf *nextrecord; |
struct mbuf *nextrecord; |
int mbuf_removed = 0; |
int mbuf_removed = 0; |
const struct domain *dom; |
const struct domain *dom; |
|
short wakeup_state = 0; |
|
|
pr = so->so_proto; |
pr = so->so_proto; |
atomic = pr->pr_flags & PR_ATOMIC; |
atomic = pr->pr_flags & PR_ATOMIC; |
Line 1190 soreceive(struct socket *so, struct mbuf |
|
Line 1281 soreceive(struct socket *so, struct mbuf |
|
SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1"); |
SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1"); |
SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1"); |
SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1"); |
sbunlock(&so->so_rcv); |
sbunlock(&so->so_rcv); |
error = sbwait(&so->so_rcv); |
if (wakeup_state & SS_RESTARTSYS) |
|
error = ERESTART; |
|
else |
|
error = sbwait(&so->so_rcv); |
if (error != 0) { |
if (error != 0) { |
sounlock(so); |
sounlock(so); |
splx(s); |
splx(s); |
return error; |
return error; |
} |
} |
|
wakeup_state = so->so_state; |
goto restart; |
goto restart; |
} |
} |
dontblock: |
dontblock: |
Line 1334 soreceive(struct socket *so, struct mbuf |
|
Line 1429 soreceive(struct socket *so, struct mbuf |
|
panic("receive 3"); |
panic("receive 3"); |
#endif |
#endif |
so->so_state &= ~SS_RCVATMARK; |
so->so_state &= ~SS_RCVATMARK; |
|
wakeup_state = 0; |
len = uio->uio_resid; |
len = uio->uio_resid; |
if (so->so_oobmark && len > so->so_oobmark - offset) |
if (so->so_oobmark && len > so->so_oobmark - offset) |
len = so->so_oobmark - offset; |
len = so->so_oobmark - offset; |
Line 1466 soreceive(struct socket *so, struct mbuf |
|
Line 1562 soreceive(struct socket *so, struct mbuf |
|
NULL, (struct mbuf *)(long)flags, NULL, l); |
NULL, (struct mbuf *)(long)flags, NULL, l); |
SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2"); |
SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2"); |
SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2"); |
SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2"); |
error = sbwait(&so->so_rcv); |
if (wakeup_state & SS_RESTARTSYS) |
|
error = ERESTART; |
|
else |
|
error = sbwait(&so->so_rcv); |
if (error != 0) { |
if (error != 0) { |
sbunlock(&so->so_rcv); |
sbunlock(&so->so_rcv); |
sounlock(so); |
sounlock(so); |
Line 1475 soreceive(struct socket *so, struct mbuf |
|
Line 1574 soreceive(struct socket *so, struct mbuf |
|
} |
} |
if ((m = so->so_rcv.sb_mb) != NULL) |
if ((m = so->so_rcv.sb_mb) != NULL) |
nextrecord = m->m_nextpkt; |
nextrecord = m->m_nextpkt; |
|
wakeup_state = so->so_state; |
} |
} |
} |
} |
|
|
Line 1542 soshutdown(struct socket *so, int how) |
|
Line 1642 soshutdown(struct socket *so, int how) |
|
} |
} |
|
|
void |
void |
|
sorestart(struct socket *so) |
|
{ |
|
/* |
|
* An application has called close() on an fd on which another |
|
* of its threads has called a socket system call. |
|
* Mark this and wake everyone up, and code that would block again |
|
* instead returns ERESTART. |
|
* On system call re-entry the fd is validated and EBADF returned. |
|
* Any other fd will block again on the 2nd syscall. |
|
*/ |
|
solock(so); |
|
so->so_state |= SS_RESTARTSYS; |
|
cv_broadcast(&so->so_cv); |
|
cv_broadcast(&so->so_snd.sb_cv); |
|
cv_broadcast(&so->so_rcv.sb_cv); |
|
sounlock(so); |
|
} |
|
|
|
void |
sorflush(struct socket *so) |
sorflush(struct socket *so) |
{ |
{ |
struct sockbuf *sb, asb; |
struct sockbuf *sb, asb; |
Line 1576 sorflush(struct socket *so) |
|
Line 1695 sorflush(struct socket *so) |
|
static int |
static int |
sosetopt1(struct socket *so, const struct sockopt *sopt) |
sosetopt1(struct socket *so, const struct sockopt *sopt) |
{ |
{ |
int error, optval; |
int error = EINVAL, optval, opt; |
struct linger l; |
struct linger l; |
struct timeval tv; |
struct timeval tv; |
|
|
switch (sopt->sopt_name) { |
switch ((opt = sopt->sopt_name)) { |
|
|
case SO_ACCEPTFILTER: |
case SO_ACCEPTFILTER: |
error = accept_filt_setopt(so, sopt); |
error = accept_filt_setopt(so, sopt); |
Line 1613 sosetopt1(struct socket *so, const struc |
|
Line 1732 sosetopt1(struct socket *so, const struc |
|
case SO_REUSEPORT: |
case SO_REUSEPORT: |
case SO_OOBINLINE: |
case SO_OOBINLINE: |
case SO_TIMESTAMP: |
case SO_TIMESTAMP: |
|
#ifdef SO_OTIMESTAMP |
|
case SO_OTIMESTAMP: |
|
#endif |
error = sockopt_getint(sopt, &optval); |
error = sockopt_getint(sopt, &optval); |
solock(so); |
solock(so); |
if (error) |
if (error) |
break; |
break; |
if (optval) |
if (optval) |
so->so_options |= sopt->sopt_name; |
so->so_options |= opt; |
else |
else |
so->so_options &= ~sopt->sopt_name; |
so->so_options &= ~opt; |
break; |
break; |
|
|
case SO_SNDBUF: |
case SO_SNDBUF: |
Line 1641 sosetopt1(struct socket *so, const struc |
|
Line 1763 sosetopt1(struct socket *so, const struc |
|
break; |
break; |
} |
} |
|
|
switch (sopt->sopt_name) { |
switch (opt) { |
case SO_SNDBUF: |
case SO_SNDBUF: |
if (sbreserve(&so->so_snd, (u_long)optval, so) == 0) { |
if (sbreserve(&so->so_snd, (u_long)optval, so) == 0) { |
error = ENOBUFS; |
error = ENOBUFS; |
Line 1678 sosetopt1(struct socket *so, const struc |
|
Line 1800 sosetopt1(struct socket *so, const struc |
|
} |
} |
break; |
break; |
|
|
|
#ifdef COMPAT_50 |
|
case SO_OSNDTIMEO: |
|
case SO_ORCVTIMEO: { |
|
struct timeval50 otv; |
|
error = sockopt_get(sopt, &otv, sizeof(otv)); |
|
if (error) { |
|
solock(so); |
|
break; |
|
} |
|
timeval50_to_timeval(&otv, &tv); |
|
opt = opt == SO_OSNDTIMEO ? SO_SNDTIMEO : SO_RCVTIMEO; |
|
error = 0; |
|
/*FALLTHROUGH*/ |
|
} |
|
#endif /* COMPAT_50 */ |
|
|
case SO_SNDTIMEO: |
case SO_SNDTIMEO: |
case SO_RCVTIMEO: |
case SO_RCVTIMEO: |
error = sockopt_get(sopt, &tv, sizeof(tv)); |
if (error) |
|
error = sockopt_get(sopt, &tv, sizeof(tv)); |
solock(so); |
solock(so); |
if (error) |
if (error) |
break; |
break; |
Line 1694 sosetopt1(struct socket *so, const struc |
|
Line 1833 sosetopt1(struct socket *so, const struc |
|
if (optval == 0 && tv.tv_usec != 0) |
if (optval == 0 && tv.tv_usec != 0) |
optval = 1; |
optval = 1; |
|
|
switch (sopt->sopt_name) { |
switch (opt) { |
case SO_SNDTIMEO: |
case SO_SNDTIMEO: |
so->so_snd.sb_timeo = optval; |
so->so_snd.sb_timeo = optval; |
break; |
break; |
Line 1767 so_setsockopt(struct lwp *l, struct sock |
|
Line 1906 so_setsockopt(struct lwp *l, struct sock |
|
static int |
static int |
sogetopt1(struct socket *so, struct sockopt *sopt) |
sogetopt1(struct socket *so, struct sockopt *sopt) |
{ |
{ |
int error, optval; |
int error, optval, opt; |
struct linger l; |
struct linger l; |
struct timeval tv; |
struct timeval tv; |
|
|
switch (sopt->sopt_name) { |
switch ((opt = sopt->sopt_name)) { |
|
|
case SO_ACCEPTFILTER: |
case SO_ACCEPTFILTER: |
error = accept_filt_getopt(so, sopt); |
error = accept_filt_getopt(so, sopt); |
Line 1793 sogetopt1(struct socket *so, struct sock |
|
Line 1932 sogetopt1(struct socket *so, struct sock |
|
case SO_BROADCAST: |
case SO_BROADCAST: |
case SO_OOBINLINE: |
case SO_OOBINLINE: |
case SO_TIMESTAMP: |
case SO_TIMESTAMP: |
error = sockopt_setint(sopt, |
#ifdef SO_OTIMESTAMP |
(so->so_options & sopt->sopt_name) ? 1 : 0); |
case SO_OTIMESTAMP: |
|
#endif |
|
error = sockopt_setint(sopt, (so->so_options & opt) ? 1 : 0); |
break; |
break; |
|
|
case SO_TYPE: |
case SO_TYPE: |
Line 1822 sogetopt1(struct socket *so, struct sock |
|
Line 1963 sogetopt1(struct socket *so, struct sock |
|
error = sockopt_setint(sopt, so->so_rcv.sb_lowat); |
error = sockopt_setint(sopt, so->so_rcv.sb_lowat); |
break; |
break; |
|
|
|
#ifdef COMPAT_50 |
|
case SO_OSNDTIMEO: |
|
case SO_ORCVTIMEO: { |
|
struct timeval50 otv; |
|
|
|
optval = (opt == SO_OSNDTIMEO ? |
|
so->so_snd.sb_timeo : so->so_rcv.sb_timeo); |
|
|
|
otv.tv_sec = optval / hz; |
|
otv.tv_usec = (optval % hz) * tick; |
|
|
|
error = sockopt_set(sopt, &otv, sizeof(otv)); |
|
break; |
|
} |
|
#endif /* COMPAT_50 */ |
|
|
case SO_SNDTIMEO: |
case SO_SNDTIMEO: |
case SO_RCVTIMEO: |
case SO_RCVTIMEO: |
optval = (sopt->sopt_name == SO_SNDTIMEO ? |
optval = (opt == SO_SNDTIMEO ? |
so->so_snd.sb_timeo : so->so_rcv.sb_timeo); |
so->so_snd.sb_timeo : so->so_rcv.sb_timeo); |
|
|
tv.tv_sec = optval / hz; |
tv.tv_sec = optval / hz; |
Line 2034 sohasoutofband(struct socket *so) |
|
Line 2191 sohasoutofband(struct socket *so) |
|
{ |
{ |
|
|
fownsignal(so->so_pgid, SIGURG, POLL_PRI, POLLPRI|POLLRDBAND, so); |
fownsignal(so->so_pgid, SIGURG, POLL_PRI, POLLPRI|POLLRDBAND, so); |
selnotify(&so->so_rcv.sb_sel, POLLPRI | POLLRDBAND, 0); |
selnotify(&so->so_rcv.sb_sel, POLLPRI | POLLRDBAND, NOTE_SUBMIT); |
} |
} |
|
|
static void |
static void |
Line 2266 sysctl_kern_somaxkva(SYSCTLFN_ARGS) |
|
Line 2423 sysctl_kern_somaxkva(SYSCTLFN_ARGS) |
|
} |
} |
|
|
static void |
static void |
sysctl_kern_somaxkva_setup() |
sysctl_kern_somaxkva_setup(void) |
{ |
{ |
|
|
KASSERT(socket_sysctllog == NULL); |
KASSERT(socket_sysctllog == NULL); |