version 1.113.2.6, 2010/08/11 22:54:44 |
version 1.114, 2008/04/28 20:24:05 |
|
|
/* $NetBSD$ */ |
/* $NetBSD$ */ |
|
|
/*- |
/*- |
* Copyright (c) 1998, 2000, 2004, 2008, 2009 The NetBSD Foundation, Inc. |
* Copyright (c) 1998, 2000, 2004, 2008 The NetBSD Foundation, Inc. |
* All rights reserved. |
* All rights reserved. |
* |
* |
* This code is derived from software contributed to The NetBSD Foundation |
* This code is derived from software contributed to The NetBSD Foundation |
* by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, |
* by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, |
* NASA Ames Research Center, and by Andrew Doran. |
* NASA Ames Research Center. |
* |
* |
* Redistribution and use in source and binary forms, with or without |
* Redistribution and use in source and binary forms, with or without |
* modification, are permitted provided that the following conditions |
* modification, are permitted provided that the following conditions |
Line 116 __KERNEL_RCSID(0, "$NetBSD$"); |
|
Line 116 __KERNEL_RCSID(0, "$NetBSD$"); |
|
#include <sys/kauth.h> |
#include <sys/kauth.h> |
#include <sys/kmem.h> |
#include <sys/kmem.h> |
#include <sys/atomic.h> |
#include <sys/atomic.h> |
#include <sys/uidinfo.h> |
|
#include <sys/kernel.h> |
|
#include <sys/kthread.h> |
|
|
|
/* |
/* |
* Unix communications domain. |
* Unix communications domain. |
Line 171 const struct sockaddr_un sun_noname = { |
|
Line 168 const struct sockaddr_un sun_noname = { |
|
ino_t unp_ino; /* prototype for fake inode numbers */ |
ino_t unp_ino; /* prototype for fake inode numbers */ |
|
|
struct mbuf *unp_addsockcred(struct lwp *, struct mbuf *); |
struct mbuf *unp_addsockcred(struct lwp *, struct mbuf *); |
static void unp_mark(file_t *); |
|
static void unp_scan(struct mbuf *, void (*)(file_t *), int); |
|
static void unp_discard_now(file_t *); |
|
static void unp_discard_later(file_t *); |
|
static void unp_thread(void *); |
|
static void unp_thread_kick(void); |
|
static kmutex_t *uipc_lock; |
static kmutex_t *uipc_lock; |
|
|
static kcondvar_t unp_thread_cv; |
|
static lwp_t *unp_thread_lwp; |
|
static SLIST_HEAD(,file) unp_thread_discard; |
|
static int unp_defer; |
|
|
|
/* |
/* |
* Initialize Unix protocols. |
* Initialize Unix protocols. |
*/ |
*/ |
void |
void |
uipc_init(void) |
uipc_init(void) |
{ |
{ |
int error; |
|
|
|
uipc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); |
uipc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); |
cv_init(&unp_thread_cv, "unpgc"); |
|
|
|
error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, unp_thread, |
|
NULL, &unp_thread_lwp, "unpgc"); |
|
if (error != 0) |
|
panic("uipc_init %d", error); |
|
} |
} |
|
|
/* |
/* |
Line 223 unp_setpeerlocks(struct socket *so, stru |
|
Line 202 unp_setpeerlocks(struct socket *so, stru |
|
* with the head when the pair of sockets stand completely |
* with the head when the pair of sockets stand completely |
* on their own. |
* on their own. |
*/ |
*/ |
KASSERT(so->so_head == NULL); |
if (so->so_head != NULL || so2->so_head != NULL) |
if (so2->so_head != NULL) |
|
return; |
return; |
|
|
/* |
/* |
Line 252 unp_setpeerlocks(struct socket *so, stru |
|
Line 230 unp_setpeerlocks(struct socket *so, stru |
|
unp->unp_streamlock = NULL; |
unp->unp_streamlock = NULL; |
mutex_obj_hold(lock); |
mutex_obj_hold(lock); |
membar_exit(); |
membar_exit(); |
/* |
so->so_lock = lock; |
* possible race if lock is not held - see comment in |
so2->so_lock = lock; |
* uipc_usrreq(PRU_ACCEPT). |
|
*/ |
|
KASSERT(mutex_owned(lock)); |
|
solockreset(so, lock); |
|
solockreset(so2, lock); |
|
} |
} |
|
|
/* |
/* |
Line 281 unp_resetlock(struct socket *so) |
|
Line 254 unp_resetlock(struct socket *so) |
|
unp->unp_streamlock = olock; |
unp->unp_streamlock = olock; |
mutex_obj_hold(nlock); |
mutex_obj_hold(nlock); |
mutex_enter(nlock); |
mutex_enter(nlock); |
solockreset(so, nlock); |
so->so_lock = nlock; |
mutex_exit(olock); |
mutex_exit(olock); |
} |
} |
|
|
Line 316 unp_output(struct mbuf *m, struct mbuf * |
|
Line 289 unp_output(struct mbuf *m, struct mbuf * |
|
if (sbappendaddr(&so2->so_rcv, (const struct sockaddr *)sun, m, |
if (sbappendaddr(&so2->so_rcv, (const struct sockaddr *)sun, m, |
control) == 0) { |
control) == 0) { |
so2->so_rcv.sb_overflowed++; |
so2->so_rcv.sb_overflowed++; |
|
sounlock(so2); |
unp_dispose(control); |
unp_dispose(control); |
m_freem(control); |
m_freem(control); |
m_freem(m); |
m_freem(m); |
|
solock(so2); |
return (ENOBUFS); |
return (ENOBUFS); |
} else { |
} else { |
sorwakeup(so2); |
sorwakeup(so2); |
Line 333 unp_setaddr(struct socket *so, struct mb |
|
Line 308 unp_setaddr(struct socket *so, struct mb |
|
struct unpcb *unp; |
struct unpcb *unp; |
bool ext; |
bool ext; |
|
|
KASSERT(solocked(so)); |
|
unp = sotounpcb(so); |
unp = sotounpcb(so); |
ext = false; |
ext = false; |
|
|
Line 382 uipc_usrreq(struct socket *so, int req, |
|
Line 356 uipc_usrreq(struct socket *so, int req, |
|
#endif |
#endif |
p = l ? l->l_proc : NULL; |
p = l ? l->l_proc : NULL; |
if (req != PRU_ATTACH) { |
if (req != PRU_ATTACH) { |
if (unp == NULL) { |
if (unp == 0) { |
error = EINVAL; |
error = EINVAL; |
goto release; |
goto release; |
} |
} |
Line 392 uipc_usrreq(struct socket *so, int req, |
|
Line 366 uipc_usrreq(struct socket *so, int req, |
|
switch (req) { |
switch (req) { |
|
|
case PRU_ATTACH: |
case PRU_ATTACH: |
if (unp != NULL) { |
if (unp != 0) { |
error = EISCONN; |
error = EISCONN; |
break; |
break; |
} |
} |
Line 414 uipc_usrreq(struct socket *so, int req, |
|
Line 388 uipc_usrreq(struct socket *so, int req, |
|
* locked by uipc_lock. |
* locked by uipc_lock. |
*/ |
*/ |
unp_resetlock(so); |
unp_resetlock(so); |
if (unp->unp_vnode == NULL) |
if (unp->unp_vnode == 0) |
error = EINVAL; |
error = EINVAL; |
break; |
break; |
|
|
Line 450 uipc_usrreq(struct socket *so, int req, |
|
Line 424 uipc_usrreq(struct socket *so, int req, |
|
* If the connection is fully established, break the |
* If the connection is fully established, break the |
* association with uipc_lock and give the connected |
* association with uipc_lock and give the connected |
* pair a separate lock to share. |
* pair a separate lock to share. |
* There is a race here: sotounpcb(so2)->unp_streamlock |
|
* is not locked, so when changing so2->so_lock |
|
* another thread can grab it while so->so_lock is still |
|
* pointing to the (locked) uipc_lock. |
|
* This should be harmless, except that it makes |
|
* solocked2() and solocked() unreliable. |
|
* Another problem is that unp_setaddr() expects the |
|
* socket locked. Grabbing sotounpcb(so2)->unp_streamlock |
|
* fixes both issues. |
|
*/ |
*/ |
mutex_enter(sotounpcb(so2)->unp_streamlock); |
|
unp_setpeerlocks(so2, so); |
unp_setpeerlocks(so2, so); |
/* |
/* |
* Only now return peer's address, as we may need to |
* Only now return peer's address, as we may need to |
Line 471 uipc_usrreq(struct socket *so, int req, |
|
Line 435 uipc_usrreq(struct socket *so, int req, |
|
* error == 0 and sun_noname as the peer address. |
* error == 0 and sun_noname as the peer address. |
*/ |
*/ |
unp_setaddr(so, nam, true); |
unp_setaddr(so, nam, true); |
/* so_lock now points to unp_streamlock */ |
|
mutex_exit(so2->so_lock); |
|
break; |
break; |
|
|
case PRU_SHUTDOWN: |
case PRU_SHUTDOWN: |
Line 555 uipc_usrreq(struct socket *so, int req, |
|
Line 517 uipc_usrreq(struct socket *so, int req, |
|
error = ENOTCONN; |
error = ENOTCONN; |
} |
} |
if (error) { |
if (error) { |
|
sounlock(so); |
unp_dispose(control); |
unp_dispose(control); |
m_freem(control); |
m_freem(control); |
m_freem(m); |
m_freem(m); |
|
solock(so); |
break; |
break; |
} |
} |
KASSERT(p != NULL); |
KASSERT(p != NULL); |
Line 606 uipc_usrreq(struct socket *so, int req, |
|
Line 570 uipc_usrreq(struct socket *so, int req, |
|
#undef snd |
#undef snd |
#undef rcv |
#undef rcv |
if (control != NULL) { |
if (control != NULL) { |
|
sounlock(so); |
unp_dispose(control); |
unp_dispose(control); |
m_freem(control); |
m_freem(control); |
|
solock(so); |
} |
} |
break; |
break; |
|
|
Line 621 uipc_usrreq(struct socket *so, int req, |
|
Line 587 uipc_usrreq(struct socket *so, int req, |
|
|
|
KASSERT(so->so_head == NULL); |
KASSERT(so->so_head == NULL); |
#ifdef DIAGNOSTIC |
#ifdef DIAGNOSTIC |
if (so->so_pcb == NULL) |
if (so->so_pcb == 0) |
panic("uipc 5: drop killed pcb"); |
panic("uipc 5: drop killed pcb"); |
#endif |
#endif |
unp_detach(unp); |
unp_detach(unp); |
|
|
* Unix domain socket option processing. |
* Unix domain socket option processing. |
*/ |
*/ |
int |
int |
uipc_ctloutput(int op, struct socket *so, struct sockopt *sopt) |
uipc_ctloutput(int op, struct socket *so, int level, int optname, |
|
struct mbuf **mp) |
{ |
{ |
struct unpcb *unp = sotounpcb(so); |
struct unpcb *unp = sotounpcb(so); |
|
struct mbuf *m = *mp; |
int optval = 0, error = 0; |
int optval = 0, error = 0; |
|
|
KASSERT(solocked(so)); |
KASSERT(solocked(so)); |
|
|
if (sopt->sopt_level != 0) { |
if (level != 0) { |
error = ENOPROTOOPT; |
error = ENOPROTOOPT; |
|
if (op == PRCO_SETOPT && m) |
|
(void) m_free(m); |
} else switch (op) { |
} else switch (op) { |
|
|
case PRCO_SETOPT: |
case PRCO_SETOPT: |
switch (sopt->sopt_name) { |
switch (optname) { |
case LOCAL_CREDS: |
case LOCAL_CREDS: |
case LOCAL_CONNWAIT: |
case LOCAL_CONNWAIT: |
error = sockopt_getint(sopt, &optval); |
if (m == NULL || m->m_len != sizeof(int)) |
if (error) |
error = EINVAL; |
break; |
else { |
switch (sopt->sopt_name) { |
optval = *mtod(m, int *); |
|
switch (optname) { |
#define OPTSET(bit) \ |
#define OPTSET(bit) \ |
if (optval) \ |
if (optval) \ |
unp->unp_flags |= (bit); \ |
unp->unp_flags |= (bit); \ |
else \ |
else \ |
unp->unp_flags &= ~(bit); |
unp->unp_flags &= ~(bit); |
|
|
case LOCAL_CREDS: |
case LOCAL_CREDS: |
OPTSET(UNP_WANTCRED); |
OPTSET(UNP_WANTCRED); |
break; |
break; |
case LOCAL_CONNWAIT: |
case LOCAL_CONNWAIT: |
OPTSET(UNP_CONNWAIT); |
OPTSET(UNP_CONNWAIT); |
break; |
break; |
|
} |
} |
} |
break; |
break; |
#undef OPTSET |
#undef OPTSET |
Line 712 uipc_ctloutput(int op, struct socket *so |
|
Line 684 uipc_ctloutput(int op, struct socket *so |
|
error = ENOPROTOOPT; |
error = ENOPROTOOPT; |
break; |
break; |
} |
} |
|
if (m) |
|
(void) m_free(m); |
break; |
break; |
|
|
case PRCO_GETOPT: |
case PRCO_GETOPT: |
sounlock(so); |
sounlock(so); |
switch (sopt->sopt_name) { |
switch (optname) { |
case LOCAL_PEEREID: |
case LOCAL_PEEREID: |
if (unp->unp_flags & UNP_EIDSVALID) { |
if (unp->unp_flags & UNP_EIDSVALID) { |
error = sockopt_set(sopt, |
*mp = m = m_get(M_WAIT, MT_SOOPTS); |
&unp->unp_connid, sizeof(unp->unp_connid)); |
m->m_len = sizeof(struct unpcbid); |
|
*mtod(m, struct unpcbid *) = unp->unp_connid; |
} else { |
} else { |
error = EINVAL; |
error = EINVAL; |
} |
} |
break; |
break; |
case LOCAL_CREDS: |
case LOCAL_CREDS: |
|
*mp = m = m_get(M_WAIT, MT_SOOPTS); |
|
m->m_len = sizeof(int); |
|
|
#define OPTBIT(bit) (unp->unp_flags & (bit) ? 1 : 0) |
#define OPTBIT(bit) (unp->unp_flags & (bit) ? 1 : 0) |
|
|
optval = OPTBIT(UNP_WANTCRED); |
optval = OPTBIT(UNP_WANTCRED); |
error = sockopt_setint(sopt, optval); |
*mtod(m, int *) = optval; |
break; |
break; |
#undef OPTBIT |
#undef OPTBIT |
|
|
Line 757 u_long unpst_recvspace = PIPSIZ; |
|
Line 735 u_long unpst_recvspace = PIPSIZ; |
|
u_long unpdg_sendspace = 2*1024; /* really max datagram size */ |
u_long unpdg_sendspace = 2*1024; /* really max datagram size */ |
u_long unpdg_recvspace = 4*1024; |
u_long unpdg_recvspace = 4*1024; |
|
|
u_int unp_rights; /* files in flight */ |
u_int unp_rights; /* file descriptors in flight */ |
u_int unp_rights_ratio = 2; /* limit, fraction of maxfiles */ |
|
|
|
int |
int |
unp_attach(struct socket *so) |
unp_attach(struct socket *so) |
Line 803 unp_attach(struct socket *so) |
|
Line 780 unp_attach(struct socket *so) |
|
unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT); |
unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT); |
if (unp == NULL) |
if (unp == NULL) |
return (ENOBUFS); |
return (ENOBUFS); |
memset(unp, 0, sizeof(*unp)); |
memset((void *)unp, 0, sizeof(*unp)); |
unp->unp_socket = so; |
unp->unp_socket = so; |
so->so_pcb = unp; |
so->so_pcb = unp; |
nanotime(&unp->unp_ctime); |
nanotime(&unp->unp_ctime); |
Line 842 unp_detach(struct unpcb *unp) |
|
Line 819 unp_detach(struct unpcb *unp) |
|
so->so_pcb = NULL; |
so->so_pcb = NULL; |
if (unp_rights) { |
if (unp_rights) { |
/* |
/* |
* Normally the receive buffer is flushed later, in sofree, |
* Normally the receive buffer is flushed later, |
* but if our receive buffer holds references to files that |
* in sofree, but if our receive buffer holds references |
* are now garbage, we will enqueue those file references to |
* to descriptors that are now garbage, we will dispose |
* the garbage collector and kick it into action. |
* of those descriptor references after the garbage collector |
|
* gets them (resulting in a "panic: closef: count < 0"). |
*/ |
*/ |
sorflush(so); |
sorflush(so); |
unp_free(unp); |
unp_free(unp); |
unp_thread_kick(); |
sounlock(so); |
|
unp_gc(); |
|
solock(so); |
} else |
} else |
unp_free(unp); |
unp_free(unp); |
} |
} |
Line 907 unp_bind(struct socket *so, struct mbuf |
|
Line 887 unp_bind(struct socket *so, struct mbuf |
|
error = EADDRINUSE; |
error = EADDRINUSE; |
goto bad; |
goto bad; |
} |
} |
vattr_null(&vattr); |
VATTR_NULL(&vattr); |
vattr.va_type = VSOCK; |
vattr.va_type = VSOCK; |
vattr.va_mode = ACCESSPERMS & ~(p->p_cwdi->cwdi_cmask); |
vattr.va_mode = ACCESSPERMS & ~(p->p_cwdi->cwdi_cmask); |
error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); |
error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); |
Line 923 unp_bind(struct socket *so, struct mbuf |
|
Line 903 unp_bind(struct socket *so, struct mbuf |
|
unp->unp_connid.unp_euid = kauth_cred_geteuid(l->l_cred); |
unp->unp_connid.unp_euid = kauth_cred_geteuid(l->l_cred); |
unp->unp_connid.unp_egid = kauth_cred_getegid(l->l_cred); |
unp->unp_connid.unp_egid = kauth_cred_getegid(l->l_cred); |
unp->unp_flags |= UNP_EIDSBIND; |
unp->unp_flags |= UNP_EIDSBIND; |
VOP_UNLOCK(vp); |
VOP_UNLOCK(vp, 0); |
unp->unp_flags &= ~UNP_BUSY; |
unp->unp_flags &= ~UNP_BUSY; |
return (0); |
return (0); |
|
|
Line 1004 unp_connect(struct socket *so, struct mb |
|
Line 984 unp_connect(struct socket *so, struct mb |
|
KASSERT((so->so_options & SO_ACCEPTCONN) == 0 || |
KASSERT((so->so_options & SO_ACCEPTCONN) == 0 || |
so2->so_lock == uipc_lock); |
so2->so_lock == uipc_lock); |
if ((so2->so_options & SO_ACCEPTCONN) == 0 || |
if ((so2->so_options & SO_ACCEPTCONN) == 0 || |
(so3 = sonewconn(so2, 0)) == NULL) { |
(so3 = sonewconn(so2, 0)) == 0) { |
error = ECONNREFUSED; |
error = ECONNREFUSED; |
sounlock(so); |
sounlock(so); |
goto bad; |
goto bad; |
Line 1057 unp_connect2(struct socket *so, struct s |
|
Line 1037 unp_connect2(struct socket *so, struct s |
|
* queue head (so->so_head, only if PR_CONNREQUIRED) |
* queue head (so->so_head, only if PR_CONNREQUIRED) |
*/ |
*/ |
KASSERT(solocked2(so, so2)); |
KASSERT(solocked2(so, so2)); |
KASSERT(so->so_head == NULL); |
if (so->so_head != NULL) { |
if (so2->so_head != NULL) { |
KASSERT(so->so_lock == uipc_lock); |
KASSERT(so2->so_lock == uipc_lock); |
KASSERT(solocked2(so, so->so_head)); |
KASSERT(solocked2(so2, so2->so_head)); |
|
} |
} |
|
|
unp2 = sotounpcb(so2); |
unp2 = sotounpcb(so2); |
Line 1088 unp_connect2(struct socket *so, struct s |
|
Line 1067 unp_connect2(struct socket *so, struct s |
|
* require that the locks already match (the sockets |
* require that the locks already match (the sockets |
* are created that way). |
* are created that way). |
*/ |
*/ |
if (req == PRU_CONNECT) { |
if (req == PRU_CONNECT) |
KASSERT(so2->so_head != NULL); |
|
unp_setpeerlocks(so, so2); |
unp_setpeerlocks(so, so2); |
} |
|
break; |
break; |
|
|
default: |
default: |
Line 1199 unp_externalize(struct mbuf *rights, str |
|
Line 1176 unp_externalize(struct mbuf *rights, str |
|
fdp = malloc(nfds * sizeof(int), M_TEMP, M_WAITOK); |
fdp = malloc(nfds * sizeof(int), M_TEMP, M_WAITOK); |
rw_enter(&p->p_cwdi->cwdi_lock, RW_READER); |
rw_enter(&p->p_cwdi->cwdi_lock, RW_READER); |
|
|
/* Make sure the recipient should be able to see the files.. */ |
/* Make sure the recipient should be able to see the descriptors.. */ |
if (p->p_cwdi->cwdi_rdir != NULL) { |
if (p->p_cwdi->cwdi_rdir != NULL) { |
rp = (file_t **)CMSG_DATA(cm); |
rp = (file_t **)CMSG_DATA(cm); |
for (i = 0; i < nfds; i++) { |
for (i = 0; i < nfds; i++) { |
Line 1226 unp_externalize(struct mbuf *rights, str |
|
Line 1203 unp_externalize(struct mbuf *rights, str |
|
if (error != 0) { |
if (error != 0) { |
for (i = 0; i < nfds; i++) { |
for (i = 0; i < nfds; i++) { |
fp = *rp; |
fp = *rp; |
|
/* |
|
* zero the pointer before calling unp_discard, |
|
* since it may end up in unp_gc().. |
|
*/ |
*rp++ = 0; |
*rp++ = 0; |
unp_discard_now(fp); |
unp_discard(fp); |
} |
} |
goto out; |
goto out; |
} |
} |
|
|
/* |
/* |
* First loop -- allocate file descriptor table slots for the |
* First loop -- allocate file descriptor table slots for the |
* new files. |
* new descriptors. |
*/ |
*/ |
for (i = 0; i < nfds; i++) { |
for (i = 0; i < nfds; i++) { |
fp = *rp++; |
fp = *rp++; |
Line 1262 unp_externalize(struct mbuf *rights, str |
|
Line 1243 unp_externalize(struct mbuf *rights, str |
|
|
|
/* |
/* |
* Now that adding them has succeeded, update all of the |
* Now that adding them has succeeded, update all of the |
* file passing state and affix the descriptors. |
* descriptor passing state. |
*/ |
*/ |
rp = (file_t **)CMSG_DATA(cm); |
rp = (file_t **)CMSG_DATA(cm); |
for (i = 0; i < nfds; i++) { |
for (i = 0; i < nfds; i++) { |
Line 1297 unp_externalize(struct mbuf *rights, str |
|
Line 1278 unp_externalize(struct mbuf *rights, str |
|
int |
int |
unp_internalize(struct mbuf **controlp) |
unp_internalize(struct mbuf **controlp) |
{ |
{ |
filedesc_t *fdescp = curlwp->l_fd; |
struct filedesc *fdescp = curlwp->l_fd; |
struct mbuf *control = *controlp; |
struct mbuf *control = *controlp; |
struct cmsghdr *newcm, *cm = mtod(control, struct cmsghdr *); |
struct cmsghdr *newcm, *cm = mtod(control, struct cmsghdr *); |
file_t **rp, **files; |
file_t **rp, **files; |
file_t *fp; |
file_t *fp; |
int i, fd, *fdp; |
int i, fd, *fdp; |
int nfds, error; |
int nfds, error; |
u_int maxmsg; |
|
|
|
error = 0; |
error = 0; |
newcm = NULL; |
newcm = NULL; |
|
|
/* Sanity check the control message header. */ |
/* Sanity check the control message header. */ |
if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET || |
if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET || |
cm->cmsg_len > control->m_len || |
cm->cmsg_len != control->m_len) |
cm->cmsg_len < CMSG_ALIGN(sizeof(*cm))) |
|
return (EINVAL); |
return (EINVAL); |
|
|
/* |
/* |
Line 1321 unp_internalize(struct mbuf **controlp) |
|
Line 1300 unp_internalize(struct mbuf **controlp) |
|
*/ |
*/ |
nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int); |
nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int); |
fdp = (int *)CMSG_DATA(cm); |
fdp = (int *)CMSG_DATA(cm); |
maxmsg = maxfiles / unp_rights_ratio; |
|
for (i = 0; i < nfds; i++) { |
for (i = 0; i < nfds; i++) { |
fd = *fdp++; |
fd = *fdp++; |
if (atomic_inc_uint_nv(&unp_rights) > maxmsg) { |
|
atomic_dec_uint(&unp_rights); |
|
nfds = i; |
|
error = EAGAIN; |
|
goto out; |
|
} |
|
if ((fp = fd_getfile(fd)) == NULL) { |
if ((fp = fd_getfile(fd)) == NULL) { |
atomic_dec_uint(&unp_rights); |
nfds = i + 1; |
nfds = i; |
|
error = EBADF; |
error = EBADF; |
goto out; |
goto out; |
} |
} |
Line 1356 unp_internalize(struct mbuf **controlp) |
|
Line 1327 unp_internalize(struct mbuf **controlp) |
|
fdp = (int *)CMSG_DATA(cm) + nfds; |
fdp = (int *)CMSG_DATA(cm) + nfds; |
rp = files + nfds; |
rp = files + nfds; |
for (i = 0; i < nfds; i++) { |
for (i = 0; i < nfds; i++) { |
fp = fdescp->fd_dt->dt_ff[*--fdp]->ff_file; |
fp = fdescp->fd_ofiles[*--fdp]->ff_file; |
KASSERT(fp != NULL); |
KASSERT(fp != NULL); |
mutex_enter(&fp->f_lock); |
mutex_enter(&fp->f_lock); |
*--rp = fp; |
*--rp = fp; |
fp->f_count++; |
fp->f_count++; |
fp->f_msgcount++; |
fp->f_msgcount++; |
mutex_exit(&fp->f_lock); |
mutex_exit(&fp->f_lock); |
|
atomic_inc_uint(&unp_rights); |
} |
} |
|
|
out: |
out: |
Line 1370 unp_internalize(struct mbuf **controlp) |
|
Line 1342 unp_internalize(struct mbuf **controlp) |
|
fdp = (int *)CMSG_DATA(cm); |
fdp = (int *)CMSG_DATA(cm); |
for (i = 0; i < nfds; i++) { |
for (i = 0; i < nfds; i++) { |
fd_putfile(*fdp++); |
fd_putfile(*fdp++); |
if (error != 0) { |
|
atomic_dec_uint(&unp_rights); |
|
} |
|
} |
} |
|
|
if (error == 0) { |
if (error == 0) { |
Line 1445 unp_addsockcred(struct lwp *l, struct mb |
|
Line 1414 unp_addsockcred(struct lwp *l, struct mb |
|
return (control); |
return (control); |
} |
} |
|
|
|
int unp_defer, unp_gcing; |
|
extern struct domain unixdomain; |
|
|
/* |
/* |
* Do a mark-sweep GC of files in the system, to free up any which are |
* Comment added long after the fact explaining what's going on here. |
* caught in flight to an about-to-be-closed socket. Additionally, |
* Do a mark-sweep GC of file descriptors on the system, to free up |
* process deferred file closures. |
* any which are caught in flight to an about-to-be-closed socket. |
|
* |
|
* Traditional mark-sweep gc's start at the "root", and mark |
|
* everything reachable from the root (which, in our case would be the |
|
* process table). The mark bits are cleared during the sweep. |
|
* |
|
* XXX For some inexplicable reason (perhaps because the file |
|
* descriptor tables used to live in the u area which could be swapped |
|
* out and thus hard to reach), we do multiple scans over the set of |
|
* descriptors, using *two* mark bits per object (DEFER and MARK). |
|
* Whenever we find a descriptor which references other descriptors, |
|
* the ones it references are marked with both bits, and we iterate |
|
* over the whole file table until there are no more DEFER bits set. |
|
* We also make an extra pass *before* the GC to clear the mark bits, |
|
* which could have been cleared at almost no cost during the previous |
|
* sweep. |
*/ |
*/ |
static void |
void |
unp_gc(file_t *dp) |
unp_gc(void) |
{ |
{ |
extern struct domain unixdomain; |
file_t *fp, *nextfp; |
file_t *fp, *np; |
|
struct socket *so, *so1; |
struct socket *so, *so1; |
u_int i, old, new; |
file_t **extra_ref, **fpp; |
bool didwork; |
int nunref, nslots, i; |
|
|
KASSERT(curlwp == unp_thread_lwp); |
if (atomic_swap_uint(&unp_gcing, 1) == 1) |
KASSERT(mutex_owned(&filelist_lock)); |
return; |
|
|
/* |
restart: |
* First, process deferred file closures. |
nslots = nfiles * 2; |
*/ |
extra_ref = kmem_alloc(nslots * sizeof(file_t *), KM_SLEEP); |
while (!SLIST_EMPTY(&unp_thread_discard)) { |
|
fp = SLIST_FIRST(&unp_thread_discard); |
|
KASSERT(fp->f_unpcount > 0); |
|
KASSERT(fp->f_count > 0); |
|
KASSERT(fp->f_msgcount > 0); |
|
KASSERT(fp->f_count >= fp->f_unpcount); |
|
KASSERT(fp->f_count >= fp->f_msgcount); |
|
KASSERT(fp->f_msgcount >= fp->f_unpcount); |
|
SLIST_REMOVE_HEAD(&unp_thread_discard, f_unplist); |
|
i = fp->f_unpcount; |
|
fp->f_unpcount = 0; |
|
mutex_exit(&filelist_lock); |
|
for (; i != 0; i--) { |
|
unp_discard_now(fp); |
|
} |
|
mutex_enter(&filelist_lock); |
|
} |
|
|
|
/* |
mutex_enter(&filelist_lock); |
* Clear mark bits. Ensure that we don't consider new files |
|
* entering the file table during this loop (they will not have |
|
* FSCAN set). |
|
*/ |
|
unp_defer = 0; |
unp_defer = 0; |
|
|
|
/* Clear mark bits */ |
LIST_FOREACH(fp, &filehead, f_list) { |
LIST_FOREACH(fp, &filehead, f_list) { |
for (old = fp->f_flag;; old = new) { |
atomic_and_uint(&fp->f_flag, ~(FMARK|FDEFER)); |
new = atomic_cas_uint(&fp->f_flag, old, |
|
(old | FSCAN) & ~(FMARK|FDEFER)); |
|
if (__predict_true(old == new)) { |
|
break; |
|
} |
|
} |
|
} |
} |
|
|
/* |
/* |
* Iterate over the set of sockets, marking ones believed (based on |
* Iterate over the set of descriptors, marking ones believed |
* refcount) to be referenced from a process, and marking for rescan |
* (based on refcount) to be referenced from a process, and |
* sockets which are queued on a socket. Rescan continues descending |
* marking for rescan descriptors which are queued on a socket. |
* and searching for sockets referenced by sockets (FDEFER), until |
|
* there are no more socket->socket references to be discovered. |
|
*/ |
*/ |
do { |
do { |
didwork = false; |
LIST_FOREACH(fp, &filehead, f_list) { |
for (fp = LIST_FIRST(&filehead); fp != NULL; fp = np) { |
|
KASSERT(mutex_owned(&filelist_lock)); |
|
np = LIST_NEXT(fp, f_list); |
|
mutex_enter(&fp->f_lock); |
mutex_enter(&fp->f_lock); |
if ((fp->f_flag & FDEFER) != 0) { |
if (fp->f_flag & FDEFER) { |
atomic_and_uint(&fp->f_flag, ~FDEFER); |
atomic_and_uint(&fp->f_flag, ~FDEFER); |
unp_defer--; |
unp_defer--; |
KASSERT(fp->f_count != 0); |
KASSERT(fp->f_count != 0); |
} else { |
} else { |
if (fp->f_count == 0 || |
if (fp->f_count == 0 || |
(fp->f_flag & FMARK) != 0 || |
(fp->f_flag & FMARK) || |
fp->f_count == fp->f_msgcount || |
fp->f_count == fp->f_msgcount) { |
fp->f_unpcount != 0) { |
|
mutex_exit(&fp->f_lock); |
mutex_exit(&fp->f_lock); |
continue; |
continue; |
} |
} |
Line 1530 unp_gc(file_t *dp) |
|
Line 1485 unp_gc(file_t *dp) |
|
if (fp->f_type != DTYPE_SOCKET || |
if (fp->f_type != DTYPE_SOCKET || |
(so = fp->f_data) == NULL || |
(so = fp->f_data) == NULL || |
so->so_proto->pr_domain != &unixdomain || |
so->so_proto->pr_domain != &unixdomain || |
(so->so_proto->pr_flags & PR_RIGHTS) == 0) { |
(so->so_proto->pr_flags&PR_RIGHTS) == 0) { |
mutex_exit(&fp->f_lock); |
mutex_exit(&fp->f_lock); |
continue; |
continue; |
} |
} |
|
#ifdef notdef |
/* Gain file ref, mark our position, and unlock. */ |
if (so->so_rcv.sb_flags & SB_LOCK) { |
didwork = true; |
mutex_exit(&fp->f_lock); |
LIST_INSERT_AFTER(fp, dp, f_list); |
mutex_exit(&filelist_lock); |
fp->f_count++; |
kmem_free(extra_ref, nslots * sizeof(file_t *)); |
|
/* |
|
* This is problematical; it's not clear |
|
* we need to wait for the sockbuf to be |
|
* unlocked (on a uniprocessor, at least), |
|
* and it's also not clear what to do |
|
* if sbwait returns an error due to receipt |
|
* of a signal. If sbwait does return |
|
* an error, we'll go into an infinite |
|
* loop. Delete all of this for now. |
|
*/ |
|
(void) sbwait(&so->so_rcv); |
|
goto restart; |
|
} |
|
#endif |
mutex_exit(&fp->f_lock); |
mutex_exit(&fp->f_lock); |
mutex_exit(&filelist_lock); |
|
|
|
/* |
/* |
* Mark files referenced from sockets queued on the |
* XXX Locking a socket with filelist_lock held |
* accept queue as well. |
* is ugly. filelist_lock can be taken by the |
|
* pagedaemon when reclaiming items from file_cache. |
|
* Socket activity could delay the pagedaemon. |
*/ |
*/ |
solock(so); |
solock(so); |
unp_scan(so->so_rcv.sb_mb, unp_mark, 0); |
unp_scan(so->so_rcv.sb_mb, unp_mark, 0); |
if ((so->so_options & SO_ACCEPTCONN) != 0) { |
/* |
|
* Mark descriptors referenced from sockets queued |
|
* on the accept queue as well. |
|
*/ |
|
if (so->so_options & SO_ACCEPTCONN) { |
TAILQ_FOREACH(so1, &so->so_q0, so_qe) { |
TAILQ_FOREACH(so1, &so->so_q0, so_qe) { |
unp_scan(so1->so_rcv.sb_mb, unp_mark, 0); |
unp_scan(so1->so_rcv.sb_mb, unp_mark, 0); |
} |
} |
Line 1557 unp_gc(file_t *dp) |
|
Line 1531 unp_gc(file_t *dp) |
|
} |
} |
} |
} |
sounlock(so); |
sounlock(so); |
|
|
/* Re-lock and restart from where we left off. */ |
|
closef(fp); |
|
mutex_enter(&filelist_lock); |
|
np = LIST_NEXT(dp, f_list); |
|
LIST_REMOVE(dp, f_list); |
|
} |
} |
/* |
} while (unp_defer); |
* Bail early if we did nothing in the loop above. Could |
|
* happen because of concurrent activity causing unp_defer |
|
* to get out of sync. |
|
*/ |
|
} while (unp_defer != 0 && didwork); |
|
|
|
/* |
/* |
* Sweep pass. |
* Sweep pass. Find unmarked descriptors, and free them. |
* |
* |
* We grab an extra reference to each of the files that are |
* We grab an extra reference to each of the file table entries |
* not otherwise accessible and then free the rights that are |
* that are not otherwise accessible and then free the rights |
* stored in messages on them. |
* that are stored in messages on them. |
|
* |
|
* The bug in the original code is a little tricky, so I'll describe |
|
* what's wrong with it here. |
|
* |
|
* It is incorrect to simply unp_discard each entry for f_msgcount |
|
* times -- consider the case of sockets A and B that contain |
|
* references to each other. On a last close of some other socket, |
|
* we trigger a gc since the number of outstanding rights (unp_rights) |
|
* is non-zero. If during the sweep phase the gc code unp_discards, |
|
* we end up doing a (full) closef on the descriptor. A closef on A |
|
* results in the following chain. Closef calls soo_close, which |
|
* calls soclose. Soclose calls first (through the switch |
|
* uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply |
|
* returns because the previous instance had set unp_gcing, and |
|
* we return all the way back to soclose, which marks the socket |
|
* with SS_NOFDREF, and then calls sofree. Sofree calls sorflush |
|
* to free up the rights that are queued in messages on the socket A, |
|
* i.e., the reference on B. The sorflush calls via the dom_dispose |
|
* switch unp_dispose, which unp_scans with unp_discard. This second |
|
* instance of unp_discard just calls closef on B. |
|
* |
|
* Well, a similar chain occurs on B, resulting in a sorflush on B, |
|
* which results in another closef on A. Unfortunately, A is already |
|
* being closed, and the descriptor has already been marked with |
|
* SS_NOFDREF, and soclose panics at this point. |
|
* |
|
* Here, we first take an extra reference to each inaccessible |
|
* descriptor. Then, if the inaccessible descriptor is a |
|
* socket, we call sorflush in case it is a Unix domain |
|
* socket. After we destroy all the rights carried in |
|
* messages, we do a last closef to get rid of our extra |
|
* reference. This is the last close, and the unp_detach etc |
|
* will shut down the socket. |
|
* |
|
* 91/09/19, bsy@cs.cmu.edu |
*/ |
*/ |
for (fp = LIST_FIRST(&filehead); fp != NULL; fp = np) { |
if (nslots < nfiles) { |
KASSERT(mutex_owned(&filelist_lock)); |
mutex_exit(&filelist_lock); |
np = LIST_NEXT(fp, f_list); |
kmem_free(extra_ref, nslots * sizeof(file_t *)); |
|
goto restart; |
|
} |
|
for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; fp != 0; |
|
fp = nextfp) { |
|
nextfp = LIST_NEXT(fp, f_list); |
mutex_enter(&fp->f_lock); |
mutex_enter(&fp->f_lock); |
|
if (fp->f_count != 0 && |
/* |
fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) { |
* Ignore non-sockets. |
*fpp++ = fp; |
* Ignore dead sockets, or sockets with pending close. |
nunref++; |
* Ignore sockets obviously referenced elsewhere. |
fp->f_count++; |
* Ignore sockets marked as referenced by our scan. |
|
* Ignore new sockets that did not exist during the scan. |
|
*/ |
|
if (fp->f_type != DTYPE_SOCKET || |
|
fp->f_count == 0 || fp->f_unpcount != 0 || |
|
fp->f_count != fp->f_msgcount || |
|
(fp->f_flag & (FMARK | FSCAN)) != FSCAN) { |
|
mutex_exit(&fp->f_lock); |
|
continue; |
|
} |
} |
|
|
/* Gain file ref, mark our position, and unlock. */ |
|
LIST_INSERT_AFTER(fp, dp, f_list); |
|
fp->f_count++; |
|
mutex_exit(&fp->f_lock); |
mutex_exit(&fp->f_lock); |
mutex_exit(&filelist_lock); |
|
|
|
/* |
|
* Flush all data from the socket's receive buffer. |
|
* This will cause files referenced only by the |
|
* socket to be queued for close. |
|
*/ |
|
so = fp->f_data; |
|
solock(so); |
|
sorflush(so); |
|
sounlock(so); |
|
|
|
/* Re-lock and restart from where we left off. */ |
|
closef(fp); |
|
mutex_enter(&filelist_lock); |
|
np = LIST_NEXT(dp, f_list); |
|
LIST_REMOVE(dp, f_list); |
|
} |
|
} |
|
|
|
/* |
|
* Garbage collector thread. While SCM_RIGHTS messages are in transit, |
|
* wake once per second to garbage collect. Run continually while we |
|
* have deferred closes to process. |
|
*/ |
|
static void |
|
unp_thread(void *cookie) |
|
{ |
|
file_t *dp; |
|
|
|
/* Allocate a dummy file for our scans. */ |
|
if ((dp = fgetdummy()) == NULL) { |
|
panic("unp_thread"); |
|
} |
} |
|
mutex_exit(&filelist_lock); |
|
|
mutex_enter(&filelist_lock); |
for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { |
for (;;) { |
fp = *fpp; |
KASSERT(mutex_owned(&filelist_lock)); |
if (fp->f_type == DTYPE_SOCKET) { |
if (SLIST_EMPTY(&unp_thread_discard)) { |
so = fp->f_data; |
if (unp_rights != 0) { |
solock(so); |
(void)cv_timedwait(&unp_thread_cv, |
sorflush(fp->f_data); |
&filelist_lock, hz); |
sounlock(so); |
} else { |
|
cv_wait(&unp_thread_cv, &filelist_lock); |
|
} |
|
} |
} |
unp_gc(dp); |
|
} |
} |
/* NOTREACHED */ |
for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { |
} |
closef(*fpp); |
|
|
/* |
|
* Kick the garbage collector into action if there is something for |
|
* it to process. |
|
*/ |
|
static void |
|
unp_thread_kick(void) |
|
{ |
|
|
|
if (!SLIST_EMPTY(&unp_thread_discard) || unp_rights != 0) { |
|
mutex_enter(&filelist_lock); |
|
cv_signal(&unp_thread_cv); |
|
mutex_exit(&filelist_lock); |
|
} |
} |
|
kmem_free(extra_ref, nslots * sizeof(file_t *)); |
|
atomic_swap_uint(&unp_gcing, 0); |
} |
} |
|
|
void |
void |
Line 1673 unp_dispose(struct mbuf *m) |
|
Line 1616 unp_dispose(struct mbuf *m) |
|
{ |
{ |
|
|
if (m) |
if (m) |
unp_scan(m, unp_discard_later, 1); |
unp_scan(m, unp_discard, 1); |
} |
} |
|
|
void |
void |
unp_scan(struct mbuf *m0, void (*op)(file_t *), int discard) |
unp_scan(struct mbuf *m0, void (*op)(file_t *), int discard) |
{ |
{ |
struct mbuf *m; |
struct mbuf *m; |
file_t **rp, *fp; |
file_t **rp; |
struct cmsghdr *cm; |
struct cmsghdr *cm; |
int i, qfds; |
int i; |
|
int qfds; |
|
|
while (m0) { |
while (m0) { |
for (m = m0; m; m = m->m_next) { |
for (m = m0; m; m = m->m_next) { |
if (m->m_type != MT_CONTROL || |
if (m->m_type == MT_CONTROL && |
m->m_len < sizeof(*cm)) { |
m->m_len >= sizeof(*cm)) { |
continue; |
cm = mtod(m, struct cmsghdr *); |
} |
if (cm->cmsg_level != SOL_SOCKET || |
cm = mtod(m, struct cmsghdr *); |
cm->cmsg_type != SCM_RIGHTS) |
if (cm->cmsg_level != SOL_SOCKET || |
continue; |
cm->cmsg_type != SCM_RIGHTS) |
qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) |
continue; |
/ sizeof(file_t *); |
qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) |
rp = (file_t **)CMSG_DATA(cm); |
/ sizeof(file_t *); |
for (i = 0; i < qfds; i++) { |
rp = (file_t **)CMSG_DATA(cm); |
file_t *fp = *rp; |
for (i = 0; i < qfds; i++) { |
if (discard) |
fp = *rp; |
*rp = 0; |
if (discard) { |
(*op)(fp); |
*rp = 0; |
rp++; |
} |
} |
(*op)(fp); |
break; /* XXX, but saves time */ |
rp++; |
|
} |
} |
} |
} |
m0 = m0->m_nextpkt; |
m0 = m0->m_nextpkt; |
Line 1725 unp_mark(file_t *fp) |
|
Line 1668 unp_mark(file_t *fp) |
|
} |
} |
|
|
/* |
/* |
* Minimize the number of deferrals... Sockets are the only type of |
* Minimize the number of deferrals... Sockets are the only |
* file which can hold references to another file, so just mark |
* type of descriptor which can hold references to another |
* other files, and defer unmarked sockets for the next pass. |
* descriptor, so just mark other descriptors, and defer |
|
* unmarked sockets for the next pass. |
*/ |
*/ |
if (fp->f_type == DTYPE_SOCKET) { |
if (fp->f_type == DTYPE_SOCKET) { |
unp_defer++; |
unp_defer++; |
Line 1737 unp_mark(file_t *fp) |
|
Line 1681 unp_mark(file_t *fp) |
|
atomic_or_uint(&fp->f_flag, FMARK); |
atomic_or_uint(&fp->f_flag, FMARK); |
} |
} |
mutex_exit(&fp->f_lock); |
mutex_exit(&fp->f_lock); |
|
return; |
} |
} |
|
|
static void |
void |
unp_discard_now(file_t *fp) |
unp_discard(file_t *fp) |
{ |
{ |
|
|
if (fp == NULL) |
if (fp == NULL) |
return; |
return; |
|
|
KASSERT(fp->f_count > 0); |
|
KASSERT(fp->f_msgcount > 0); |
|
|
|
mutex_enter(&fp->f_lock); |
mutex_enter(&fp->f_lock); |
|
KASSERT(fp->f_count > 0); |
fp->f_msgcount--; |
fp->f_msgcount--; |
mutex_exit(&fp->f_lock); |
mutex_exit(&fp->f_lock); |
atomic_dec_uint(&unp_rights); |
atomic_dec_uint(&unp_rights); |
(void)closef(fp); |
(void)closef(fp); |
} |
} |
|
|
static void |
|
unp_discard_later(file_t *fp) |
|
{ |
|
|
|
if (fp == NULL) |
|
return; |
|
|
|
KASSERT(fp->f_count > 0); |
|
KASSERT(fp->f_msgcount > 0); |
|
|
|
mutex_enter(&filelist_lock); |
|
if (fp->f_unpcount++ == 0) { |
|
SLIST_INSERT_HEAD(&unp_thread_discard, fp, f_unplist); |
|
} |
|
mutex_exit(&filelist_lock); |
|
} |
|