Annotation of src/sys/kern/uipc_usrreq.c, Revision 1.95.2.4
1.95.2.4! ad 1: /* $NetBSD: uipc_usrreq.c,v 1.95.2.3 2007/04/12 23:14:20 ad Exp $ */
1.30 thorpej 2:
3: /*-
1.77 matt 4: * Copyright (c) 1998, 2000, 2004 The NetBSD Foundation, Inc.
1.30 thorpej 5: * All rights reserved.
6: *
7: * This code is derived from software contributed to The NetBSD Foundation
8: * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9: * NASA Ames Research Center.
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
19: * 3. All advertising materials mentioning features or use of this software
20: * must display the following acknowledgement:
21: * This product includes software developed by the NetBSD
22: * Foundation, Inc. and its contributors.
23: * 4. Neither the name of The NetBSD Foundation nor the names of its
24: * contributors may be used to endorse or promote products derived
25: * from this software without specific prior written permission.
26: *
27: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37: * POSSIBILITY OF SUCH DAMAGE.
38: */
1.10 cgd 39:
1.1 cgd 40: /*
1.8 mycroft 41: * Copyright (c) 1982, 1986, 1989, 1991, 1993
42: * The Regents of the University of California. All rights reserved.
1.1 cgd 43: *
44: * Redistribution and use in source and binary forms, with or without
45: * modification, are permitted provided that the following conditions
46: * are met:
47: * 1. Redistributions of source code must retain the above copyright
48: * notice, this list of conditions and the following disclaimer.
49: * 2. Redistributions in binary form must reproduce the above copyright
50: * notice, this list of conditions and the following disclaimer in the
51: * documentation and/or other materials provided with the distribution.
1.67 agc 52: * 3. Neither the name of the University nor the names of its contributors
53: * may be used to endorse or promote products derived from this software
54: * without specific prior written permission.
55: *
56: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66: * SUCH DAMAGE.
67: *
68: * @(#)uipc_usrreq.c 8.9 (Berkeley) 5/14/95
69: */
70:
71: /*
72: * Copyright (c) 1997 Christopher G. Demetriou. All rights reserved.
73: *
74: * Redistribution and use in source and binary forms, with or without
75: * modification, are permitted provided that the following conditions
76: * are met:
77: * 1. Redistributions of source code must retain the above copyright
78: * notice, this list of conditions and the following disclaimer.
79: * 2. Redistributions in binary form must reproduce the above copyright
80: * notice, this list of conditions and the following disclaimer in the
81: * documentation and/or other materials provided with the distribution.
1.1 cgd 82: * 3. All advertising materials mentioning features or use of this software
83: * must display the following acknowledgement:
84: * This product includes software developed by the University of
85: * California, Berkeley and its contributors.
86: * 4. Neither the name of the University nor the names of its contributors
87: * may be used to endorse or promote products derived from this software
88: * without specific prior written permission.
89: *
90: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
91: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
92: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
93: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
94: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
95: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
96: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
97: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
98: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
99: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
100: * SUCH DAMAGE.
101: *
1.31 fvdl 102: * @(#)uipc_usrreq.c 8.9 (Berkeley) 5/14/95
1.1 cgd 103: */
1.53 lukem 104:
105: #include <sys/cdefs.h>
1.95.2.4! ad 106: __KERNEL_RCSID(0, "$NetBSD: uipc_usrreq.c,v 1.95.2.3 2007/04/12 23:14:20 ad Exp $");
1.1 cgd 107:
1.7 mycroft 108: #include <sys/param.h>
1.8 mycroft 109: #include <sys/systm.h>
1.7 mycroft 110: #include <sys/proc.h>
111: #include <sys/filedesc.h>
112: #include <sys/domain.h>
113: #include <sys/protosw.h>
114: #include <sys/socket.h>
115: #include <sys/socketvar.h>
116: #include <sys/unpcb.h>
117: #include <sys/un.h>
118: #include <sys/namei.h>
119: #include <sys/vnode.h>
120: #include <sys/file.h>
121: #include <sys/stat.h>
122: #include <sys/mbuf.h>
1.91 elad 123: #include <sys/kauth.h>
1.95.2.3 ad 124: #include <sys/kmem.h>
1.1 cgd 125:
126: /*
127: * Unix communications domain.
128: *
129: * TODO:
130: * SEQPACKET, RDM
131: * rethink name space problems
132: * need a proper out-of-band
133: */
1.93 christos 134: const struct sockaddr_un sun_noname = {
135: .sun_len = sizeof(sun_noname),
136: .sun_family = AF_LOCAL,
137: };
1.1 cgd 138: ino_t unp_ino; /* prototype for fake inode numbers */
139:
1.92 ad 140: struct mbuf *unp_addsockcred(struct lwp *, struct mbuf *);
1.30 thorpej 141:
1.20 mycroft 142: int
1.76 matt 143: unp_output(struct mbuf *m, struct mbuf *control, struct unpcb *unp,
1.92 ad 144: struct lwp *l)
1.20 mycroft 145: {
146: struct socket *so2;
1.77 matt 147: const struct sockaddr_un *sun;
1.20 mycroft 148:
149: so2 = unp->unp_conn->unp_socket;
150: if (unp->unp_addr)
151: sun = unp->unp_addr;
152: else
153: sun = &sun_noname;
1.30 thorpej 154: if (unp->unp_conn->unp_flags & UNP_WANTCRED)
1.92 ad 155: control = unp_addsockcred(l, control);
1.82 christos 156: if (sbappendaddr(&so2->so_rcv, (const struct sockaddr *)sun, m,
1.20 mycroft 157: control) == 0) {
158: m_freem(control);
159: m_freem(m);
1.79 darrenr 160: so2->so_rcv.sb_overflowed++;
1.60 christos 161: return (ENOBUFS);
1.20 mycroft 162: } else {
163: sorwakeup(so2);
164: return (0);
165: }
166: }
167:
168: void
1.76 matt 169: unp_setsockaddr(struct unpcb *unp, struct mbuf *nam)
1.20 mycroft 170: {
1.77 matt 171: const struct sockaddr_un *sun;
1.20 mycroft 172:
173: if (unp->unp_addr)
174: sun = unp->unp_addr;
175: else
176: sun = &sun_noname;
177: nam->m_len = sun->sun_len;
1.56 itojun 178: if (nam->m_len > MLEN)
1.27 thorpej 179: MEXTMALLOC(nam, nam->m_len, M_WAITOK);
1.95 christos 180: memcpy(mtod(nam, void *), sun, (size_t)nam->m_len);
1.20 mycroft 181: }
182:
183: void
1.76 matt 184: unp_setpeeraddr(struct unpcb *unp, struct mbuf *nam)
1.20 mycroft 185: {
1.77 matt 186: const struct sockaddr_un *sun;
1.20 mycroft 187:
188: if (unp->unp_conn && unp->unp_conn->unp_addr)
189: sun = unp->unp_conn->unp_addr;
190: else
191: sun = &sun_noname;
192: nam->m_len = sun->sun_len;
1.56 itojun 193: if (nam->m_len > MLEN)
1.27 thorpej 194: MEXTMALLOC(nam, nam->m_len, M_WAITOK);
1.95 christos 195: memcpy(mtod(nam, void *), sun, (size_t)nam->m_len);
1.20 mycroft 196: }
197:
1.1 cgd 198: /*ARGSUSED*/
1.5 andrew 199: int
1.76 matt 200: uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
1.86 christos 201: struct mbuf *control, struct lwp *l)
1.1 cgd 202: {
203: struct unpcb *unp = sotounpcb(so);
1.46 augustss 204: struct socket *so2;
1.86 christos 205: struct proc *p;
1.75 christos 206: u_int newhiwat;
1.46 augustss 207: int error = 0;
1.1 cgd 208:
209: if (req == PRU_CONTROL)
210: return (EOPNOTSUPP);
1.20 mycroft 211:
1.22 mycroft 212: #ifdef DIAGNOSTIC
213: if (req != PRU_SEND && req != PRU_SENDOOB && control)
214: panic("uipc_usrreq: unexpected control mbuf");
215: #endif
1.86 christos 216: p = l ? l->l_proc : NULL;
1.1 cgd 217: if (unp == 0 && req != PRU_ATTACH) {
218: error = EINVAL;
219: goto release;
220: }
1.20 mycroft 221:
1.1 cgd 222: switch (req) {
223:
224: case PRU_ATTACH:
1.20 mycroft 225: if (unp != 0) {
1.1 cgd 226: error = EISCONN;
227: break;
228: }
229: error = unp_attach(so);
230: break;
231:
232: case PRU_DETACH:
233: unp_detach(unp);
234: break;
235:
236: case PRU_BIND:
1.90 christos 237: KASSERT(l != NULL);
1.86 christos 238: error = unp_bind(unp, nam, l);
1.1 cgd 239: break;
240:
241: case PRU_LISTEN:
242: if (unp->unp_vnode == 0)
243: error = EINVAL;
244: break;
245:
246: case PRU_CONNECT:
1.90 christos 247: KASSERT(l != NULL);
1.86 christos 248: error = unp_connect(so, nam, l);
1.1 cgd 249: break;
250:
251: case PRU_CONNECT2:
1.72 matt 252: error = unp_connect2(so, (struct socket *)nam, PRU_CONNECT2);
1.1 cgd 253: break;
254:
255: case PRU_DISCONNECT:
256: unp_disconnect(unp);
257: break;
258:
259: case PRU_ACCEPT:
1.20 mycroft 260: unp_setpeeraddr(unp, nam);
1.72 matt 261: /*
262: * Mark the initiating STREAM socket as connected *ONLY*
263: * after it's been accepted. This prevents a client from
264: * overrunning a server and receiving ECONNREFUSED.
265: */
266: if (unp->unp_conn != NULL &&
267: (unp->unp_conn->unp_socket->so_state & SS_ISCONNECTING))
268: soisconnected(unp->unp_conn->unp_socket);
1.1 cgd 269: break;
270:
271: case PRU_SHUTDOWN:
272: socantsendmore(so);
273: unp_shutdown(unp);
274: break;
275:
276: case PRU_RCVD:
277: switch (so->so_type) {
278:
279: case SOCK_DGRAM:
280: panic("uipc 1");
281: /*NOTREACHED*/
282:
283: case SOCK_STREAM:
284: #define rcv (&so->so_rcv)
285: #define snd (&so2->so_snd)
286: if (unp->unp_conn == 0)
287: break;
288: so2 = unp->unp_conn->unp_socket;
289: /*
290: * Adjust backpressure on sender
291: * and wakeup any waiting to write.
292: */
293: snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
294: unp->unp_mbcnt = rcv->sb_mbcnt;
1.75 christos 295: newhiwat = snd->sb_hiwat + unp->unp_cc - rcv->sb_cc;
1.81 christos 296: (void)chgsbsize(so2->so_uidinfo,
1.75 christos 297: &snd->sb_hiwat, newhiwat, RLIM_INFINITY);
1.1 cgd 298: unp->unp_cc = rcv->sb_cc;
299: sowwakeup(so2);
300: #undef snd
301: #undef rcv
302: break;
303:
304: default:
305: panic("uipc 2");
306: }
307: break;
308:
309: case PRU_SEND:
1.30 thorpej 310: /*
311: * Note: unp_internalize() rejects any control message
312: * other than SCM_RIGHTS, and only allows one. This
313: * has the side-effect of preventing a caller from
314: * forging SCM_CREDS.
315: */
1.90 christos 316: if (control) {
317: KASSERT(l != NULL);
318: if ((error = unp_internalize(control, l)) != 0)
319: goto die;
1.83 yamt 320: }
1.1 cgd 321: switch (so->so_type) {
322:
323: case SOCK_DGRAM: {
324: if (nam) {
1.20 mycroft 325: if ((so->so_state & SS_ISCONNECTED) != 0) {
1.1 cgd 326: error = EISCONN;
1.21 mycroft 327: goto die;
1.1 cgd 328: }
1.90 christos 329: KASSERT(l != NULL);
1.86 christos 330: error = unp_connect(so, nam, l);
1.20 mycroft 331: if (error) {
1.23 mycroft 332: die:
1.21 mycroft 333: m_freem(control);
1.20 mycroft 334: m_freem(m);
1.1 cgd 335: break;
1.20 mycroft 336: }
1.1 cgd 337: } else {
1.20 mycroft 338: if ((so->so_state & SS_ISCONNECTED) == 0) {
1.1 cgd 339: error = ENOTCONN;
1.21 mycroft 340: goto die;
1.1 cgd 341: }
342: }
1.89 christos 343: KASSERT(p != NULL);
1.92 ad 344: error = unp_output(m, control, unp, l);
1.1 cgd 345: if (nam)
346: unp_disconnect(unp);
347: break;
348: }
349:
350: case SOCK_STREAM:
351: #define rcv (&so2->so_rcv)
352: #define snd (&so->so_snd)
1.87 christos 353: if (unp->unp_conn == NULL) {
354: error = ENOTCONN;
355: break;
356: }
1.1 cgd 357: so2 = unp->unp_conn->unp_socket;
1.30 thorpej 358: if (unp->unp_conn->unp_flags & UNP_WANTCRED) {
359: /*
360: * Credentials are passed only once on
361: * SOCK_STREAM.
362: */
363: unp->unp_conn->unp_flags &= ~UNP_WANTCRED;
1.92 ad 364: control = unp_addsockcred(l, control);
1.30 thorpej 365: }
1.1 cgd 366: /*
367: * Send to paired receive port, and then reduce
368: * send buffer hiwater marks to maintain backpressure.
369: * Wake up readers.
370: */
371: if (control) {
1.21 mycroft 372: if (sbappendcontrol(rcv, m, control) == 0)
373: m_freem(control);
1.1 cgd 374: } else
375: sbappend(rcv, m);
376: snd->sb_mbmax -=
377: rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
378: unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
1.75 christos 379: newhiwat = snd->sb_hiwat -
380: (rcv->sb_cc - unp->unp_conn->unp_cc);
1.81 christos 381: (void)chgsbsize(so->so_uidinfo,
1.75 christos 382: &snd->sb_hiwat, newhiwat, RLIM_INFINITY);
1.1 cgd 383: unp->unp_conn->unp_cc = rcv->sb_cc;
384: sorwakeup(so2);
385: #undef snd
386: #undef rcv
387: break;
388:
389: default:
390: panic("uipc 4");
391: }
392: break;
393:
394: case PRU_ABORT:
395: unp_drop(unp, ECONNABORTED);
1.39 sommerfe 396:
1.88 matt 397: KASSERT(so->so_head == NULL);
1.39 sommerfe 398: #ifdef DIAGNOSTIC
399: if (so->so_pcb == 0)
400: panic("uipc 5: drop killed pcb");
401: #endif
402: unp_detach(unp);
1.1 cgd 403: break;
404:
405: case PRU_SENSE:
406: ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
407: if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
408: so2 = unp->unp_conn->unp_socket;
409: ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
410: }
411: ((struct stat *) m)->st_dev = NODEV;
412: if (unp->unp_ino == 0)
413: unp->unp_ino = unp_ino++;
1.25 kleink 414: ((struct stat *) m)->st_atimespec =
415: ((struct stat *) m)->st_mtimespec =
416: ((struct stat *) m)->st_ctimespec = unp->unp_ctime;
1.1 cgd 417: ((struct stat *) m)->st_ino = unp->unp_ino;
418: return (0);
419:
420: case PRU_RCVOOB:
1.20 mycroft 421: error = EOPNOTSUPP;
422: break;
1.1 cgd 423:
424: case PRU_SENDOOB:
1.22 mycroft 425: m_freem(control);
1.20 mycroft 426: m_freem(m);
1.1 cgd 427: error = EOPNOTSUPP;
428: break;
429:
430: case PRU_SOCKADDR:
1.20 mycroft 431: unp_setsockaddr(unp, nam);
1.1 cgd 432: break;
433:
434: case PRU_PEERADDR:
1.20 mycroft 435: unp_setpeeraddr(unp, nam);
1.1 cgd 436: break;
437:
438: default:
439: panic("piusrreq");
440: }
1.20 mycroft 441:
1.1 cgd 442: release:
443: return (error);
444: }
445:
446: /*
1.30 thorpej 447: * Unix domain socket option processing.
448: */
449: int
1.76 matt 450: uipc_ctloutput(int op, struct socket *so, int level, int optname,
451: struct mbuf **mp)
1.30 thorpej 452: {
453: struct unpcb *unp = sotounpcb(so);
454: struct mbuf *m = *mp;
455: int optval = 0, error = 0;
456:
457: if (level != 0) {
458: error = EINVAL;
459: if (op == PRCO_SETOPT && m)
460: (void) m_free(m);
461: } else switch (op) {
462:
463: case PRCO_SETOPT:
464: switch (optname) {
465: case LOCAL_CREDS:
1.72 matt 466: case LOCAL_CONNWAIT:
1.30 thorpej 467: if (m == NULL || m->m_len != sizeof(int))
468: error = EINVAL;
469: else {
470: optval = *mtod(m, int *);
471: switch (optname) {
472: #define OPTSET(bit) \
473: if (optval) \
474: unp->unp_flags |= (bit); \
475: else \
476: unp->unp_flags &= ~(bit);
477:
478: case LOCAL_CREDS:
479: OPTSET(UNP_WANTCRED);
480: break;
1.72 matt 481: case LOCAL_CONNWAIT:
482: OPTSET(UNP_CONNWAIT);
483: break;
1.30 thorpej 484: }
485: }
486: break;
487: #undef OPTSET
488:
489: default:
490: error = ENOPROTOOPT;
491: break;
492: }
493: if (m)
494: (void) m_free(m);
495: break;
496:
497: case PRCO_GETOPT:
498: switch (optname) {
499: case LOCAL_CREDS:
500: *mp = m = m_get(M_WAIT, MT_SOOPTS);
501: m->m_len = sizeof(int);
502: switch (optname) {
503:
504: #define OPTBIT(bit) (unp->unp_flags & (bit) ? 1 : 0)
505:
506: case LOCAL_CREDS:
507: optval = OPTBIT(UNP_WANTCRED);
508: break;
509: }
510: *mtod(m, int *) = optval;
511: break;
512: #undef OPTBIT
513:
514: default:
515: error = ENOPROTOOPT;
516: break;
517: }
518: break;
519: }
520: return (error);
521: }
522:
523: /*
1.1 cgd 524: * Both send and receive buffers are allocated PIPSIZ bytes of buffering
525: * for stream sockets, although the total for sender and receiver is
526: * actually only PIPSIZ.
527: * Datagram sockets really use the sendspace as the maximum datagram size,
528: * and don't really want to reserve the sendspace. Their recvspace should
529: * be large enough for at least one max-size datagram plus address.
530: */
531: #define PIPSIZ 4096
532: u_long unpst_sendspace = PIPSIZ;
533: u_long unpst_recvspace = PIPSIZ;
534: u_long unpdg_sendspace = 2*1024; /* really max datagram size */
535: u_long unpdg_recvspace = 4*1024;
536:
537: int unp_rights; /* file descriptors in flight */
538:
1.5 andrew 539: int
1.76 matt 540: unp_attach(struct socket *so)
1.1 cgd 541: {
1.46 augustss 542: struct unpcb *unp;
1.1 cgd 543: int error;
1.80 perry 544:
1.1 cgd 545: if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
546: switch (so->so_type) {
547:
548: case SOCK_STREAM:
549: error = soreserve(so, unpst_sendspace, unpst_recvspace);
550: break;
551:
552: case SOCK_DGRAM:
553: error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
554: break;
1.8 mycroft 555:
556: default:
557: panic("unp_attach");
1.1 cgd 558: }
559: if (error)
560: return (error);
561: }
1.14 mycroft 562: unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT);
563: if (unp == NULL)
1.1 cgd 564: return (ENOBUFS);
1.95 christos 565: memset((void *)unp, 0, sizeof(*unp));
1.14 mycroft 566: unp->unp_socket = so;
1.15 mycroft 567: so->so_pcb = unp;
1.85 simonb 568: nanotime(&unp->unp_ctime);
1.1 cgd 569: return (0);
570: }
571:
1.17 pk 572: void
1.76 matt 573: unp_detach(struct unpcb *unp)
1.1 cgd 574: {
1.80 perry 575:
1.1 cgd 576: if (unp->unp_vnode) {
577: unp->unp_vnode->v_socket = 0;
578: vrele(unp->unp_vnode);
579: unp->unp_vnode = 0;
580: }
581: if (unp->unp_conn)
582: unp_disconnect(unp);
583: while (unp->unp_refs)
584: unp_drop(unp->unp_refs, ECONNRESET);
585: soisdisconnected(unp->unp_socket);
586: unp->unp_socket->so_pcb = 0;
1.20 mycroft 587: if (unp->unp_addr)
1.26 thorpej 588: free(unp->unp_addr, M_SONAME);
1.8 mycroft 589: if (unp_rights) {
590: /*
591: * Normally the receive buffer is flushed later,
592: * in sofree, but if our receive buffer holds references
593: * to descriptors that are now garbage, we will dispose
594: * of those descriptor references after the garbage collector
595: * gets them (resulting in a "panic: closef: count < 0").
596: */
597: sorflush(unp->unp_socket);
1.14 mycroft 598: free(unp, M_PCB);
1.1 cgd 599: unp_gc();
1.14 mycroft 600: } else
601: free(unp, M_PCB);
1.1 cgd 602: }
603:
1.5 andrew 604: int
1.86 christos 605: unp_bind(struct unpcb *unp, struct mbuf *nam, struct lwp *l)
1.1 cgd 606: {
1.27 thorpej 607: struct sockaddr_un *sun;
1.46 augustss 608: struct vnode *vp;
1.1 cgd 609: struct vattr vattr;
1.27 thorpej 610: size_t addrlen;
1.86 christos 611: struct proc *p;
1.1 cgd 612: int error;
613: struct nameidata nd;
614:
1.20 mycroft 615: if (unp->unp_vnode != 0)
616: return (EINVAL);
1.27 thorpej 617:
1.86 christos 618: p = l->l_proc;
1.27 thorpej 619: /*
620: * Allocate the new sockaddr. We have to allocate one
621: * extra byte so that we can ensure that the pathname
622: * is nul-terminated.
623: */
624: addrlen = nam->m_len + 1;
625: sun = malloc(addrlen, M_SONAME, M_WAITOK);
1.95 christos 626: m_copydata(nam, 0, nam->m_len, (void *)sun);
1.27 thorpej 627: *(((char *)sun) + nam->m_len) = '\0';
628:
1.95.2.4! ad 629: NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT | TRYEMULROOT, UIO_SYSSPACE,
1.86 christos 630: sun->sun_path, l);
1.27 thorpej 631:
1.1 cgd 632: /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
1.16 christos 633: if ((error = namei(&nd)) != 0)
1.27 thorpej 634: goto bad;
1.9 mycroft 635: vp = nd.ni_vp;
1.95.2.2 ad 636: if (vp != NULL) {
1.9 mycroft 637: VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
638: if (nd.ni_dvp == vp)
639: vrele(nd.ni_dvp);
1.1 cgd 640: else
1.9 mycroft 641: vput(nd.ni_dvp);
1.1 cgd 642: vrele(vp);
1.95.2.2 ad 643: error = EADDRINUSE;
644: goto bad;
1.1 cgd 645: }
646: VATTR_NULL(&vattr);
647: vattr.va_type = VSOCK;
1.84 jmmv 648: vattr.va_mode = ACCESSPERMS & ~(p->p_cwdi->cwdi_cmask);
1.92 ad 649: VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1.16 christos 650: error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
651: if (error)
1.27 thorpej 652: goto bad;
1.9 mycroft 653: vp = nd.ni_vp;
1.1 cgd 654: vp->v_socket = unp->unp_socket;
655: unp->unp_vnode = vp;
1.27 thorpej 656: unp->unp_addrlen = addrlen;
657: unp->unp_addr = sun;
1.31 fvdl 658: VOP_UNLOCK(vp, 0);
1.1 cgd 659: return (0);
1.27 thorpej 660:
661: bad:
662: free(sun, M_SONAME);
663: return (error);
1.1 cgd 664: }
665:
/*
 * Connect so to the socket bound at the path named by nam.
 *
 * The path is looked up, checked to be a VSOCK vnode writable by l, and
 * its socket must be of the same type as so.  For protocols with
 * PR_CONNREQUIRED the target must be accepting connections; a new
 * socket is spawned from it, inherits its address and flags, and
 * becomes the actual peer.  Returns 0 or an errno value.
 */
int
unp_connect(struct socket *so, struct mbuf *nam, struct lwp *l)
{
	struct nameidata nd;
	struct sockaddr_un *sun;
	struct socket *so2, *so3;
	struct unpcb *unp2, *unp3;
	struct vnode *vp;
	size_t addrlen;
	int error;

	/*
	 * Allocate a temporary sockaddr.  We have to allocate one extra
	 * byte so that we can ensure that the pathname is nul-terminated.
	 * When we establish the connection, we copy the other PCB's
	 * sockaddr to our own.
	 */
	addrlen = nam->m_len + 1;
	sun = malloc(addrlen, M_SONAME, M_WAITOK);
	m_copydata(nam, 0, nam->m_len, (void *)sun);
	((char *)sun)[nam->m_len] = '\0';

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_SYSSPACE,
	    sun->sun_path, l);

	error = namei(&nd);
	if (error != 0)
		goto bad2;

	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto bad;
	}
	error = VOP_ACCESS(vp, VWRITE, l->l_cred, l);
	if (error != 0)
		goto bad;

	so2 = vp->v_socket;
	if (so2 == 0) {
		error = ECONNREFUSED;
		goto bad;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto bad;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((so2->so_options & SO_ACCEPTCONN) == 0) {
			error = ECONNREFUSED;
			goto bad;
		}
		so3 = sonewconn(so2, 0);
		if (so3 == 0) {
			error = ECONNREFUSED;
			goto bad;
		}
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		/* The spawned socket takes a copy of the listener's name. */
		if (unp2->unp_addr) {
			unp3->unp_addr = malloc(unp2->unp_addrlen,
			    M_SONAME, M_WAITOK);
			memcpy(unp3->unp_addr, unp2->unp_addr,
			    unp2->unp_addrlen);
			unp3->unp_addrlen = unp2->unp_addrlen;
		}
		unp3->unp_flags = unp2->unp_flags;
		so2 = so3;
	}
	error = unp_connect2(so, so2, PRU_CONNECT);

 bad:
	vput(vp);
 bad2:
	free(sun, M_SONAME);
	return (error);
}
733:
1.5 andrew 734: int
1.76 matt 735: unp_connect2(struct socket *so, struct socket *so2, int req)
1.1 cgd 736: {
1.46 augustss 737: struct unpcb *unp = sotounpcb(so);
738: struct unpcb *unp2;
1.1 cgd 739:
740: if (so2->so_type != so->so_type)
741: return (EPROTOTYPE);
742: unp2 = sotounpcb(so2);
743: unp->unp_conn = unp2;
744: switch (so->so_type) {
745:
746: case SOCK_DGRAM:
747: unp->unp_nextref = unp2->unp_refs;
748: unp2->unp_refs = unp;
749: soisconnected(so);
750: break;
751:
752: case SOCK_STREAM:
753: unp2->unp_conn = unp;
1.72 matt 754: if (req == PRU_CONNECT &&
755: ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT))
756: soisconnecting(so);
757: else
758: soisconnected(so);
1.1 cgd 759: soisconnected(so2);
760: break;
761:
762: default:
763: panic("unp_connect2");
764: }
765: return (0);
766: }
767:
1.5 andrew 768: void
1.76 matt 769: unp_disconnect(struct unpcb *unp)
1.1 cgd 770: {
1.46 augustss 771: struct unpcb *unp2 = unp->unp_conn;
1.1 cgd 772:
773: if (unp2 == 0)
774: return;
775: unp->unp_conn = 0;
776: switch (unp->unp_socket->so_type) {
777:
778: case SOCK_DGRAM:
779: if (unp2->unp_refs == unp)
780: unp2->unp_refs = unp->unp_nextref;
781: else {
782: unp2 = unp2->unp_refs;
783: for (;;) {
784: if (unp2 == 0)
785: panic("unp_disconnect");
786: if (unp2->unp_nextref == unp)
787: break;
788: unp2 = unp2->unp_nextref;
789: }
790: unp2->unp_nextref = unp->unp_nextref;
791: }
792: unp->unp_nextref = 0;
793: unp->unp_socket->so_state &= ~SS_ISCONNECTED;
794: break;
795:
796: case SOCK_STREAM:
797: soisdisconnected(unp->unp_socket);
798: unp2->unp_conn = 0;
799: soisdisconnected(unp2->unp_socket);
800: break;
801: }
802: }
803:
#ifdef notdef
/* Compiled out: would simply detach the PCB. */
unp_abort(struct unpcb *unp)
{

	unp_detach(unp);
}
#endif
810:
1.5 andrew 811: void
1.76 matt 812: unp_shutdown(struct unpcb *unp)
1.1 cgd 813: {
814: struct socket *so;
815:
816: if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
817: (so = unp->unp_conn->unp_socket))
818: socantrcvmore(so);
819: }
820:
1.5 andrew 821: void
1.76 matt 822: unp_drop(struct unpcb *unp, int errno)
1.1 cgd 823: {
824: struct socket *so = unp->unp_socket;
825:
826: so->so_error = errno;
827: unp_disconnect(unp);
828: if (so->so_head) {
1.15 mycroft 829: so->so_pcb = 0;
1.14 mycroft 830: sofree(so);
1.20 mycroft 831: if (unp->unp_addr)
1.26 thorpej 832: free(unp->unp_addr, M_SONAME);
1.14 mycroft 833: free(unp, M_PCB);
1.1 cgd 834: }
835: }
836:
#ifdef notdef
/* Compiled out: empty placeholder. */
unp_drain(void)
{

}
#endif
843:
1.5 andrew 844: int
1.86 christos 845: unp_externalize(struct mbuf *rights, struct lwp *l)
1.1 cgd 846: {
1.46 augustss 847: struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
1.86 christos 848: struct proc *p = l->l_proc;
1.47 thorpej 849: int i, *fdp;
1.46 augustss 850: struct file **rp;
851: struct file *fp;
1.50 thorpej 852: int nfds, error = 0;
1.47 thorpej 853:
854: nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
855: sizeof(struct file *);
856: rp = (struct file **)CMSG_DATA(cm);
1.1 cgd 857:
1.50 thorpej 858: fdp = malloc(nfds * sizeof(int), M_TEMP, M_WAITOK);
1.95.2.3 ad 859: rw_enter(&p->p_cwdi->cwdi_lock, RW_READER);
1.50 thorpej 860:
1.39 sommerfe 861: /* Make sure the recipient should be able to see the descriptors.. */
1.42 thorpej 862: if (p->p_cwdi->cwdi_rdir != NULL) {
1.48 thorpej 863: rp = (struct file **)CMSG_DATA(cm);
1.39 sommerfe 864: for (i = 0; i < nfds; i++) {
865: fp = *rp++;
866: /*
867: * If we are in a chroot'ed directory, and
868: * someone wants to pass us a directory, make
869: * sure it's inside the subtree we're allowed
870: * to access.
871: */
872: if (fp->f_type == DTYPE_VNODE) {
873: struct vnode *vp = (struct vnode *)fp->f_data;
874: if ((vp->v_type == VDIR) &&
1.86 christos 875: !vn_isunder(vp, p->p_cwdi->cwdi_rdir, l)) {
1.39 sommerfe 876: error = EPERM;
877: break;
878: }
879: }
880: }
881: }
1.50 thorpej 882:
883: restart:
1.47 thorpej 884: rp = (struct file **)CMSG_DATA(cm);
1.50 thorpej 885: if (error != 0) {
1.24 cgd 886: for (i = 0; i < nfds; i++) {
1.1 cgd 887: fp = *rp;
1.39 sommerfe 888: /*
889: * zero the pointer before calling unp_discard,
890: * since it may end up in unp_gc()..
891: */
892: *rp++ = 0;
1.1 cgd 893: unp_discard(fp);
894: }
1.50 thorpej 895: goto out;
1.1 cgd 896: }
1.50 thorpej 897:
1.24 cgd 898: /*
1.50 thorpej 899: * First loop -- allocate file descriptor table slots for the
900: * new descriptors.
1.24 cgd 901: */
902: for (i = 0; i < nfds; i++) {
1.39 sommerfe 903: fp = *rp++;
1.50 thorpej 904: if ((error = fdalloc(p, 0, &fdp[i])) != 0) {
1.49 thorpej 905: /*
1.50 thorpej 906: * Back out what we've done so far.
1.49 thorpej 907: */
1.50 thorpej 908: for (--i; i >= 0; i--)
909: fdremove(p->p_fd, fdp[i]);
910:
911: if (error == ENOSPC) {
912: fdexpand(p);
913: error = 0;
914: } else {
915: /*
916: * This is the error that has historically
917: * been returned, and some callers may
918: * expect it.
919: */
920: error = EMSGSIZE;
921: }
922: goto restart;
1.49 thorpej 923: }
1.50 thorpej 924:
925: /*
926: * Make the slot reference the descriptor so that
927: * fdalloc() works properly.. We finalize it all
928: * in the loop below.
929: */
1.95.2.3 ad 930: rw_enter(&p->p_fd->fd_lock, RW_WRITER);
1.50 thorpej 931: p->p_fd->fd_ofiles[fdp[i]] = fp;
1.95.2.3 ad 932: rw_exit(&p->p_fd->fd_lock);
1.1 cgd 933: }
1.24 cgd 934:
935: /*
1.50 thorpej 936: * Now that adding them has succeeded, update all of the
937: * descriptor passing state.
1.24 cgd 938: */
1.50 thorpej 939: rp = (struct file **)CMSG_DATA(cm);
940: for (i = 0; i < nfds; i++) {
941: fp = *rp++;
942: fp->f_msgcount--;
943: unp_rights--;
944: }
945:
946: /*
947: * Copy temporary array to message and adjust length, in case of
948: * transition from large struct file pointers to ints.
949: */
950: memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int));
1.47 thorpej 951: cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
952: rights->m_len = CMSG_SPACE(nfds * sizeof(int));
1.50 thorpej 953: out:
1.95.2.3 ad 954: rw_exit(&p->p_cwdi->cwdi_lock);
1.50 thorpej 955: free(fdp, M_TEMP);
956: return (error);
1.1 cgd 957: }
958:
/*
 * unp_internalize: rewrite the SCM_RIGHTS control message held in
 * `control' from an array of the sender's file descriptor numbers into
 * an array of struct file pointers.  Each file gains one f_count and one
 * f_msgcount reference, and the global unp_rights counter is bumped once
 * per descriptor.
 *
 * `l' is the sending lwp; its process's descriptor table (p->p_fd) is
 * used to resolve the descriptors.
 *
 * Returns 0 on success, EINVAL if the header is not SOL_SOCKET/SCM_RIGHTS
 * or cmsg_len differs from the mbuf length, EBADF if fd_getfile() fails
 * for any descriptor, and E2BIG if the replacement buffer is NULL.
 */
1.5 andrew 959: int
1.86 christos 960: unp_internalize(struct mbuf *control, struct lwp *l)
1.1 cgd 961: {
1.86 christos 962: struct proc *p = l->l_proc;
1.24 cgd 963: struct filedesc *fdescp = p->p_fd;
1.73 martin 964: struct cmsghdr *newcm, *cm = mtod(control, struct cmsghdr *);
965: struct file **rp, **files;
1.46 augustss 966: struct file *fp;
967: int i, fd, *fdp;
1.24 cgd 968: int nfds;
969: u_int neededspace;
1.38 thorpej 970:
1.24 cgd 971: /* Sanity check the control message header */
1.66 jdolecek 972: if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
1.1 cgd 973: cm->cmsg_len != control->m_len)
974: return (EINVAL);
1.24 cgd 975:
976: /* Verify that the file descriptors are valid */
1.47 thorpej 977: nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int);
978: fdp = (int *)CMSG_DATA(cm);
1.24 cgd 979: for (i = 0; i < nfds; i++) {
980: fd = *fdp++;
1.58 pk 981: if ((fp = fd_getfile(fdescp, fd)) == NULL)
1.1 cgd 982: return (EBADF);
	/*
	 * fd_getfile() presumably returns with fp->f_lock held (it is
	 * released here without a visible acquire) -- TODO confirm.
	 */
1.95.2.1 ad 983: /* XXXSMP grab reference to file */
984: mutex_exit(&fp->f_lock);
1.1 cgd 985: }
1.24 cgd 986:
987: /* Make sure we have room for the struct file pointers */
1.47 thorpej 988: neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) -
989: control->m_len;
1.24 cgd 990: if (neededspace > M_TRAILINGSPACE(control)) {
991:
1.73 martin 992: /* allocate new space and copy header into it */
993: newcm = malloc(
994: CMSG_SPACE(nfds * sizeof(struct file *)),
995: M_MBUF, M_WAITOK);
	/*
	 * NOTE(review): the allocation uses M_WAITOK; confirm whether
	 * malloc() can actually return NULL here, i.e. whether this
	 * E2BIG path is reachable.
	 */
1.95.2.1 ad 996: if (newcm == NULL) {
997: /* XXXSMP drop references to files */
1.24 cgd 998: return (E2BIG);
1.95.2.1 ad 999: }
1.73 martin 1000: memcpy(newcm, cm, sizeof(struct cmsghdr));
1.80 perry 1001: files = (struct file **)CMSG_DATA(newcm);
1.73 martin 1002: } else {
1003: /* we can convert in-place */
1004: newcm = NULL;
1005: files = (struct file **)CMSG_DATA(cm);
1.24 cgd 1006: }
1007:
1008: /*
1009: * Transform the file descriptors into struct file pointers, in
1010: * reverse order so that if pointers are bigger than ints, the
1011: * int won't get clobbered until we're done.
1012: */
	/*
	 * The descriptor numbers are always read from the original message
	 * (cm); the pointers are written to `files', which is either the
	 * same buffer (in-place) or the data area of newcm.
	 *
	 * NOTE(review): the descriptors were checked above, but f_lock was
	 * dropped and no reference was kept (see XXXSMP), and fd_ofiles[] is
	 * re-read here without a NULL check -- confirm that nothing can
	 * close a descriptor between the two loops.
	 */
1.95.2.3 ad 1013: rw_enter(&fdescp->fd_lock, RW_READER);
1.94 cbiere 1014: fdp = (int *)CMSG_DATA(cm) + nfds;
1015: rp = files + nfds;
1.24 cgd 1016: for (i = 0; i < nfds; i++) {
1.94 cbiere 1017: fp = fdescp->fd_ofiles[*--fdp];
1.95.2.1 ad 1018: mutex_enter(&fp->f_lock);
1.57 pk 1019: #ifdef DIAGNOSTIC
1020: if (fp->f_iflags & FIF_WANTCLOSE)
1021: panic("unp_internalize: file already closed");
1022: #endif
1.94 cbiere 1023: *--rp = fp;
	/* One reference for the file itself, one for being in a message. */
1.1 cgd 1024: fp->f_count++;
1025: fp->f_msgcount++;
1.95.2.1 ad 1026: mutex_exit(&fp->f_lock);
1.1 cgd 1027: unp_rights++;
1028: }
1.95.2.3 ad 1029: rw_exit(&fdescp->fd_lock);
1.73 martin 1030:
	/*
	 * If a separate buffer was built, attach it to the mbuf as external
	 * storage (removing any external storage already present) and
	 * continue with it as the control message.
	 */
1031: if (newcm) {
1032: if (control->m_flags & M_EXT)
1033: MEXTREMOVE(control);
1034: MEXTADD(control, newcm,
1035: CMSG_SPACE(nfds * sizeof(struct file *)),
1036: M_MBUF, NULL, NULL);
1037: cm = newcm;
1038: }
1039:
1040: /* adjust message & mbuf to note amount of space actually used. */
1041: cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct file *));
1042: control->m_len = CMSG_SPACE(nfds * sizeof(struct file *));
1043:
1.1 cgd 1044: return (0);
1.30 thorpej 1045: }
1046:
/*
 * unp_addsockcred: build an SCM_CREDS control message describing the
 * credentials of lwp `l' (uid, euid, gid, egid and group list, all taken
 * from l->l_cred) and append it to the control chain `control'.
 *
 * Returns the head of the resulting chain: `control' itself when it was
 * non-NULL, otherwise the newly built mbuf.  If the message does not fit
 * in a plain mbuf and no external storage ends up attached (M_EXT
 * clear), the new mbuf is freed and `control' is returned unchanged.
 */
1047: struct mbuf *
1.92 ad 1048: unp_addsockcred(struct lwp *l, struct mbuf *control)
1.30 thorpej 1049: {
1050: struct cmsghdr *cmp;
1051: struct sockcred *sc;
1052: struct mbuf *m, *n;
1.47 thorpej 1053: int len, space, i;
1.30 thorpej 1054:
	/* len: value stored in cmsg_len; space: bytes occupied in the mbuf. */
1.92 ad 1055: len = CMSG_LEN(SOCKCREDSIZE(kauth_cred_ngroups(l->l_cred)));
1056: space = CMSG_SPACE(SOCKCREDSIZE(kauth_cred_ngroups(l->l_cred)));
1.30 thorpej 1057:
1058: m = m_get(M_WAIT, MT_CONTROL);
	/*
	 * Too big for the mbuf's internal data area: attach external
	 * storage, malloc'ed if it exceeds MCLBYTES, via m_clget()
	 * otherwise.
	 */
1.47 thorpej 1059: if (space > MLEN) {
1060: if (space > MCLBYTES)
1061: MEXTMALLOC(m, space, M_WAITOK);
1.30 thorpej 1062: else
1.59 matt 1063: m_clget(m, M_WAIT);
1.30 thorpej 1064: if ((m->m_flags & M_EXT) == 0) {
1065: m_free(m);
1066: return (control);
1067: }
1068: }
1069:
1.47 thorpej 1070: m->m_len = space;
1.30 thorpej 1071: m->m_next = NULL;
1072: cmp = mtod(m, struct cmsghdr *);
1073: sc = (struct sockcred *)CMSG_DATA(cmp);
1074: cmp->cmsg_len = len;
1075: cmp->cmsg_level = SOL_SOCKET;
1076: cmp->cmsg_type = SCM_CREDS;
1.92 ad 1077: sc->sc_uid = kauth_cred_getuid(l->l_cred);
1078: sc->sc_euid = kauth_cred_geteuid(l->l_cred);
1079: sc->sc_gid = kauth_cred_getgid(l->l_cred);
1080: sc->sc_egid = kauth_cred_getegid(l->l_cred);
1081: sc->sc_ngroups = kauth_cred_ngroups(l->l_cred);
1.30 thorpej 1082: for (i = 0; i < sc->sc_ngroups; i++)
1.92 ad 1083: sc->sc_groups[i] = kauth_cred_group(l->l_cred, i);
1.30 thorpej 1084:
1085: /*
1086: * If a control message already exists, append us to the end.
1087: */
1088: if (control != NULL) {
1089: for (n = control; n->m_next != NULL; n = n->m_next)
1090: ;
1091: n->m_next = m;
1092: } else
1093: control = m;
1094:
1095: return (control);
1.1 cgd 1096: }
1097:
/*
 * Garbage collector state:
 *   unp_defer  - number of files currently flagged FDEFER; incremented
 *                in unp_mark(), decremented in unp_gc()'s mark loop.
 *   unp_gcing  - non-zero while unp_gc() is running; a nested call
 *                returns immediately.
 * unixdomain is compared against so_proto->pr_domain in unp_gc().
 */
1098: int unp_defer, unp_gcing;
1099: extern struct domain unixdomain;
1100:
1.39 sommerfe 1101: /*
1102: * Comment added long after the fact explaining what's going on here.
1103: * Do a mark-sweep GC of file descriptors on the system, to free up
1104: * any which are caught in flight to an about-to-be-closed socket.
1105: *
1106: * Traditional mark-sweep gc's start at the "root", and mark
1107: * everything reachable from the root (which, in our case would be the
1108: * process table). The mark bits are cleared during the sweep.
1109: *
1110: * XXX For some inexplicable reason (perhaps because the file
1111: * descriptor tables used to live in the u area which could be swapped
1112: * out and thus hard to reach), we do multiple scans over the set of
1113: * descriptors, using *two* mark bits per object (DEFER and MARK).
1114: * Whenever we find a descriptor which references other descriptors,
1115: * the ones it references are marked with both bits, and we iterate
1116: * over the whole file table until there are no more DEFER bits set.
1117: * We also make an extra pass *before* the GC to clear the mark bits,
1118: * which could have been cleared at almost no cost during the previous
1119: * sweep.
1120: *
1121: * XXX MP: this needs to run with locks such that no other thread of
1122: * control can create or destroy references to file descriptors. it
1123: * may be necessary to defer the GC until later (when the locking
1124: * situation is more hospitable); it may be necessary to push this
1125: * into a separate thread.
1126: */
1.5 andrew 1127: void
1.76 matt 1128: unp_gc(void)
1.1 cgd 1129: {
1.46 augustss 1130: struct file *fp, *nextfp;
1131: struct socket *so, *so1;
1.8 mycroft 1132: struct file **extra_ref, **fpp;
1133: int nunref, i;
1.1 cgd 1134:
	/*
	 * Re-entry guard: the closef() calls made during the sweep can
	 * lead back here (see the long comment below); such nested calls
	 * return at once.
	 */
1135: if (unp_gcing)
1136: return;
1137: unp_gcing = 1;
1138: unp_defer = 0;
1.39 sommerfe 1139:
1.95.2.3 ad 1140: mutex_enter(&filelist_lock);
1141:
1.39 sommerfe 1142: /* Clear mark bits */
1.54 matt 1143: LIST_FOREACH(fp, &filehead, f_list)
1.1 cgd 1144: fp->f_flag &= ~(FMARK|FDEFER);
1.39 sommerfe 1145:
1146: /*
1147: * Iterate over the set of descriptors, marking ones believed
1148: * (based on refcount) to be referenced from a process, and
1149: * marking for rescan descriptors which are queued on a socket.
1150: */
1.1 cgd 1151: do {
1.54 matt 1152: LIST_FOREACH(fp, &filehead, f_list) {
1.95.2.3 ad 1153: mutex_enter(&fp->f_lock);
1154: if (fp->f_flag & FDEFER) {
1.1 cgd 1155: fp->f_flag &= ~FDEFER;
1156: unp_defer--;
1.39 sommerfe 1157: #ifdef DIAGNOSTIC
1158: if (fp->f_count == 0)
1159: panic("unp_gc: deferred unreferenced socket");
1160: #endif
1.1 cgd 1161: } else {
	/*
	 * Skip files that are unreferenced, already marked, or
	 * referenced only from in-flight messages
	 * (f_count == f_msgcount).
	 */
1.95.2.3 ad 1162: if (fp->f_count == 0 ||
1163: (fp->f_flag & FMARK) ||
1164: fp->f_count == fp->f_msgcount) {
1165: mutex_exit(&fp->f_lock);
1.1 cgd 1166: continue;
1.95.2.3 ad 1167: }
1.1 cgd 1168: }
1.39 sommerfe 1169: fp->f_flag |= FMARK;
1170:
	/*
	 * Only sockets whose protocol belongs to unixdomain and has
	 * PR_RIGHTS set are scanned for queued rights.
	 */
1.1 cgd 1171: if (fp->f_type != DTYPE_SOCKET ||
1.95.2.3 ad 1172: (so = (struct socket *)fp->f_data) == 0 ||
1173: so->so_proto->pr_domain != &unixdomain ||
1174: (so->so_proto->pr_flags&PR_RIGHTS) == 0) {
1175: mutex_exit(&fp->f_lock);
1.1 cgd 1176: continue;
1.95.2.3 ad 1177: }
1.1 cgd 1178: #ifdef notdef
1179: if (so->so_rcv.sb_flags & SB_LOCK) {
1180: /*
1181: * This is problematical; it's not clear
1182: * we need to wait for the sockbuf to be
1183: * unlocked (on a uniprocessor, at least),
1184: * and it's also not clear what to do
1185: * if sbwait returns an error due to receipt
1186: * of a signal. If sbwait does return
1187: * an error, we'll go into an infinite
1188: * loop. Delete all of this for now.
1189: */
1190: (void) sbwait(&so->so_rcv);
1191: goto restart;
1192: }
1193: #endif
	/*
	 * Release f_lock before scanning: unp_mark() acquires f_lock on
	 * every file it visits.
	 */
1.95.2.3 ad 1194: mutex_exit(&fp->f_lock);
1195:
1.39 sommerfe 1196: unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
1197: /*
1198: * mark descriptors referenced from sockets queued on the accept queue as well.
1199: */
1200: if (so->so_options & SO_ACCEPTCONN) {
1.54 matt 1201: TAILQ_FOREACH(so1, &so->so_q0, so_qe) {
1.39 sommerfe 1202: unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
1203: }
1.54 matt 1204: TAILQ_FOREACH(so1, &so->so_q, so_qe) {
1.39 sommerfe 1205: unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
1206: }
1207: }
1.1 cgd 1208: }
1209: } while (unp_defer);
1.95.2.3 ad 1210:
1211: mutex_exit(&filelist_lock);
1212:
1.8 mycroft 1213: /*
1.39 sommerfe 1214: * Sweep pass. Find unmarked descriptors, and free them.
1215: *
1.8 mycroft 1216: * We grab an extra reference to each of the file table entries
1217: * that are not otherwise accessible and then free the rights
1218: * that are stored in messages on them.
1219: *
1.57 pk 1220: * The bug in the original code is a little tricky, so I'll describe
1.8 mycroft 1221: * what's wrong with it here.
1222: *
1223: * It is incorrect to simply unp_discard each entry for f_msgcount
1224: * times -- consider the case of sockets A and B that contain
1225: * references to each other. On a last close of some other socket,
1226: * we trigger a gc since the number of outstanding rights (unp_rights)
1227: * is non-zero. If during the sweep phase the gc code unp_discards,
1228: * we end up doing a (full) closef on the descriptor. A closef on A
1229: * results in the following chain. Closef calls soo_close, which
1230: * calls soclose. Soclose calls first (through the switch
1231: * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply
1232: * returns because the previous instance had set unp_gcing, and
1233: * we return all the way back to soclose, which marks the socket
1234: * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush
1235: * to free up the rights that are queued in messages on the socket A,
1236: * i.e., the reference on B. The sorflush calls via the dom_dispose
1237: * switch unp_dispose, which unp_scans with unp_discard. This second
1238: * instance of unp_discard just calls closef on B.
1239: *
1240: * Well, a similar chain occurs on B, resulting in a sorflush on B,
1241: * which results in another closef on A. Unfortunately, A is already
1242: * being closed, and the descriptor has already been marked with
1243: * SS_NOFDREF, and soclose panics at this point.
1244: *
1245: * Here, we first take an extra reference to each inaccessible
1.39 sommerfe 1246: * descriptor. Then, if the inaccessible descriptor is a
1247: * socket, we call sorflush in case it is a Unix domain
1248: * socket. After we destroy all the rights carried in
1249: * messages, we do a last closef to get rid of our extra
1250: * reference. This is the last close, and the unp_detach etc
1251: * will shut down the socket.
1.8 mycroft 1252: *
1253: * 91/09/19, bsy@cs.cmu.edu
1254: */
	/*
	 * NOTE(review): nfiles is read here for the allocation and again
	 * at kmem_free() below, and the array is filled only after
	 * filelist_lock is re-acquired.  If nfiles can change in between,
	 * the free size would not match and the array could be too small
	 * -- confirm nfiles is stable here, or capture it in a local.
	 */
1.95.2.3 ad 1255: extra_ref = kmem_alloc(nfiles * sizeof(struct file *), KM_SLEEP);
1256:
1257: mutex_enter(&filelist_lock);
1.54 matt 1258: for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; fp != 0;
1.11 mycroft 1259: fp = nextfp) {
1.54 matt 1260: nextfp = LIST_NEXT(fp, f_list);
1.95.2.1 ad 1261: mutex_enter(&fp->f_lock);
	/*
	 * Unmarked, still referenced, and every reference comes from an
	 * in-flight message: record it and take the extra reference.
	 */
1.57 pk 1262: if (fp->f_count != 0 &&
1263: fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
1.8 mycroft 1264: *fpp++ = fp;
1265: nunref++;
1266: fp->f_count++;
1267: }
1.95.2.1 ad 1268: mutex_exit(&fp->f_lock);
1.1 cgd 1269: }
1.95.2.3 ad 1270: mutex_exit(&filelist_lock);
1271:
	/*
	 * First pass over the collected files: flush the receive side of
	 * each socket so the rights queued on it are disposed of.
	 * NOTE(review): f_lock is taken here with no matching mutex_exit in
	 * this function; presumably FILE_USE() releases it -- TODO confirm.
	 */
1.39 sommerfe 1272: for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
1.45 thorpej 1273: fp = *fpp;
1.95.2.1 ad 1274: mutex_enter(&fp->f_lock);
1.44 thorpej 1275: FILE_USE(fp);
1.39 sommerfe 1276: if (fp->f_type == DTYPE_SOCKET)
1277: sorflush((struct socket *)fp->f_data);
1.44 thorpej 1278: FILE_UNUSE(fp, NULL);
1.39 sommerfe 1279: }
	/* Second pass: drop the extra reference taken above. */
1.44 thorpej 1280: for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
1.45 thorpej 1281: fp = *fpp;
1.95.2.1 ad 1282: mutex_enter(&fp->f_lock);
1.44 thorpej 1283: FILE_USE(fp);
1.86 christos 1284: (void) closef(fp, (struct lwp *)0);
1.44 thorpej 1285: }
1.95.2.3 ad 1286: kmem_free(extra_ref, nfiles * sizeof(struct file *));
1.1 cgd 1287: unp_gcing = 0;
1288: }
1.5 andrew 1290: void
1.76 matt 1291: unp_dispose(struct mbuf *m)
1.1 cgd 1292: {
1.8 mycroft 1293:
1.1 cgd 1294: if (m)
1.39 sommerfe 1295: unp_scan(m, unp_discard, 1);
1.1 cgd 1296: }
1297:
1.5 andrew 1298: void
1.76 matt 1299: unp_scan(struct mbuf *m0, void (*op)(struct file *), int discard)
1.1 cgd 1300: {
1.46 augustss 1301: struct mbuf *m;
1302: struct file **rp;
1303: struct cmsghdr *cm;
1304: int i;
1.1 cgd 1305: int qfds;
1306:
1307: while (m0) {
1.48 thorpej 1308: for (m = m0; m; m = m->m_next) {
1.1 cgd 1309: if (m->m_type == MT_CONTROL &&
1310: m->m_len >= sizeof(*cm)) {
1311: cm = mtod(m, struct cmsghdr *);
1312: if (cm->cmsg_level != SOL_SOCKET ||
1313: cm->cmsg_type != SCM_RIGHTS)
1314: continue;
1.48 thorpej 1315: qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm)))
1316: / sizeof(struct file *);
1317: rp = (struct file **)CMSG_DATA(cm);
1.39 sommerfe 1318: for (i = 0; i < qfds; i++) {
1319: struct file *fp = *rp;
1320: if (discard)
1321: *rp = 0;
1322: (*op)(fp);
1323: rp++;
1324: }
1.1 cgd 1325: break; /* XXX, but saves time */
1326: }
1.48 thorpej 1327: }
1.52 thorpej 1328: m0 = m0->m_nextpkt;
1.1 cgd 1329: }
1330: }
1331:
1.5 andrew 1332: void
1.76 matt 1333: unp_mark(struct file *fp)
1.1 cgd 1334: {
1.80 perry 1335:
1.95.2.3 ad 1336: if (fp == NULL)
1.39 sommerfe 1337: return;
1.1 cgd 1338:
1.39 sommerfe 1339: /* If we're already deferred, don't screw up the defer count */
1.95.2.3 ad 1340: mutex_enter(&fp->f_lock);
1341: if (fp->f_flag & (FMARK | FDEFER)) {
1342: mutex_exit(&fp->f_lock);
1.1 cgd 1343: return;
1.95.2.3 ad 1344: }
1.39 sommerfe 1345:
1346: /*
1347: * Minimize the number of deferrals... Sockets are the only
1348: * type of descriptor which can hold references to another
1349: * descriptor, so just mark other descriptors, and defer
1350: * unmarked sockets for the next pass.
1351: */
1352: if (fp->f_type == DTYPE_SOCKET) {
1353: unp_defer++;
1354: if (fp->f_count == 0)
1355: panic("unp_mark: queued unref");
1356: fp->f_flag |= FDEFER;
1357: } else {
1358: fp->f_flag |= FMARK;
1359: }
1.95.2.3 ad 1360: mutex_exit(&fp->f_lock);
1.39 sommerfe 1361: return;
1.1 cgd 1362: }
1363:
1.5 andrew 1364: void
1.76 matt 1365: unp_discard(struct file *fp)
1.1 cgd 1366: {
1.39 sommerfe 1367: if (fp == NULL)
1368: return;
1.95.2.1 ad 1369: mutex_enter(&fp->f_lock);
1.57 pk 1370: fp->f_usecount++; /* i.e. FILE_USE(fp) sans locking */
1.1 cgd 1371: fp->f_msgcount--;
1.95.2.1 ad 1372: mutex_exit(&fp->f_lock);
1.1 cgd 1373: unp_rights--;
1.86 christos 1374: (void) closef(fp, (struct lwp *)0);
1.1 cgd 1375: }
CVSweb <webmaster@jp.NetBSD.org>