Annotation of src/sys/kern/uipc_usrreq.c, Revision 1.80.2.1
1.80.2.1! tron 1: /* $NetBSD$ */
1.30 thorpej 2:
3: /*-
1.77 matt 4: * Copyright (c) 1998, 2000, 2004 The NetBSD Foundation, Inc.
1.30 thorpej 5: * All rights reserved.
6: *
7: * This code is derived from software contributed to The NetBSD Foundation
8: * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9: * NASA Ames Research Center.
10: *
11: * Redistribution and use in source and binary forms, with or without
12: * modification, are permitted provided that the following conditions
13: * are met:
14: * 1. Redistributions of source code must retain the above copyright
15: * notice, this list of conditions and the following disclaimer.
16: * 2. Redistributions in binary form must reproduce the above copyright
17: * notice, this list of conditions and the following disclaimer in the
18: * documentation and/or other materials provided with the distribution.
19: * 3. All advertising materials mentioning features or use of this software
20: * must display the following acknowledgement:
21: * This product includes software developed by the NetBSD
22: * Foundation, Inc. and its contributors.
23: * 4. Neither the name of The NetBSD Foundation nor the names of its
24: * contributors may be used to endorse or promote products derived
25: * from this software without specific prior written permission.
26: *
27: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37: * POSSIBILITY OF SUCH DAMAGE.
38: */
1.10 cgd 39:
1.1 cgd 40: /*
1.8 mycroft 41: * Copyright (c) 1982, 1986, 1989, 1991, 1993
42: * The Regents of the University of California. All rights reserved.
1.1 cgd 43: *
44: * Redistribution and use in source and binary forms, with or without
45: * modification, are permitted provided that the following conditions
46: * are met:
47: * 1. Redistributions of source code must retain the above copyright
48: * notice, this list of conditions and the following disclaimer.
49: * 2. Redistributions in binary form must reproduce the above copyright
50: * notice, this list of conditions and the following disclaimer in the
51: * documentation and/or other materials provided with the distribution.
1.67 agc 52: * 3. Neither the name of the University nor the names of its contributors
53: * may be used to endorse or promote products derived from this software
54: * without specific prior written permission.
55: *
56: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66: * SUCH DAMAGE.
67: *
68: * @(#)uipc_usrreq.c 8.9 (Berkeley) 5/14/95
69: */
70:
71: /*
72: * Copyright (c) 1997 Christopher G. Demetriou. All rights reserved.
73: *
74: * Redistribution and use in source and binary forms, with or without
75: * modification, are permitted provided that the following conditions
76: * are met:
77: * 1. Redistributions of source code must retain the above copyright
78: * notice, this list of conditions and the following disclaimer.
79: * 2. Redistributions in binary form must reproduce the above copyright
80: * notice, this list of conditions and the following disclaimer in the
81: * documentation and/or other materials provided with the distribution.
1.1 cgd 82: * 3. All advertising materials mentioning features or use of this software
83: * must display the following acknowledgement:
84: * This product includes software developed by the University of
85: * California, Berkeley and its contributors.
86: * 4. Neither the name of the University nor the names of its contributors
87: * may be used to endorse or promote products derived from this software
88: * without specific prior written permission.
89: *
90: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
91: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
92: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
93: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
94: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
95: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
96: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
97: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
98: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
99: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
100: * SUCH DAMAGE.
101: *
1.31 fvdl 102: * @(#)uipc_usrreq.c 8.9 (Berkeley) 5/14/95
1.1 cgd 103: */
1.53 lukem 104:
105: #include <sys/cdefs.h>
1.80.2.1! tron 106: __KERNEL_RCSID(0, "$NetBSD$");
1.1 cgd 107:
1.7 mycroft 108: #include <sys/param.h>
1.8 mycroft 109: #include <sys/systm.h>
1.7 mycroft 110: #include <sys/proc.h>
111: #include <sys/filedesc.h>
112: #include <sys/domain.h>
113: #include <sys/protosw.h>
114: #include <sys/socket.h>
115: #include <sys/socketvar.h>
116: #include <sys/unpcb.h>
117: #include <sys/un.h>
118: #include <sys/namei.h>
119: #include <sys/vnode.h>
120: #include <sys/file.h>
121: #include <sys/stat.h>
122: #include <sys/mbuf.h>
1.1 cgd 123:
124: /*
125: * Unix communications domain.
126: *
127: * TODO:
128: * SEQPACKET, RDM
129: * rethink name space problems
130: * need a proper out-of-band
131: */
1.77 matt 132: const struct sockaddr_un sun_noname = { sizeof(sun_noname), AF_LOCAL };
1.1 cgd 133: ino_t unp_ino; /* prototype for fake inode numbers */
134:
1.74 junyoung 135: struct mbuf *unp_addsockcred(struct proc *, struct mbuf *);
1.30 thorpej 136:
1.20 mycroft 137: int
1.76 matt 138: unp_output(struct mbuf *m, struct mbuf *control, struct unpcb *unp,
139: struct proc *p)
1.20 mycroft 140: {
141: struct socket *so2;
1.77 matt 142: const struct sockaddr_un *sun;
1.20 mycroft 143:
144: so2 = unp->unp_conn->unp_socket;
145: if (unp->unp_addr)
146: sun = unp->unp_addr;
147: else
148: sun = &sun_noname;
1.30 thorpej 149: if (unp->unp_conn->unp_flags & UNP_WANTCRED)
150: control = unp_addsockcred(p, control);
1.20 mycroft 151: if (sbappendaddr(&so2->so_rcv, (struct sockaddr *)sun, m,
152: control) == 0) {
153: m_freem(control);
154: m_freem(m);
1.79 darrenr 155: so2->so_rcv.sb_overflowed++;
1.60 christos 156: return (ENOBUFS);
1.20 mycroft 157: } else {
158: sorwakeup(so2);
159: return (0);
160: }
161: }
162:
163: void
1.76 matt 164: unp_setsockaddr(struct unpcb *unp, struct mbuf *nam)
1.20 mycroft 165: {
1.77 matt 166: const struct sockaddr_un *sun;
1.20 mycroft 167:
168: if (unp->unp_addr)
169: sun = unp->unp_addr;
170: else
171: sun = &sun_noname;
172: nam->m_len = sun->sun_len;
1.56 itojun 173: if (nam->m_len > MLEN)
1.27 thorpej 174: MEXTMALLOC(nam, nam->m_len, M_WAITOK);
1.36 perry 175: memcpy(mtod(nam, caddr_t), sun, (size_t)nam->m_len);
1.20 mycroft 176: }
177:
178: void
1.76 matt 179: unp_setpeeraddr(struct unpcb *unp, struct mbuf *nam)
1.20 mycroft 180: {
1.77 matt 181: const struct sockaddr_un *sun;
1.20 mycroft 182:
183: if (unp->unp_conn && unp->unp_conn->unp_addr)
184: sun = unp->unp_conn->unp_addr;
185: else
186: sun = &sun_noname;
187: nam->m_len = sun->sun_len;
1.56 itojun 188: if (nam->m_len > MLEN)
1.27 thorpej 189: MEXTMALLOC(nam, nam->m_len, M_WAITOK);
1.36 perry 190: memcpy(mtod(nam, caddr_t), sun, (size_t)nam->m_len);
1.20 mycroft 191: }
192:
1.1 cgd 193: /*ARGSUSED*/
1.5 andrew 194: int
1.76 matt 195: uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
196: struct mbuf *control, struct proc *p)
1.1 cgd 197: {
198: struct unpcb *unp = sotounpcb(so);
1.46 augustss 199: struct socket *so2;
1.75 christos 200: u_int newhiwat;
1.46 augustss 201: int error = 0;
1.1 cgd 202:
203: if (req == PRU_CONTROL)
204: return (EOPNOTSUPP);
1.20 mycroft 205:
1.22 mycroft 206: #ifdef DIAGNOSTIC
207: if (req != PRU_SEND && req != PRU_SENDOOB && control)
208: panic("uipc_usrreq: unexpected control mbuf");
209: #endif
1.1 cgd 210: if (unp == 0 && req != PRU_ATTACH) {
211: error = EINVAL;
212: goto release;
213: }
1.20 mycroft 214:
1.1 cgd 215: switch (req) {
216:
217: case PRU_ATTACH:
1.20 mycroft 218: if (unp != 0) {
1.1 cgd 219: error = EISCONN;
220: break;
221: }
222: error = unp_attach(so);
223: break;
224:
225: case PRU_DETACH:
226: unp_detach(unp);
227: break;
228:
229: case PRU_BIND:
1.62 fvdl 230: error = unp_bind(unp, nam, p);
1.1 cgd 231: break;
232:
233: case PRU_LISTEN:
234: if (unp->unp_vnode == 0)
235: error = EINVAL;
236: break;
237:
238: case PRU_CONNECT:
1.62 fvdl 239: error = unp_connect(so, nam, p);
1.1 cgd 240: break;
241:
242: case PRU_CONNECT2:
1.72 matt 243: error = unp_connect2(so, (struct socket *)nam, PRU_CONNECT2);
1.1 cgd 244: break;
245:
246: case PRU_DISCONNECT:
247: unp_disconnect(unp);
248: break;
249:
250: case PRU_ACCEPT:
1.20 mycroft 251: unp_setpeeraddr(unp, nam);
1.72 matt 252: /*
253: * Mark the initiating STREAM socket as connected *ONLY*
254: * after it's been accepted. This prevents a client from
255: * overrunning a server and receiving ECONNREFUSED.
256: */
257: if (unp->unp_conn != NULL &&
258: (unp->unp_conn->unp_socket->so_state & SS_ISCONNECTING))
259: soisconnected(unp->unp_conn->unp_socket);
1.1 cgd 260: break;
261:
262: case PRU_SHUTDOWN:
263: socantsendmore(so);
264: unp_shutdown(unp);
265: break;
266:
267: case PRU_RCVD:
268: switch (so->so_type) {
269:
270: case SOCK_DGRAM:
271: panic("uipc 1");
272: /*NOTREACHED*/
273:
274: case SOCK_STREAM:
275: #define rcv (&so->so_rcv)
276: #define snd (&so2->so_snd)
277: if (unp->unp_conn == 0)
278: break;
279: so2 = unp->unp_conn->unp_socket;
280: /*
281: * Adjust backpressure on sender
282: * and wakeup any waiting to write.
283: */
284: snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
285: unp->unp_mbcnt = rcv->sb_mbcnt;
1.75 christos 286: newhiwat = snd->sb_hiwat + unp->unp_cc - rcv->sb_cc;
287: (void)chgsbsize(so2->so_uid,
288: &snd->sb_hiwat, newhiwat, RLIM_INFINITY);
1.1 cgd 289: unp->unp_cc = rcv->sb_cc;
290: sowwakeup(so2);
291: #undef snd
292: #undef rcv
293: break;
294:
295: default:
296: panic("uipc 2");
297: }
298: break;
299:
300: case PRU_SEND:
1.30 thorpej 301: /*
302: * Note: unp_internalize() rejects any control message
303: * other than SCM_RIGHTS, and only allows one. This
304: * has the side-effect of preventing a caller from
305: * forging SCM_CREDS.
306: */
1.80.2.1! tron 307: if (control && (error = unp_internalize(control, p))) {
! 308: goto die;
! 309: }
1.1 cgd 310: switch (so->so_type) {
311:
312: case SOCK_DGRAM: {
313: if (nam) {
1.20 mycroft 314: if ((so->so_state & SS_ISCONNECTED) != 0) {
1.1 cgd 315: error = EISCONN;
1.21 mycroft 316: goto die;
1.1 cgd 317: }
1.62 fvdl 318: error = unp_connect(so, nam, p);
1.20 mycroft 319: if (error) {
1.23 mycroft 320: die:
1.21 mycroft 321: m_freem(control);
1.20 mycroft 322: m_freem(m);
1.1 cgd 323: break;
1.20 mycroft 324: }
1.1 cgd 325: } else {
1.20 mycroft 326: if ((so->so_state & SS_ISCONNECTED) == 0) {
1.1 cgd 327: error = ENOTCONN;
1.21 mycroft 328: goto die;
1.1 cgd 329: }
330: }
1.30 thorpej 331: error = unp_output(m, control, unp, p);
1.1 cgd 332: if (nam)
333: unp_disconnect(unp);
334: break;
335: }
336:
337: case SOCK_STREAM:
338: #define rcv (&so2->so_rcv)
339: #define snd (&so->so_snd)
340: if (unp->unp_conn == 0)
341: panic("uipc 3");
342: so2 = unp->unp_conn->unp_socket;
1.30 thorpej 343: if (unp->unp_conn->unp_flags & UNP_WANTCRED) {
344: /*
345: * Credentials are passed only once on
346: * SOCK_STREAM.
347: */
348: unp->unp_conn->unp_flags &= ~UNP_WANTCRED;
349: control = unp_addsockcred(p, control);
350: }
1.1 cgd 351: /*
352: * Send to paired receive port, and then reduce
353: * send buffer hiwater marks to maintain backpressure.
354: * Wake up readers.
355: */
356: if (control) {
1.21 mycroft 357: if (sbappendcontrol(rcv, m, control) == 0)
358: m_freem(control);
1.1 cgd 359: } else
360: sbappend(rcv, m);
361: snd->sb_mbmax -=
362: rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
363: unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
1.75 christos 364: newhiwat = snd->sb_hiwat -
365: (rcv->sb_cc - unp->unp_conn->unp_cc);
366: (void)chgsbsize(so->so_uid,
367: &snd->sb_hiwat, newhiwat, RLIM_INFINITY);
1.1 cgd 368: unp->unp_conn->unp_cc = rcv->sb_cc;
369: sorwakeup(so2);
370: #undef snd
371: #undef rcv
372: break;
373:
374: default:
375: panic("uipc 4");
376: }
377: break;
378:
379: case PRU_ABORT:
380: unp_drop(unp, ECONNABORTED);
1.39 sommerfe 381:
382: #ifdef DIAGNOSTIC
383: if (so->so_pcb == 0)
384: panic("uipc 5: drop killed pcb");
385: #endif
386: unp_detach(unp);
1.1 cgd 387: break;
388:
389: case PRU_SENSE:
390: ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
391: if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
392: so2 = unp->unp_conn->unp_socket;
393: ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
394: }
395: ((struct stat *) m)->st_dev = NODEV;
396: if (unp->unp_ino == 0)
397: unp->unp_ino = unp_ino++;
1.25 kleink 398: ((struct stat *) m)->st_atimespec =
399: ((struct stat *) m)->st_mtimespec =
400: ((struct stat *) m)->st_ctimespec = unp->unp_ctime;
1.1 cgd 401: ((struct stat *) m)->st_ino = unp->unp_ino;
402: return (0);
403:
404: case PRU_RCVOOB:
1.20 mycroft 405: error = EOPNOTSUPP;
406: break;
1.1 cgd 407:
408: case PRU_SENDOOB:
1.22 mycroft 409: m_freem(control);
1.20 mycroft 410: m_freem(m);
1.1 cgd 411: error = EOPNOTSUPP;
412: break;
413:
414: case PRU_SOCKADDR:
1.20 mycroft 415: unp_setsockaddr(unp, nam);
1.1 cgd 416: break;
417:
418: case PRU_PEERADDR:
1.20 mycroft 419: unp_setpeeraddr(unp, nam);
1.1 cgd 420: break;
421:
422: default:
423: panic("piusrreq");
424: }
1.20 mycroft 425:
1.1 cgd 426: release:
427: return (error);
428: }
429:
430: /*
1.30 thorpej 431: * Unix domain socket option processing.
432: */
433: int
1.76 matt 434: uipc_ctloutput(int op, struct socket *so, int level, int optname,
435: struct mbuf **mp)
1.30 thorpej 436: {
437: struct unpcb *unp = sotounpcb(so);
438: struct mbuf *m = *mp;
439: int optval = 0, error = 0;
440:
441: if (level != 0) {
442: error = EINVAL;
443: if (op == PRCO_SETOPT && m)
444: (void) m_free(m);
445: } else switch (op) {
446:
447: case PRCO_SETOPT:
448: switch (optname) {
449: case LOCAL_CREDS:
1.72 matt 450: case LOCAL_CONNWAIT:
1.30 thorpej 451: if (m == NULL || m->m_len != sizeof(int))
452: error = EINVAL;
453: else {
454: optval = *mtod(m, int *);
455: switch (optname) {
456: #define OPTSET(bit) \
457: if (optval) \
458: unp->unp_flags |= (bit); \
459: else \
460: unp->unp_flags &= ~(bit);
461:
462: case LOCAL_CREDS:
463: OPTSET(UNP_WANTCRED);
464: break;
1.72 matt 465: case LOCAL_CONNWAIT:
466: OPTSET(UNP_CONNWAIT);
467: break;
1.30 thorpej 468: }
469: }
470: break;
471: #undef OPTSET
472:
473: default:
474: error = ENOPROTOOPT;
475: break;
476: }
477: if (m)
478: (void) m_free(m);
479: break;
480:
481: case PRCO_GETOPT:
482: switch (optname) {
483: case LOCAL_CREDS:
484: *mp = m = m_get(M_WAIT, MT_SOOPTS);
485: m->m_len = sizeof(int);
486: switch (optname) {
487:
488: #define OPTBIT(bit) (unp->unp_flags & (bit) ? 1 : 0)
489:
490: case LOCAL_CREDS:
491: optval = OPTBIT(UNP_WANTCRED);
492: break;
493: }
494: *mtod(m, int *) = optval;
495: break;
496: #undef OPTBIT
497:
498: default:
499: error = ENOPROTOOPT;
500: break;
501: }
502: break;
503: }
504: return (error);
505: }
506:
507: /*
1.1 cgd 508: * Both send and receive buffers are allocated PIPSIZ bytes of buffering
509: * for stream sockets, although the total for sender and receiver is
510: * actually only PIPSIZ.
511: * Datagram sockets really use the sendspace as the maximum datagram size,
512: * and don't really want to reserve the sendspace. Their recvspace should
513: * be large enough for at least one max-size datagram plus address.
514: */
515: #define PIPSIZ 4096
516: u_long unpst_sendspace = PIPSIZ;
517: u_long unpst_recvspace = PIPSIZ;
518: u_long unpdg_sendspace = 2*1024; /* really max datagram size */
519: u_long unpdg_recvspace = 4*1024;
520:
521: int unp_rights; /* file descriptors in flight */
522:
1.5 andrew 523: int
1.76 matt 524: unp_attach(struct socket *so)
1.1 cgd 525: {
1.46 augustss 526: struct unpcb *unp;
1.25 kleink 527: struct timeval tv;
1.1 cgd 528: int error;
1.80 perry 529:
1.1 cgd 530: if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
531: switch (so->so_type) {
532:
533: case SOCK_STREAM:
534: error = soreserve(so, unpst_sendspace, unpst_recvspace);
535: break;
536:
537: case SOCK_DGRAM:
538: error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
539: break;
1.8 mycroft 540:
541: default:
542: panic("unp_attach");
1.1 cgd 543: }
544: if (error)
545: return (error);
546: }
1.14 mycroft 547: unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT);
548: if (unp == NULL)
1.1 cgd 549: return (ENOBUFS);
1.36 perry 550: memset((caddr_t)unp, 0, sizeof(*unp));
1.14 mycroft 551: unp->unp_socket = so;
1.15 mycroft 552: so->so_pcb = unp;
1.25 kleink 553: microtime(&tv);
554: TIMEVAL_TO_TIMESPEC(&tv, &unp->unp_ctime);
1.1 cgd 555: return (0);
556: }
557:
1.17 pk 558: void
1.76 matt 559: unp_detach(struct unpcb *unp)
1.1 cgd 560: {
1.80 perry 561:
1.1 cgd 562: if (unp->unp_vnode) {
563: unp->unp_vnode->v_socket = 0;
564: vrele(unp->unp_vnode);
565: unp->unp_vnode = 0;
566: }
567: if (unp->unp_conn)
568: unp_disconnect(unp);
569: while (unp->unp_refs)
570: unp_drop(unp->unp_refs, ECONNRESET);
571: soisdisconnected(unp->unp_socket);
572: unp->unp_socket->so_pcb = 0;
1.20 mycroft 573: if (unp->unp_addr)
1.26 thorpej 574: free(unp->unp_addr, M_SONAME);
1.8 mycroft 575: if (unp_rights) {
576: /*
577: * Normally the receive buffer is flushed later,
578: * in sofree, but if our receive buffer holds references
579: * to descriptors that are now garbage, we will dispose
580: * of those descriptor references after the garbage collector
581: * gets them (resulting in a "panic: closef: count < 0").
582: */
583: sorflush(unp->unp_socket);
1.14 mycroft 584: free(unp, M_PCB);
1.1 cgd 585: unp_gc();
1.14 mycroft 586: } else
587: free(unp, M_PCB);
1.1 cgd 588: }
589:
1.5 andrew 590: int
1.76 matt 591: unp_bind(struct unpcb *unp, struct mbuf *nam, struct proc *p)
1.1 cgd 592: {
1.27 thorpej 593: struct sockaddr_un *sun;
1.46 augustss 594: struct vnode *vp;
1.70 hannken 595: struct mount *mp;
1.1 cgd 596: struct vattr vattr;
1.27 thorpej 597: size_t addrlen;
1.1 cgd 598: int error;
599: struct nameidata nd;
600:
1.20 mycroft 601: if (unp->unp_vnode != 0)
602: return (EINVAL);
1.27 thorpej 603:
604: /*
605: * Allocate the new sockaddr. We have to allocate one
606: * extra byte so that we can ensure that the pathname
607: * is nul-terminated.
608: */
609: addrlen = nam->m_len + 1;
610: sun = malloc(addrlen, M_SONAME, M_WAITOK);
611: m_copydata(nam, 0, nam->m_len, (caddr_t)sun);
612: *(((char *)sun) + nam->m_len) = '\0';
613:
1.70 hannken 614: restart:
1.9 mycroft 615: NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
1.62 fvdl 616: sun->sun_path, p);
1.27 thorpej 617:
1.1 cgd 618: /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
1.16 christos 619: if ((error = namei(&nd)) != 0)
1.27 thorpej 620: goto bad;
1.9 mycroft 621: vp = nd.ni_vp;
1.70 hannken 622: if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1.9 mycroft 623: VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
624: if (nd.ni_dvp == vp)
625: vrele(nd.ni_dvp);
1.1 cgd 626: else
1.9 mycroft 627: vput(nd.ni_dvp);
1.1 cgd 628: vrele(vp);
1.70 hannken 629: if (vp != NULL) {
630: error = EADDRINUSE;
631: goto bad;
632: }
633: error = vn_start_write(NULL, &mp,
634: V_WAIT | V_SLEEPONLY | V_PCATCH);
635: if (error)
636: goto bad;
637: goto restart;
1.1 cgd 638: }
639: VATTR_NULL(&vattr);
640: vattr.va_type = VSOCK;
1.9 mycroft 641: vattr.va_mode = ACCESSPERMS;
1.62 fvdl 642: VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1.16 christos 643: error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1.70 hannken 644: vn_finished_write(mp, 0);
1.16 christos 645: if (error)
1.27 thorpej 646: goto bad;
1.9 mycroft 647: vp = nd.ni_vp;
1.1 cgd 648: vp->v_socket = unp->unp_socket;
649: unp->unp_vnode = vp;
1.27 thorpej 650: unp->unp_addrlen = addrlen;
651: unp->unp_addr = sun;
1.31 fvdl 652: VOP_UNLOCK(vp, 0);
1.1 cgd 653: return (0);
1.27 thorpej 654:
655: bad:
656: free(sun, M_SONAME);
657: return (error);
1.1 cgd 658: }
659:
/*
 * Connect `so' to the socket bound at the pathname in `nam'.
 *
 * The path is looked up, checked to be a socket node writable by the
 * caller, and matched against the type of `so'.  For protocols with
 * PR_CONNREQUIRED a new socket is spawned from the listening socket,
 * inherits its bound address and flags, and becomes the peer.
 *
 * Returns 0 or an errno: the namei()/VOP_ACCESS() error, ENOTSOCK,
 * ECONNREFUSED, EPROTOTYPE, or the result of unp_connect2().
 */
int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *sun;
	struct vnode *vp;
	struct socket *target, *newso;
	struct unpcb *lunp, *nunp;
	size_t addrlen;
	int error;
	struct nameidata nd;

	/*
	 * Allocate a temporary sockaddr.  We have to allocate one extra
	 * byte so that we can ensure that the pathname is nul-terminated.
	 * When we establish the connection, we copy the other PCB's
	 * sockaddr to our own.
	 */
	addrlen = nam->m_len + 1;
	sun = malloc(addrlen, M_SONAME, M_WAITOK);
	m_copydata(nam, 0, nam->m_len, (caddr_t)sun);
	((char *)sun)[nam->m_len] = '\0';

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, sun->sun_path, p);

	error = namei(&nd);
	if (error != 0)
		goto bad2;
	vp = nd.ni_vp;

	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto bad;
	}
	error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p);
	if (error != 0)
		goto bad;

	target = vp->v_socket;
	if (target == NULL) {
		error = ECONNREFUSED;
		goto bad;
	}
	if (so->so_type != target->so_type) {
		error = EPROTOTYPE;
		goto bad;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((target->so_options & SO_ACCEPTCONN) == 0 ||
		    (newso = sonewconn(target, 0)) == NULL) {
			error = ECONNREFUSED;
			goto bad;
		}
		lunp = sotounpcb(target);
		nunp = sotounpcb(newso);
		/* The spawned socket reports the listener's name. */
		if (lunp->unp_addr != NULL) {
			nunp->unp_addr = malloc(lunp->unp_addrlen,
			    M_SONAME, M_WAITOK);
			memcpy(nunp->unp_addr, lunp->unp_addr,
			    lunp->unp_addrlen);
			nunp->unp_addrlen = lunp->unp_addrlen;
		}
		nunp->unp_flags = lunp->unp_flags;
		target = newso;
	}
	error = unp_connect2(so, target, PRU_CONNECT);

 bad:
	vput(vp);
 bad2:
	free(sun, M_SONAME);
	return (error);
}
727:
1.5 andrew 728: int
1.76 matt 729: unp_connect2(struct socket *so, struct socket *so2, int req)
1.1 cgd 730: {
1.46 augustss 731: struct unpcb *unp = sotounpcb(so);
732: struct unpcb *unp2;
1.1 cgd 733:
734: if (so2->so_type != so->so_type)
735: return (EPROTOTYPE);
736: unp2 = sotounpcb(so2);
737: unp->unp_conn = unp2;
738: switch (so->so_type) {
739:
740: case SOCK_DGRAM:
741: unp->unp_nextref = unp2->unp_refs;
742: unp2->unp_refs = unp;
743: soisconnected(so);
744: break;
745:
746: case SOCK_STREAM:
747: unp2->unp_conn = unp;
1.72 matt 748: if (req == PRU_CONNECT &&
749: ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT))
750: soisconnecting(so);
751: else
752: soisconnected(so);
1.1 cgd 753: soisconnected(so2);
754: break;
755:
756: default:
757: panic("unp_connect2");
758: }
759: return (0);
760: }
761:
1.5 andrew 762: void
1.76 matt 763: unp_disconnect(struct unpcb *unp)
1.1 cgd 764: {
1.46 augustss 765: struct unpcb *unp2 = unp->unp_conn;
1.1 cgd 766:
767: if (unp2 == 0)
768: return;
769: unp->unp_conn = 0;
770: switch (unp->unp_socket->so_type) {
771:
772: case SOCK_DGRAM:
773: if (unp2->unp_refs == unp)
774: unp2->unp_refs = unp->unp_nextref;
775: else {
776: unp2 = unp2->unp_refs;
777: for (;;) {
778: if (unp2 == 0)
779: panic("unp_disconnect");
780: if (unp2->unp_nextref == unp)
781: break;
782: unp2 = unp2->unp_nextref;
783: }
784: unp2->unp_nextref = unp->unp_nextref;
785: }
786: unp->unp_nextref = 0;
787: unp->unp_socket->so_state &= ~SS_ISCONNECTED;
788: break;
789:
790: case SOCK_STREAM:
791: soisdisconnected(unp->unp_socket);
792: unp2->unp_conn = 0;
793: soisdisconnected(unp2->unp_socket);
794: break;
795: }
796: }
797:
#ifdef notdef
/* Compiled out: aborting would just detach the PCB. */
unp_abort(struct unpcb *unp)
{

	unp_detach(unp);
}
#endif
804:
1.5 andrew 805: void
1.76 matt 806: unp_shutdown(struct unpcb *unp)
1.1 cgd 807: {
808: struct socket *so;
809:
810: if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
811: (so = unp->unp_conn->unp_socket))
812: socantrcvmore(so);
813: }
814:
1.5 andrew 815: void
1.76 matt 816: unp_drop(struct unpcb *unp, int errno)
1.1 cgd 817: {
818: struct socket *so = unp->unp_socket;
819:
820: so->so_error = errno;
821: unp_disconnect(unp);
822: if (so->so_head) {
1.15 mycroft 823: so->so_pcb = 0;
1.14 mycroft 824: sofree(so);
1.20 mycroft 825: if (unp->unp_addr)
1.26 thorpej 826: free(unp->unp_addr, M_SONAME);
1.14 mycroft 827: free(unp, M_PCB);
1.1 cgd 828: }
829: }
830:
#ifdef notdef
/* Compiled out: intentionally empty drain hook. */
unp_drain(void)
{

}
#endif
837:
1.5 andrew 838: int
1.78 jonathan 839: unp_externalize(struct mbuf *rights, struct proc *p)
1.1 cgd 840: {
1.46 augustss 841: struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
1.47 thorpej 842: int i, *fdp;
1.46 augustss 843: struct file **rp;
844: struct file *fp;
1.50 thorpej 845: int nfds, error = 0;
1.47 thorpej 846:
847: nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
848: sizeof(struct file *);
849: rp = (struct file **)CMSG_DATA(cm);
1.1 cgd 850:
1.50 thorpej 851: fdp = malloc(nfds * sizeof(int), M_TEMP, M_WAITOK);
852:
1.39 sommerfe 853: /* Make sure the recipient should be able to see the descriptors.. */
1.42 thorpej 854: if (p->p_cwdi->cwdi_rdir != NULL) {
1.48 thorpej 855: rp = (struct file **)CMSG_DATA(cm);
1.39 sommerfe 856: for (i = 0; i < nfds; i++) {
857: fp = *rp++;
858: /*
859: * If we are in a chroot'ed directory, and
860: * someone wants to pass us a directory, make
861: * sure it's inside the subtree we're allowed
862: * to access.
863: */
864: if (fp->f_type == DTYPE_VNODE) {
865: struct vnode *vp = (struct vnode *)fp->f_data;
866: if ((vp->v_type == VDIR) &&
1.62 fvdl 867: !vn_isunder(vp, p->p_cwdi->cwdi_rdir, p)) {
1.39 sommerfe 868: error = EPERM;
869: break;
870: }
871: }
872: }
873: }
1.50 thorpej 874:
875: restart:
1.47 thorpej 876: rp = (struct file **)CMSG_DATA(cm);
1.50 thorpej 877: if (error != 0) {
1.24 cgd 878: for (i = 0; i < nfds; i++) {
1.1 cgd 879: fp = *rp;
1.39 sommerfe 880: /*
881: * zero the pointer before calling unp_discard,
882: * since it may end up in unp_gc()..
883: */
884: *rp++ = 0;
1.1 cgd 885: unp_discard(fp);
886: }
1.50 thorpej 887: goto out;
1.1 cgd 888: }
1.50 thorpej 889:
1.24 cgd 890: /*
1.50 thorpej 891: * First loop -- allocate file descriptor table slots for the
892: * new descriptors.
1.24 cgd 893: */
894: for (i = 0; i < nfds; i++) {
1.39 sommerfe 895: fp = *rp++;
1.50 thorpej 896: if ((error = fdalloc(p, 0, &fdp[i])) != 0) {
1.49 thorpej 897: /*
1.50 thorpej 898: * Back out what we've done so far.
1.49 thorpej 899: */
1.50 thorpej 900: for (--i; i >= 0; i--)
901: fdremove(p->p_fd, fdp[i]);
902:
903: if (error == ENOSPC) {
904: fdexpand(p);
905: error = 0;
906: } else {
907: /*
908: * This is the error that has historically
909: * been returned, and some callers may
910: * expect it.
911: */
912: error = EMSGSIZE;
913: }
914: goto restart;
1.49 thorpej 915: }
1.50 thorpej 916:
917: /*
918: * Make the slot reference the descriptor so that
919: * fdalloc() works properly.. We finalize it all
920: * in the loop below.
921: */
922: p->p_fd->fd_ofiles[fdp[i]] = fp;
1.1 cgd 923: }
1.24 cgd 924:
925: /*
1.50 thorpej 926: * Now that adding them has succeeded, update all of the
927: * descriptor passing state.
1.24 cgd 928: */
1.50 thorpej 929: rp = (struct file **)CMSG_DATA(cm);
930: for (i = 0; i < nfds; i++) {
931: fp = *rp++;
932: fp->f_msgcount--;
933: unp_rights--;
934: }
935:
936: /*
937: * Copy temporary array to message and adjust length, in case of
938: * transition from large struct file pointers to ints.
939: */
940: memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int));
1.47 thorpej 941: cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
942: rights->m_len = CMSG_SPACE(nfds * sizeof(int));
1.50 thorpej 943: out:
944: free(fdp, M_TEMP);
945: return (error);
1.1 cgd 946: }
947:
1.5 andrew 948: int
1.76 matt 949: unp_internalize(struct mbuf *control, struct proc *p)
1.1 cgd 950: {
1.24 cgd 951: struct filedesc *fdescp = p->p_fd;
1.73 martin 952: struct cmsghdr *newcm, *cm = mtod(control, struct cmsghdr *);
953: struct file **rp, **files;
1.46 augustss 954: struct file *fp;
955: int i, fd, *fdp;
1.24 cgd 956: int nfds;
957: u_int neededspace;
1.38 thorpej 958:
1.24 cgd 959: /* Sanity check the control message header */
1.66 jdolecek 960: if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
1.1 cgd 961: cm->cmsg_len != control->m_len)
962: return (EINVAL);
1.24 cgd 963:
964: /* Verify that the file descriptors are valid */
1.47 thorpej 965: nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int);
966: fdp = (int *)CMSG_DATA(cm);
1.24 cgd 967: for (i = 0; i < nfds; i++) {
968: fd = *fdp++;
1.58 pk 969: if ((fp = fd_getfile(fdescp, fd)) == NULL)
1.1 cgd 970: return (EBADF);
1.58 pk 971: simple_unlock(&fp->f_slock);
1.1 cgd 972: }
1.24 cgd 973:
974: /* Make sure we have room for the struct file pointers */
1.47 thorpej 975: neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) -
976: control->m_len;
1.24 cgd 977: if (neededspace > M_TRAILINGSPACE(control)) {
978:
1.73 martin 979: /* allocate new space and copy header into it */
980: newcm = malloc(
981: CMSG_SPACE(nfds * sizeof(struct file *)),
982: M_MBUF, M_WAITOK);
983: if (newcm == NULL)
1.24 cgd 984: return (E2BIG);
1.73 martin 985: memcpy(newcm, cm, sizeof(struct cmsghdr));
1.80 perry 986: files = (struct file **)CMSG_DATA(newcm);
1.73 martin 987: } else {
988: /* we can convert in-place */
989: newcm = NULL;
990: files = (struct file **)CMSG_DATA(cm);
1.24 cgd 991: }
992:
993: /*
994: * Transform the file descriptors into struct file pointers, in
995: * reverse order so that if pointers are bigger than ints, the
996: * int won't get until we're done.
997: */
1.73 martin 998: fdp = (int *)CMSG_DATA(cm) + nfds - 1;
999: rp = files + nfds - 1;
1.24 cgd 1000: for (i = 0; i < nfds; i++) {
1.28 christos 1001: fp = fdescp->fd_ofiles[*fdp--];
1.57 pk 1002: simple_lock(&fp->f_slock);
1003: #ifdef DIAGNOSTIC
1004: if (fp->f_iflags & FIF_WANTCLOSE)
1005: panic("unp_internalize: file already closed");
1006: #endif
1.24 cgd 1007: *rp-- = fp;
1.1 cgd 1008: fp->f_count++;
1009: fp->f_msgcount++;
1.57 pk 1010: simple_unlock(&fp->f_slock);
1.1 cgd 1011: unp_rights++;
1012: }
1.73 martin 1013:
1014: if (newcm) {
1015: if (control->m_flags & M_EXT)
1016: MEXTREMOVE(control);
1017: MEXTADD(control, newcm,
1018: CMSG_SPACE(nfds * sizeof(struct file *)),
1019: M_MBUF, NULL, NULL);
1020: cm = newcm;
1021: }
1022:
1023: /* adjust message & mbuf to note amount of space actually used. */
1024: cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct file *));
1025: control->m_len = CMSG_SPACE(nfds * sizeof(struct file *));
1026:
1.1 cgd 1027: return (0);
1.30 thorpej 1028: }
1029:
1030: struct mbuf *
1.76 matt 1031: unp_addsockcred(struct proc *p, struct mbuf *control)
1.30 thorpej 1032: {
1033: struct cmsghdr *cmp;
1034: struct sockcred *sc;
1035: struct mbuf *m, *n;
1.47 thorpej 1036: int len, space, i;
1.30 thorpej 1037:
1.47 thorpej 1038: len = CMSG_LEN(SOCKCREDSIZE(p->p_ucred->cr_ngroups));
1039: space = CMSG_SPACE(SOCKCREDSIZE(p->p_ucred->cr_ngroups));
1.30 thorpej 1040:
1041: m = m_get(M_WAIT, MT_CONTROL);
1.47 thorpej 1042: if (space > MLEN) {
1043: if (space > MCLBYTES)
1044: MEXTMALLOC(m, space, M_WAITOK);
1.30 thorpej 1045: else
1.59 matt 1046: m_clget(m, M_WAIT);
1.30 thorpej 1047: if ((m->m_flags & M_EXT) == 0) {
1048: m_free(m);
1049: return (control);
1050: }
1051: }
1052:
1.47 thorpej 1053: m->m_len = space;
1.30 thorpej 1054: m->m_next = NULL;
1055: cmp = mtod(m, struct cmsghdr *);
1056: sc = (struct sockcred *)CMSG_DATA(cmp);
1057: cmp->cmsg_len = len;
1058: cmp->cmsg_level = SOL_SOCKET;
1059: cmp->cmsg_type = SCM_CREDS;
1060: sc->sc_uid = p->p_cred->p_ruid;
1061: sc->sc_euid = p->p_ucred->cr_uid;
1062: sc->sc_gid = p->p_cred->p_rgid;
1063: sc->sc_egid = p->p_ucred->cr_gid;
1064: sc->sc_ngroups = p->p_ucred->cr_ngroups;
1065: for (i = 0; i < sc->sc_ngroups; i++)
1066: sc->sc_groups[i] = p->p_ucred->cr_groups[i];
1067:
1068: /*
1069: * If a control message already exists, append us to the end.
1070: */
1071: if (control != NULL) {
1072: for (n = control; n->m_next != NULL; n = n->m_next)
1073: ;
1074: n->m_next = m;
1075: } else
1076: control = m;
1077:
1078: return (control);
1.1 cgd 1079: }
1080:
/*
 * unp_defer: count of files currently flagged FDEFER; incremented in
 * unp_mark, decremented and reset in unp_gc, and used as the loop
 * condition of unp_gc's mark phase.
 * unp_gcing: non-zero while unp_gc is running, so that a nested call
 * returns immediately.
 */
1081: int unp_defer, unp_gcing;
/* Defined elsewhere; unp_gc compares a socket's protocol domain against it. */
1082: extern struct domain unixdomain;
1083:
1.39 sommerfe 1084: /*
1085: * Comment added long after the fact explaining what's going on here.
1086: * Do a mark-sweep GC of file descriptors on the system, to free up
1087: * any which are caught in flight to an about-to-be-closed socket.
1088: *
1089: * Traditional mark-sweep gc's start at the "root", and mark
1090: * everything reachable from the root (which, in our case would be the
1091: * process table). The mark bits are cleared during the sweep.
1092: *
1093: * XXX For some inexplicable reason (perhaps because the file
1094: * descriptor tables used to live in the u area which could be swapped
1095: * out and thus hard to reach), we do multiple scans over the set of
1096: * descriptors, using *two* mark bits per object (DEFER and MARK).
1097: * Whenever we find a descriptor which references other descriptors,
1098: * the ones it references are marked with both bits, and we iterate
1099: * over the whole file table until there are no more DEFER bits set.
1100: * We also make an extra pass *before* the GC to clear the mark bits,
1101: * which could have been cleared at almost no cost during the previous
1102: * sweep.
1103: *
1104: * XXX MP: this needs to run with locks such that no other thread of
1105: * control can create or destroy references to file descriptors. it
1106: * may be necessary to defer the GC until later (when the locking
1107: * situation is more hospitable); it may be necessary to push this
1108: * into a separate thread.
1109: */
1.5 andrew 1110: void
1.76 matt 1111: unp_gc(void)
1.1 cgd 1112: {
1.46 augustss 1113: struct file *fp, *nextfp;
1114: struct socket *so, *so1;
1.8 mycroft 1115: struct file **extra_ref, **fpp;
1116: int nunref, i;
1.1 cgd 1117:
/* Only one collection may be in progress; a nested call returns at once. */
1118: if (unp_gcing)
1119: return;
1120: unp_gcing = 1;
1121: unp_defer = 0;
1.39 sommerfe 1122:
1123: /* Clear mark bits */
1.54 matt 1124: LIST_FOREACH(fp, &filehead, f_list)
1.1 cgd 1125: fp->f_flag &= ~(FMARK|FDEFER);
1.39 sommerfe 1126:
1127: /*
1128: * Iterate over the set of descriptors, marking ones believed
1129: * (based on refcount) to be referenced from a process, and
1130: * marking for rescan descriptors which are queued on a socket.
1131: */
1.1 cgd 1132: do {
1.54 matt 1133: LIST_FOREACH(fp, &filehead, f_list) {
1.1 cgd 1134: if (fp->f_flag & FDEFER) {
/* Deferred by unp_mark on an earlier scan: process it now. */
1135: fp->f_flag &= ~FDEFER;
1136: unp_defer--;
1.39 sommerfe 1137: #ifdef DIAGNOSTIC
1138: if (fp->f_count == 0)
1139: panic("unp_gc: deferred unreferenced socket");
1140: #endif
1.1 cgd 1141: } else {
1.39 sommerfe 1142: if (fp->f_count == 0)
1143: continue;
1.1 cgd 1144: if (fp->f_flag & FMARK)
1145: continue;
/*
 * Every reference is accounted for by f_msgcount, i.e. held by
 * queued messages only; leave it unmarked unless a later scan
 * defers it.
 */
1146: if (fp->f_count == fp->f_msgcount)
1147: continue;
1148: }
1.39 sommerfe 1149: fp->f_flag |= FMARK;
1150:
/* Only sockets of the unix domain with PR_RIGHTS are scanned further. */
1.1 cgd 1151: if (fp->f_type != DTYPE_SOCKET ||
1152: (so = (struct socket *)fp->f_data) == 0)
1153: continue;
1154: if (so->so_proto->pr_domain != &unixdomain ||
1155: (so->so_proto->pr_flags&PR_RIGHTS) == 0)
1156: continue;
/* The block below is compiled out (notdef); kept for reference only. */
1157: #ifdef notdef
1158: if (so->so_rcv.sb_flags & SB_LOCK) {
1159: /*
1160: * This is problematical; it's not clear
1161: * we need to wait for the sockbuf to be
1162: * unlocked (on a uniprocessor, at least),
1163: * and it's also not clear what to do
1164: * if sbwait returns an error due to receipt
1165: * of a signal. If sbwait does return
1166: * an error, we'll go into an infinite
1167: * loop. Delete all of this for now.
1168: */
1169: (void) sbwait(&so->so_rcv);
1170: goto restart;
1171: }
1172: #endif
/* Mark (or defer) every file referenced from this socket's receive buffer. */
1.39 sommerfe 1173: unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
1174: /*
1175: * mark descriptors referenced from sockets queued on the accept queue as well.
1176: */
1177: if (so->so_options & SO_ACCEPTCONN) {
1.54 matt 1178: TAILQ_FOREACH(so1, &so->so_q0, so_qe) {
1.39 sommerfe 1179: unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
1180: }
1.54 matt 1181: TAILQ_FOREACH(so1, &so->so_q, so_qe) {
1.39 sommerfe 1182: unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
1183: }
1184: }
1.80 perry 1185:
1.1 cgd 1186: }
/* Rescan the whole list for as long as unp_mark keeps deferring sockets. */
1187: } while (unp_defer);
1.8 mycroft 1188: /*
1.39 sommerfe 1189: * Sweep pass. Find unmarked descriptors, and free them.
1190: *
1.8 mycroft 1191: * We grab an extra reference to each of the file table entries
1192: * that are not otherwise accessible and then free the rights
1193: * that are stored in messages on them.
1194: *
1.57 pk 1195: * The bug in the original code is a little tricky, so I'll describe
1.8 mycroft 1196: * what's wrong with it here.
1197: *
1198: * It is incorrect to simply unp_discard each entry for f_msgcount
1199: * times -- consider the case of sockets A and B that contain
1200: * references to each other. On a last close of some other socket,
1201: * we trigger a gc since the number of outstanding rights (unp_rights)
1202: * is non-zero. If during the sweep phase the gc code unp_discards,
1203: * we end up doing a (full) closef on the descriptor. A closef on A
1204: * results in the following chain. Closef calls soo_close, which
1205: * calls soclose. Soclose calls first (through the switch
1206: * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply
1207: * returns because the previous instance had set unp_gcing, and
1208: * we return all the way back to soclose, which marks the socket
1209: * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush
1210: * to free up the rights that are queued in messages on the socket A,
1211: * i.e., the reference on B. The sorflush calls via the dom_dispose
1212: * switch unp_dispose, which unp_scans with unp_discard. This second
1213: * instance of unp_discard just calls closef on B.
1214: *
1215: * Well, a similar chain occurs on B, resulting in a sorflush on B,
1216: * which results in another closef on A. Unfortunately, A is already
1217: * being closed, and the descriptor has already been marked with
1218: * SS_NOFDREF, and soclose panics at this point.
1219: *
1220: * Here, we first take an extra reference to each inaccessible
1.39 sommerfe 1221: * descriptor. Then, if the inaccessible descriptor is a
1222: * socket, we call sorflush in case it is a Unix domain
1223: * socket. After we destroy all the rights carried in
1224: * messages, we do a last closef to get rid of our extra
1225: * reference. This is the last close, and the unp_detach etc
1226: * will shut down the socket.
1.8 mycroft 1227: *
1228: * 91/09/19, bsy@cs.cmu.edu
1229: */
/*
 * NOTE(review): extra_ref is sized from nfiles, which is declared outside
 * this chunk -- presumably the number of entries on filehead.  The loop
 * below does not bound-check fpp against that size; TODO confirm the list
 * cannot grow while malloc(M_WAITOK) waits.
 */
1230: extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
1.54 matt 1231: for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; fp != 0;
1.11 mycroft 1232: fp = nextfp) {
1.54 matt 1233: nextfp = LIST_NEXT(fp, f_list);
1.57 pk 1234: simple_lock(&fp->f_slock);
/* In use, referenced only from messages, and never reached by the mark phase. */
1235: if (fp->f_count != 0 &&
1236: fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
1.8 mycroft 1237: *fpp++ = fp;
1238: nunref++;
1239: fp->f_count++;
1240: }
1.57 pk 1241: simple_unlock(&fp->f_slock);
1.1 cgd 1242: }
/*
 * First pass over the collected files: flush the receive side of each
 * socket.  NOTE(review): f_slock is taken with no matching unlock here,
 * so FILE_USE presumably releases it -- confirm against its definition.
 */
1.39 sommerfe 1243: for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
1.45 thorpej 1244: fp = *fpp;
1.57 pk 1245: simple_lock(&fp->f_slock);
1.44 thorpej 1246: FILE_USE(fp);
1.39 sommerfe 1247: if (fp->f_type == DTYPE_SOCKET)
1248: sorflush((struct socket *)fp->f_data);
1.44 thorpej 1249: FILE_UNUSE(fp, NULL);
1.39 sommerfe 1250: }
/* Second pass: drop the extra reference taken above via closef. */
1.44 thorpej 1251: for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
1.45 thorpej 1252: fp = *fpp;
1.57 pk 1253: simple_lock(&fp->f_slock);
1.44 thorpej 1254: FILE_USE(fp);
1.62 fvdl 1255: (void) closef(fp, (struct proc *)0);
1.44 thorpej 1256: }
1.8 mycroft 1257: free((caddr_t)extra_ref, M_FILE);
1.1 cgd 1258: unp_gcing = 0;
1259: }
1260:
1.5 andrew 1261: void
1.76 matt 1262: unp_dispose(struct mbuf *m)
1.1 cgd 1263: {
1.8 mycroft 1264:
1.1 cgd 1265: if (m)
1.39 sommerfe 1266: unp_scan(m, unp_discard, 1);
1.1 cgd 1267: }
1268:
1.5 andrew 1269: void
1.76 matt 1270: unp_scan(struct mbuf *m0, void (*op)(struct file *), int discard)
1.1 cgd 1271: {
1.46 augustss 1272: struct mbuf *m;
1273: struct file **rp;
1274: struct cmsghdr *cm;
1275: int i;
1.1 cgd 1276: int qfds;
1277:
1278: while (m0) {
1.48 thorpej 1279: for (m = m0; m; m = m->m_next) {
1.1 cgd 1280: if (m->m_type == MT_CONTROL &&
1281: m->m_len >= sizeof(*cm)) {
1282: cm = mtod(m, struct cmsghdr *);
1283: if (cm->cmsg_level != SOL_SOCKET ||
1284: cm->cmsg_type != SCM_RIGHTS)
1285: continue;
1.48 thorpej 1286: qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm)))
1287: / sizeof(struct file *);
1288: rp = (struct file **)CMSG_DATA(cm);
1.39 sommerfe 1289: for (i = 0; i < qfds; i++) {
1290: struct file *fp = *rp;
1291: if (discard)
1292: *rp = 0;
1293: (*op)(fp);
1294: rp++;
1295: }
1.1 cgd 1296: break; /* XXX, but saves time */
1297: }
1.48 thorpej 1298: }
1.52 thorpej 1299: m0 = m0->m_nextpkt;
1.1 cgd 1300: }
1301: }
1302:
1.5 andrew 1303: void
1.76 matt 1304: unp_mark(struct file *fp)
1.1 cgd 1305: {
1.39 sommerfe 1306: if (fp == NULL)
1307: return;
1.80 perry 1308:
1.39 sommerfe 1309: if (fp->f_flag & FMARK)
1310: return;
1.1 cgd 1311:
1.39 sommerfe 1312: /* If we're already deferred, don't screw up the defer count */
1313: if (fp->f_flag & FDEFER)
1.1 cgd 1314: return;
1.39 sommerfe 1315:
1316: /*
1317: * Minimize the number of deferrals... Sockets are the only
1318: * type of descriptor which can hold references to another
1319: * descriptor, so just mark other descriptors, and defer
1320: * unmarked sockets for the next pass.
1321: */
1322: if (fp->f_type == DTYPE_SOCKET) {
1323: unp_defer++;
1324: if (fp->f_count == 0)
1325: panic("unp_mark: queued unref");
1326: fp->f_flag |= FDEFER;
1327: } else {
1328: fp->f_flag |= FMARK;
1329: }
1330: return;
1.1 cgd 1331: }
1332:
1.5 andrew 1333: void
1.76 matt 1334: unp_discard(struct file *fp)
1.1 cgd 1335: {
1.39 sommerfe 1336: if (fp == NULL)
1337: return;
1.57 pk 1338: simple_lock(&fp->f_slock);
1339: fp->f_usecount++; /* i.e. FILE_USE(fp) sans locking */
1.1 cgd 1340: fp->f_msgcount--;
1.57 pk 1341: simple_unlock(&fp->f_slock);
1.1 cgd 1342: unp_rights--;
1.62 fvdl 1343: (void) closef(fp, (struct proc *)0);
1.1 cgd 1344: }
CVSweb <webmaster@jp.NetBSD.org>