[BACK]Return to uipc_usrreq.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / kern

Annotation of src/sys/kern/uipc_usrreq.c, Revision 1.80.2.1

1.80.2.1! tron        1: /*     $NetBSD$        */
1.30      thorpej     2:
                      3: /*-
1.77      matt        4:  * Copyright (c) 1998, 2000, 2004 The NetBSD Foundation, Inc.
1.30      thorpej     5:  * All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to The NetBSD Foundation
                      8:  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
                      9:  * NASA Ames Research Center.
                     10:  *
                     11:  * Redistribution and use in source and binary forms, with or without
                     12:  * modification, are permitted provided that the following conditions
                     13:  * are met:
                     14:  * 1. Redistributions of source code must retain the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer.
                     16:  * 2. Redistributions in binary form must reproduce the above copyright
                     17:  *    notice, this list of conditions and the following disclaimer in the
                     18:  *    documentation and/or other materials provided with the distribution.
                     19:  * 3. All advertising materials mentioning features or use of this software
                     20:  *    must display the following acknowledgement:
                     21:  *     This product includes software developed by the NetBSD
                     22:  *     Foundation, Inc. and its contributors.
                     23:  * 4. Neither the name of The NetBSD Foundation nor the names of its
                     24:  *    contributors may be used to endorse or promote products derived
                     25:  *    from this software without specific prior written permission.
                     26:  *
                     27:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     28:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     29:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     30:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     31:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     32:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     33:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     34:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     35:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     36:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     37:  * POSSIBILITY OF SUCH DAMAGE.
                     38:  */
1.10      cgd        39:
1.1       cgd        40: /*
1.8       mycroft    41:  * Copyright (c) 1982, 1986, 1989, 1991, 1993
                     42:  *     The Regents of the University of California.  All rights reserved.
1.1       cgd        43:  *
                     44:  * Redistribution and use in source and binary forms, with or without
                     45:  * modification, are permitted provided that the following conditions
                     46:  * are met:
                     47:  * 1. Redistributions of source code must retain the above copyright
                     48:  *    notice, this list of conditions and the following disclaimer.
                     49:  * 2. Redistributions in binary form must reproduce the above copyright
                     50:  *    notice, this list of conditions and the following disclaimer in the
                     51:  *    documentation and/or other materials provided with the distribution.
1.67      agc        52:  * 3. Neither the name of the University nor the names of its contributors
                     53:  *    may be used to endorse or promote products derived from this software
                     54:  *    without specific prior written permission.
                     55:  *
                     56:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     57:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     58:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     59:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     60:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     61:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     62:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     63:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     64:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     65:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     66:  * SUCH DAMAGE.
                     67:  *
                     68:  *     @(#)uipc_usrreq.c       8.9 (Berkeley) 5/14/95
                     69:  */
                     70:
                     71: /*
                     72:  * Copyright (c) 1997 Christopher G. Demetriou.  All rights reserved.
                     73:  *
                     74:  * Redistribution and use in source and binary forms, with or without
                     75:  * modification, are permitted provided that the following conditions
                     76:  * are met:
                     77:  * 1. Redistributions of source code must retain the above copyright
                     78:  *    notice, this list of conditions and the following disclaimer.
                     79:  * 2. Redistributions in binary form must reproduce the above copyright
                     80:  *    notice, this list of conditions and the following disclaimer in the
                     81:  *    documentation and/or other materials provided with the distribution.
1.1       cgd        82:  * 3. All advertising materials mentioning features or use of this software
                     83:  *    must display the following acknowledgement:
                     84:  *     This product includes software developed by the University of
                     85:  *     California, Berkeley and its contributors.
                     86:  * 4. Neither the name of the University nor the names of its contributors
                     87:  *    may be used to endorse or promote products derived from this software
                     88:  *    without specific prior written permission.
                     89:  *
                     90:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     91:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     92:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     93:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     94:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     95:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     96:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     97:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     98:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     99:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                    100:  * SUCH DAMAGE.
                    101:  *
1.31      fvdl      102:  *     @(#)uipc_usrreq.c       8.9 (Berkeley) 5/14/95
1.1       cgd       103:  */
1.53      lukem     104:
                    105: #include <sys/cdefs.h>
1.80.2.1! tron      106: __KERNEL_RCSID(0, "$NetBSD$");
1.1       cgd       107:
1.7       mycroft   108: #include <sys/param.h>
1.8       mycroft   109: #include <sys/systm.h>
1.7       mycroft   110: #include <sys/proc.h>
                    111: #include <sys/filedesc.h>
                    112: #include <sys/domain.h>
                    113: #include <sys/protosw.h>
                    114: #include <sys/socket.h>
                    115: #include <sys/socketvar.h>
                    116: #include <sys/unpcb.h>
                    117: #include <sys/un.h>
                    118: #include <sys/namei.h>
                    119: #include <sys/vnode.h>
                    120: #include <sys/file.h>
                    121: #include <sys/stat.h>
                    122: #include <sys/mbuf.h>
1.1       cgd       123:
                    124: /*
                    125:  * Unix communications domain.
                    126:  *
                    127:  * TODO:
                    128:  *     SEQPACKET, RDM
                    129:  *     rethink name space problems
                    130:  *     need a proper out-of-band
                    131:  */
1.77      matt      132: const struct   sockaddr_un sun_noname = { sizeof(sun_noname), AF_LOCAL };
1.1       cgd       133: ino_t  unp_ino;                        /* prototype for fake inode numbers */
                    134:
1.74      junyoung  135: struct mbuf *unp_addsockcred(struct proc *, struct mbuf *);
1.30      thorpej   136:
1.20      mycroft   137: int
1.76      matt      138: unp_output(struct mbuf *m, struct mbuf *control, struct unpcb *unp,
                    139:        struct proc *p)
1.20      mycroft   140: {
                    141:        struct socket *so2;
1.77      matt      142:        const struct sockaddr_un *sun;
1.20      mycroft   143:
                    144:        so2 = unp->unp_conn->unp_socket;
                    145:        if (unp->unp_addr)
                    146:                sun = unp->unp_addr;
                    147:        else
                    148:                sun = &sun_noname;
1.30      thorpej   149:        if (unp->unp_conn->unp_flags & UNP_WANTCRED)
                    150:                control = unp_addsockcred(p, control);
1.20      mycroft   151:        if (sbappendaddr(&so2->so_rcv, (struct sockaddr *)sun, m,
                    152:            control) == 0) {
                    153:                m_freem(control);
                    154:                m_freem(m);
1.79      darrenr   155:                so2->so_rcv.sb_overflowed++;
1.60      christos  156:                return (ENOBUFS);
1.20      mycroft   157:        } else {
                    158:                sorwakeup(so2);
                    159:                return (0);
                    160:        }
                    161: }
                    162:
                    163: void
1.76      matt      164: unp_setsockaddr(struct unpcb *unp, struct mbuf *nam)
1.20      mycroft   165: {
1.77      matt      166:        const struct sockaddr_un *sun;
1.20      mycroft   167:
                    168:        if (unp->unp_addr)
                    169:                sun = unp->unp_addr;
                    170:        else
                    171:                sun = &sun_noname;
                    172:        nam->m_len = sun->sun_len;
1.56      itojun    173:        if (nam->m_len > MLEN)
1.27      thorpej   174:                MEXTMALLOC(nam, nam->m_len, M_WAITOK);
1.36      perry     175:        memcpy(mtod(nam, caddr_t), sun, (size_t)nam->m_len);
1.20      mycroft   176: }
                    177:
                    178: void
1.76      matt      179: unp_setpeeraddr(struct unpcb *unp, struct mbuf *nam)
1.20      mycroft   180: {
1.77      matt      181:        const struct sockaddr_un *sun;
1.20      mycroft   182:
                    183:        if (unp->unp_conn && unp->unp_conn->unp_addr)
                    184:                sun = unp->unp_conn->unp_addr;
                    185:        else
                    186:                sun = &sun_noname;
                    187:        nam->m_len = sun->sun_len;
1.56      itojun    188:        if (nam->m_len > MLEN)
1.27      thorpej   189:                MEXTMALLOC(nam, nam->m_len, M_WAITOK);
1.36      perry     190:        memcpy(mtod(nam, caddr_t), sun, (size_t)nam->m_len);
1.20      mycroft   191: }
                    192:
1.1       cgd       193: /*ARGSUSED*/
1.5       andrew    194: int
1.76      matt      195: uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
                    196:        struct mbuf *control, struct proc *p)
1.1       cgd       197: {
                    198:        struct unpcb *unp = sotounpcb(so);
1.46      augustss  199:        struct socket *so2;
1.75      christos  200:        u_int newhiwat;
1.46      augustss  201:        int error = 0;
1.1       cgd       202:
                    203:        if (req == PRU_CONTROL)
                    204:                return (EOPNOTSUPP);
1.20      mycroft   205:
1.22      mycroft   206: #ifdef DIAGNOSTIC
                    207:        if (req != PRU_SEND && req != PRU_SENDOOB && control)
                    208:                panic("uipc_usrreq: unexpected control mbuf");
                    209: #endif
1.1       cgd       210:        if (unp == 0 && req != PRU_ATTACH) {
                    211:                error = EINVAL;
                    212:                goto release;
                    213:        }
1.20      mycroft   214:
1.1       cgd       215:        switch (req) {
                    216:
                    217:        case PRU_ATTACH:
1.20      mycroft   218:                if (unp != 0) {
1.1       cgd       219:                        error = EISCONN;
                    220:                        break;
                    221:                }
                    222:                error = unp_attach(so);
                    223:                break;
                    224:
                    225:        case PRU_DETACH:
                    226:                unp_detach(unp);
                    227:                break;
                    228:
                    229:        case PRU_BIND:
1.62      fvdl      230:                error = unp_bind(unp, nam, p);
1.1       cgd       231:                break;
                    232:
                    233:        case PRU_LISTEN:
                    234:                if (unp->unp_vnode == 0)
                    235:                        error = EINVAL;
                    236:                break;
                    237:
                    238:        case PRU_CONNECT:
1.62      fvdl      239:                error = unp_connect(so, nam, p);
1.1       cgd       240:                break;
                    241:
                    242:        case PRU_CONNECT2:
1.72      matt      243:                error = unp_connect2(so, (struct socket *)nam, PRU_CONNECT2);
1.1       cgd       244:                break;
                    245:
                    246:        case PRU_DISCONNECT:
                    247:                unp_disconnect(unp);
                    248:                break;
                    249:
                    250:        case PRU_ACCEPT:
1.20      mycroft   251:                unp_setpeeraddr(unp, nam);
1.72      matt      252:                /*
                    253:                 * Mark the initiating STREAM socket as connected *ONLY*
                    254:                 * after it's been accepted.  This prevents a client from
                    255:                 * overrunning a server and receiving ECONNREFUSED.
                    256:                 */
                    257:                if (unp->unp_conn != NULL &&
                    258:                    (unp->unp_conn->unp_socket->so_state & SS_ISCONNECTING))
                    259:                        soisconnected(unp->unp_conn->unp_socket);
1.1       cgd       260:                break;
                    261:
                    262:        case PRU_SHUTDOWN:
                    263:                socantsendmore(so);
                    264:                unp_shutdown(unp);
                    265:                break;
                    266:
                    267:        case PRU_RCVD:
                    268:                switch (so->so_type) {
                    269:
                    270:                case SOCK_DGRAM:
                    271:                        panic("uipc 1");
                    272:                        /*NOTREACHED*/
                    273:
                    274:                case SOCK_STREAM:
                    275: #define        rcv (&so->so_rcv)
                    276: #define snd (&so2->so_snd)
                    277:                        if (unp->unp_conn == 0)
                    278:                                break;
                    279:                        so2 = unp->unp_conn->unp_socket;
                    280:                        /*
                    281:                         * Adjust backpressure on sender
                    282:                         * and wakeup any waiting to write.
                    283:                         */
                    284:                        snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
                    285:                        unp->unp_mbcnt = rcv->sb_mbcnt;
1.75      christos  286:                        newhiwat = snd->sb_hiwat + unp->unp_cc - rcv->sb_cc;
                    287:                        (void)chgsbsize(so2->so_uid,
                    288:                            &snd->sb_hiwat, newhiwat, RLIM_INFINITY);
1.1       cgd       289:                        unp->unp_cc = rcv->sb_cc;
                    290:                        sowwakeup(so2);
                    291: #undef snd
                    292: #undef rcv
                    293:                        break;
                    294:
                    295:                default:
                    296:                        panic("uipc 2");
                    297:                }
                    298:                break;
                    299:
                    300:        case PRU_SEND:
1.30      thorpej   301:                /*
                    302:                 * Note: unp_internalize() rejects any control message
                    303:                 * other than SCM_RIGHTS, and only allows one.  This
                    304:                 * has the side-effect of preventing a caller from
                    305:                 * forging SCM_CREDS.
                    306:                 */
1.80.2.1! tron      307:                if (control && (error = unp_internalize(control, p))) {
        !           308:                        goto die;
        !           309:                }
1.1       cgd       310:                switch (so->so_type) {
                    311:
                    312:                case SOCK_DGRAM: {
                    313:                        if (nam) {
1.20      mycroft   314:                                if ((so->so_state & SS_ISCONNECTED) != 0) {
1.1       cgd       315:                                        error = EISCONN;
1.21      mycroft   316:                                        goto die;
1.1       cgd       317:                                }
1.62      fvdl      318:                                error = unp_connect(so, nam, p);
1.20      mycroft   319:                                if (error) {
1.23      mycroft   320:                                die:
1.21      mycroft   321:                                        m_freem(control);
1.20      mycroft   322:                                        m_freem(m);
1.1       cgd       323:                                        break;
1.20      mycroft   324:                                }
1.1       cgd       325:                        } else {
1.20      mycroft   326:                                if ((so->so_state & SS_ISCONNECTED) == 0) {
1.1       cgd       327:                                        error = ENOTCONN;
1.21      mycroft   328:                                        goto die;
1.1       cgd       329:                                }
                    330:                        }
1.30      thorpej   331:                        error = unp_output(m, control, unp, p);
1.1       cgd       332:                        if (nam)
                    333:                                unp_disconnect(unp);
                    334:                        break;
                    335:                }
                    336:
                    337:                case SOCK_STREAM:
                    338: #define        rcv (&so2->so_rcv)
                    339: #define        snd (&so->so_snd)
                    340:                        if (unp->unp_conn == 0)
                    341:                                panic("uipc 3");
                    342:                        so2 = unp->unp_conn->unp_socket;
1.30      thorpej   343:                        if (unp->unp_conn->unp_flags & UNP_WANTCRED) {
                    344:                                /*
                    345:                                 * Credentials are passed only once on
                    346:                                 * SOCK_STREAM.
                    347:                                 */
                    348:                                unp->unp_conn->unp_flags &= ~UNP_WANTCRED;
                    349:                                control = unp_addsockcred(p, control);
                    350:                        }
1.1       cgd       351:                        /*
                    352:                         * Send to paired receive port, and then reduce
                    353:                         * send buffer hiwater marks to maintain backpressure.
                    354:                         * Wake up readers.
                    355:                         */
                    356:                        if (control) {
1.21      mycroft   357:                                if (sbappendcontrol(rcv, m, control) == 0)
                    358:                                        m_freem(control);
1.1       cgd       359:                        } else
                    360:                                sbappend(rcv, m);
                    361:                        snd->sb_mbmax -=
                    362:                            rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
                    363:                        unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
1.75      christos  364:                        newhiwat = snd->sb_hiwat -
                    365:                            (rcv->sb_cc - unp->unp_conn->unp_cc);
                    366:                        (void)chgsbsize(so->so_uid,
                    367:                            &snd->sb_hiwat, newhiwat, RLIM_INFINITY);
1.1       cgd       368:                        unp->unp_conn->unp_cc = rcv->sb_cc;
                    369:                        sorwakeup(so2);
                    370: #undef snd
                    371: #undef rcv
                    372:                        break;
                    373:
                    374:                default:
                    375:                        panic("uipc 4");
                    376:                }
                    377:                break;
                    378:
                    379:        case PRU_ABORT:
                    380:                unp_drop(unp, ECONNABORTED);
1.39      sommerfe  381:
                    382: #ifdef DIAGNOSTIC
                    383:                if (so->so_pcb == 0)
                    384:                        panic("uipc 5: drop killed pcb");
                    385: #endif
                    386:                unp_detach(unp);
1.1       cgd       387:                break;
                    388:
                    389:        case PRU_SENSE:
                    390:                ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
                    391:                if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
                    392:                        so2 = unp->unp_conn->unp_socket;
                    393:                        ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
                    394:                }
                    395:                ((struct stat *) m)->st_dev = NODEV;
                    396:                if (unp->unp_ino == 0)
                    397:                        unp->unp_ino = unp_ino++;
1.25      kleink    398:                ((struct stat *) m)->st_atimespec =
                    399:                    ((struct stat *) m)->st_mtimespec =
                    400:                    ((struct stat *) m)->st_ctimespec = unp->unp_ctime;
1.1       cgd       401:                ((struct stat *) m)->st_ino = unp->unp_ino;
                    402:                return (0);
                    403:
                    404:        case PRU_RCVOOB:
1.20      mycroft   405:                error = EOPNOTSUPP;
                    406:                break;
1.1       cgd       407:
                    408:        case PRU_SENDOOB:
1.22      mycroft   409:                m_freem(control);
1.20      mycroft   410:                m_freem(m);
1.1       cgd       411:                error = EOPNOTSUPP;
                    412:                break;
                    413:
                    414:        case PRU_SOCKADDR:
1.20      mycroft   415:                unp_setsockaddr(unp, nam);
1.1       cgd       416:                break;
                    417:
                    418:        case PRU_PEERADDR:
1.20      mycroft   419:                unp_setpeeraddr(unp, nam);
1.1       cgd       420:                break;
                    421:
                    422:        default:
                    423:                panic("piusrreq");
                    424:        }
1.20      mycroft   425:
1.1       cgd       426: release:
                    427:        return (error);
                    428: }
                    429:
                    430: /*
1.30      thorpej   431:  * Unix domain socket option processing.
                    432:  */
                    433: int
1.76      matt      434: uipc_ctloutput(int op, struct socket *so, int level, int optname,
                    435:        struct mbuf **mp)
1.30      thorpej   436: {
                    437:        struct unpcb *unp = sotounpcb(so);
                    438:        struct mbuf *m = *mp;
                    439:        int optval = 0, error = 0;
                    440:
                    441:        if (level != 0) {
                    442:                error = EINVAL;
                    443:                if (op == PRCO_SETOPT && m)
                    444:                        (void) m_free(m);
                    445:        } else switch (op) {
                    446:
                    447:        case PRCO_SETOPT:
                    448:                switch (optname) {
                    449:                case LOCAL_CREDS:
1.72      matt      450:                case LOCAL_CONNWAIT:
1.30      thorpej   451:                        if (m == NULL || m->m_len != sizeof(int))
                    452:                                error = EINVAL;
                    453:                        else {
                    454:                                optval = *mtod(m, int *);
                    455:                                switch (optname) {
                    456: #define        OPTSET(bit) \
                    457:        if (optval) \
                    458:                unp->unp_flags |= (bit); \
                    459:        else \
                    460:                unp->unp_flags &= ~(bit);
                    461:
                    462:                                case LOCAL_CREDS:
                    463:                                        OPTSET(UNP_WANTCRED);
                    464:                                        break;
1.72      matt      465:                                case LOCAL_CONNWAIT:
                    466:                                        OPTSET(UNP_CONNWAIT);
                    467:                                        break;
1.30      thorpej   468:                                }
                    469:                        }
                    470:                        break;
                    471: #undef OPTSET
                    472:
                    473:                default:
                    474:                        error = ENOPROTOOPT;
                    475:                        break;
                    476:                }
                    477:                if (m)
                    478:                        (void) m_free(m);
                    479:                break;
                    480:
                    481:        case PRCO_GETOPT:
                    482:                switch (optname) {
                    483:                case LOCAL_CREDS:
                    484:                        *mp = m = m_get(M_WAIT, MT_SOOPTS);
                    485:                        m->m_len = sizeof(int);
                    486:                        switch (optname) {
                    487:
                    488: #define        OPTBIT(bit)     (unp->unp_flags & (bit) ? 1 : 0)
                    489:
                    490:                        case LOCAL_CREDS:
                    491:                                optval = OPTBIT(UNP_WANTCRED);
                    492:                                break;
                    493:                        }
                    494:                        *mtod(m, int *) = optval;
                    495:                        break;
                    496: #undef OPTBIT
                    497:
                    498:                default:
                    499:                        error = ENOPROTOOPT;
                    500:                        break;
                    501:                }
                    502:                break;
                    503:        }
                    504:        return (error);
                    505: }
                    506:
                    507: /*
1.1       cgd       508:  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
                    509:  * for stream sockets, although the total for sender and receiver is
                    510:  * actually only PIPSIZ.
                    511:  * Datagram sockets really use the sendspace as the maximum datagram size,
                    512:  * and don't really want to reserve the sendspace.  Their recvspace should
                    513:  * be large enough for at least one max-size datagram plus address.
                    514:  */
                    515: #define        PIPSIZ  4096
                    516: u_long unpst_sendspace = PIPSIZ;
                    517: u_long unpst_recvspace = PIPSIZ;
                    518: u_long unpdg_sendspace = 2*1024;       /* really max datagram size */
                    519: u_long unpdg_recvspace = 4*1024;
                    520:
                    521: int    unp_rights;                     /* file descriptors in flight */
                    522:
1.5       andrew    523: int
1.76      matt      524: unp_attach(struct socket *so)
1.1       cgd       525: {
1.46      augustss  526:        struct unpcb *unp;
1.25      kleink    527:        struct timeval tv;
1.1       cgd       528:        int error;
1.80      perry     529:
1.1       cgd       530:        if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
                    531:                switch (so->so_type) {
                    532:
                    533:                case SOCK_STREAM:
                    534:                        error = soreserve(so, unpst_sendspace, unpst_recvspace);
                    535:                        break;
                    536:
                    537:                case SOCK_DGRAM:
                    538:                        error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
                    539:                        break;
1.8       mycroft   540:
                    541:                default:
                    542:                        panic("unp_attach");
1.1       cgd       543:                }
                    544:                if (error)
                    545:                        return (error);
                    546:        }
1.14      mycroft   547:        unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT);
                    548:        if (unp == NULL)
1.1       cgd       549:                return (ENOBUFS);
1.36      perry     550:        memset((caddr_t)unp, 0, sizeof(*unp));
1.14      mycroft   551:        unp->unp_socket = so;
1.15      mycroft   552:        so->so_pcb = unp;
1.25      kleink    553:        microtime(&tv);
                    554:        TIMEVAL_TO_TIMESPEC(&tv, &unp->unp_ctime);
1.1       cgd       555:        return (0);
                    556: }
                    557:
1.17      pk        558: void
1.76      matt      559: unp_detach(struct unpcb *unp)
1.1       cgd       560: {
1.80      perry     561:
1.1       cgd       562:        if (unp->unp_vnode) {
                    563:                unp->unp_vnode->v_socket = 0;
                    564:                vrele(unp->unp_vnode);
                    565:                unp->unp_vnode = 0;
                    566:        }
                    567:        if (unp->unp_conn)
                    568:                unp_disconnect(unp);
                    569:        while (unp->unp_refs)
                    570:                unp_drop(unp->unp_refs, ECONNRESET);
                    571:        soisdisconnected(unp->unp_socket);
                    572:        unp->unp_socket->so_pcb = 0;
1.20      mycroft   573:        if (unp->unp_addr)
1.26      thorpej   574:                free(unp->unp_addr, M_SONAME);
1.8       mycroft   575:        if (unp_rights) {
                    576:                /*
                    577:                 * Normally the receive buffer is flushed later,
                    578:                 * in sofree, but if our receive buffer holds references
                    579:                 * to descriptors that are now garbage, we will dispose
                    580:                 * of those descriptor references after the garbage collector
                    581:                 * gets them (resulting in a "panic: closef: count < 0").
                    582:                 */
                    583:                sorflush(unp->unp_socket);
1.14      mycroft   584:                free(unp, M_PCB);
1.1       cgd       585:                unp_gc();
1.14      mycroft   586:        } else
                    587:                free(unp, M_PCB);
1.1       cgd       588: }
                    589:
1.5       andrew    590: int
1.76      matt      591: unp_bind(struct unpcb *unp, struct mbuf *nam, struct proc *p)
1.1       cgd       592: {
1.27      thorpej   593:        struct sockaddr_un *sun;
1.46      augustss  594:        struct vnode *vp;
1.70      hannken   595:        struct mount *mp;
1.1       cgd       596:        struct vattr vattr;
1.27      thorpej   597:        size_t addrlen;
1.1       cgd       598:        int error;
                    599:        struct nameidata nd;
                    600:
1.20      mycroft   601:        if (unp->unp_vnode != 0)
                    602:                return (EINVAL);
1.27      thorpej   603:
                    604:        /*
                    605:         * Allocate the new sockaddr.  We have to allocate one
                    606:         * extra byte so that we can ensure that the pathname
                    607:         * is nul-terminated.
                    608:         */
                    609:        addrlen = nam->m_len + 1;
                    610:        sun = malloc(addrlen, M_SONAME, M_WAITOK);
                    611:        m_copydata(nam, 0, nam->m_len, (caddr_t)sun);
                    612:        *(((char *)sun) + nam->m_len) = '\0';
                    613:
1.70      hannken   614: restart:
1.9       mycroft   615:        NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
1.62      fvdl      616:            sun->sun_path, p);
1.27      thorpej   617:
1.1       cgd       618: /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
1.16      christos  619:        if ((error = namei(&nd)) != 0)
1.27      thorpej   620:                goto bad;
1.9       mycroft   621:        vp = nd.ni_vp;
1.70      hannken   622:        if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
1.9       mycroft   623:                VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
                    624:                if (nd.ni_dvp == vp)
                    625:                        vrele(nd.ni_dvp);
1.1       cgd       626:                else
1.9       mycroft   627:                        vput(nd.ni_dvp);
1.1       cgd       628:                vrele(vp);
1.70      hannken   629:                if (vp != NULL) {
                    630:                        error = EADDRINUSE;
                    631:                        goto bad;
                    632:                }
                    633:                error = vn_start_write(NULL, &mp,
                    634:                    V_WAIT | V_SLEEPONLY | V_PCATCH);
                    635:                if (error)
                    636:                        goto bad;
                    637:                goto restart;
1.1       cgd       638:        }
                    639:        VATTR_NULL(&vattr);
                    640:        vattr.va_type = VSOCK;
1.9       mycroft   641:        vattr.va_mode = ACCESSPERMS;
1.62      fvdl      642:        VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1.16      christos  643:        error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1.70      hannken   644:        vn_finished_write(mp, 0);
1.16      christos  645:        if (error)
1.27      thorpej   646:                goto bad;
1.9       mycroft   647:        vp = nd.ni_vp;
1.1       cgd       648:        vp->v_socket = unp->unp_socket;
                    649:        unp->unp_vnode = vp;
1.27      thorpej   650:        unp->unp_addrlen = addrlen;
                    651:        unp->unp_addr = sun;
1.31      fvdl      652:        VOP_UNLOCK(vp, 0);
1.1       cgd       653:        return (0);
1.27      thorpej   654:
                    655:  bad:
                    656:        free(sun, M_SONAME);
                    657:        return (error);
1.1       cgd       658: }
                    659:
/*
 * Connect socket `so' to the local-domain socket bound at the pathname
 * carried in `nam', on behalf of process `p'.
 *
 * The pathname is looked up and must name a VSOCK node that `p' may
 * write to, with a socket of the same type attached.  For protocols
 * flagged PR_CONNREQUIRED the target must be accepting connections; a
 * new socket is spawned from it with sonewconn(), inherits the
 * target's bound address and flags, and becomes the actual peer.
 *
 * Returns 0 on success, ENOTSOCK, ECONNREFUSED, EPROTOTYPE, or an
 * error from namei(), VOP_ACCESS() or unp_connect2().
 */
int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct nameidata nd;
	struct sockaddr_un *sun;
	struct socket *peer, *child;
	struct unpcb *lunp, *cunp;
	struct vnode *vp;
	size_t addrlen;
	int error;

	/*
	 * Make a temporary copy of the sockaddr with one extra byte so
	 * the pathname is guaranteed to be nul-terminated.
	 */
	addrlen = nam->m_len + 1;
	sun = malloc(addrlen, M_SONAME, M_WAITOK);
	m_copydata(nam, 0, nam->m_len, (caddr_t)sun);
	((char *)sun)[nam->m_len] = '\0';

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, sun->sun_path, p);

	error = namei(&nd);
	if (error != 0)
		goto bad2;

	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto bad;
	}

	error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p);
	if (error != 0)
		goto bad;

	peer = vp->v_socket;
	if (peer == NULL) {
		error = ECONNREFUSED;
		goto bad;
	}
	if (so->so_type != peer->so_type) {
		error = EPROTOTYPE;
		goto bad;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((peer->so_options & SO_ACCEPTCONN) == 0) {
			error = ECONNREFUSED;
			goto bad;
		}
		child = sonewconn(peer, 0);
		if (child == NULL) {
			error = ECONNREFUSED;
			goto bad;
		}

		/* The spawned socket inherits the listener's identity. */
		lunp = sotounpcb(peer);
		cunp = sotounpcb(child);
		if (lunp->unp_addr != NULL) {
			cunp->unp_addr = malloc(lunp->unp_addrlen,
			    M_SONAME, M_WAITOK);
			memcpy(cunp->unp_addr, lunp->unp_addr,
			    lunp->unp_addrlen);
			cunp->unp_addrlen = lunp->unp_addrlen;
		}
		cunp->unp_flags = lunp->unp_flags;
		peer = child;
	}

	error = unp_connect2(so, peer, PRU_CONNECT);

 bad:
	vput(vp);
 bad2:
	free(sun, M_SONAME);
	return (error);
}
                    727:
1.5       andrew    728: int
1.76      matt      729: unp_connect2(struct socket *so, struct socket *so2, int req)
1.1       cgd       730: {
1.46      augustss  731:        struct unpcb *unp = sotounpcb(so);
                    732:        struct unpcb *unp2;
1.1       cgd       733:
                    734:        if (so2->so_type != so->so_type)
                    735:                return (EPROTOTYPE);
                    736:        unp2 = sotounpcb(so2);
                    737:        unp->unp_conn = unp2;
                    738:        switch (so->so_type) {
                    739:
                    740:        case SOCK_DGRAM:
                    741:                unp->unp_nextref = unp2->unp_refs;
                    742:                unp2->unp_refs = unp;
                    743:                soisconnected(so);
                    744:                break;
                    745:
                    746:        case SOCK_STREAM:
                    747:                unp2->unp_conn = unp;
1.72      matt      748:                if (req == PRU_CONNECT &&
                    749:                    ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT))
                    750:                        soisconnecting(so);
                    751:                else
                    752:                        soisconnected(so);
1.1       cgd       753:                soisconnected(so2);
                    754:                break;
                    755:
                    756:        default:
                    757:                panic("unp_connect2");
                    758:        }
                    759:        return (0);
                    760: }
                    761:
1.5       andrew    762: void
1.76      matt      763: unp_disconnect(struct unpcb *unp)
1.1       cgd       764: {
1.46      augustss  765:        struct unpcb *unp2 = unp->unp_conn;
1.1       cgd       766:
                    767:        if (unp2 == 0)
                    768:                return;
                    769:        unp->unp_conn = 0;
                    770:        switch (unp->unp_socket->so_type) {
                    771:
                    772:        case SOCK_DGRAM:
                    773:                if (unp2->unp_refs == unp)
                    774:                        unp2->unp_refs = unp->unp_nextref;
                    775:                else {
                    776:                        unp2 = unp2->unp_refs;
                    777:                        for (;;) {
                    778:                                if (unp2 == 0)
                    779:                                        panic("unp_disconnect");
                    780:                                if (unp2->unp_nextref == unp)
                    781:                                        break;
                    782:                                unp2 = unp2->unp_nextref;
                    783:                        }
                    784:                        unp2->unp_nextref = unp->unp_nextref;
                    785:                }
                    786:                unp->unp_nextref = 0;
                    787:                unp->unp_socket->so_state &= ~SS_ISCONNECTED;
                    788:                break;
                    789:
                    790:        case SOCK_STREAM:
                    791:                soisdisconnected(unp->unp_socket);
                    792:                unp2->unp_conn = 0;
                    793:                soisdisconnected(unp2->unp_socket);
                    794:                break;
                    795:        }
                    796: }
                    797:
                    798: #ifdef notdef
1.76      matt      799: unp_abort(struct unpcb *unp)
1.1       cgd       800: {
                    801:        unp_detach(unp);
                    802: }
                    803: #endif
                    804:
1.5       andrew    805: void
1.76      matt      806: unp_shutdown(struct unpcb *unp)
1.1       cgd       807: {
                    808:        struct socket *so;
                    809:
                    810:        if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
                    811:            (so = unp->unp_conn->unp_socket))
                    812:                socantrcvmore(so);
                    813: }
                    814:
1.5       andrew    815: void
1.76      matt      816: unp_drop(struct unpcb *unp, int errno)
1.1       cgd       817: {
                    818:        struct socket *so = unp->unp_socket;
                    819:
                    820:        so->so_error = errno;
                    821:        unp_disconnect(unp);
                    822:        if (so->so_head) {
1.15      mycroft   823:                so->so_pcb = 0;
1.14      mycroft   824:                sofree(so);
1.20      mycroft   825:                if (unp->unp_addr)
1.26      thorpej   826:                        free(unp->unp_addr, M_SONAME);
1.14      mycroft   827:                free(unp, M_PCB);
1.1       cgd       828:        }
                    829: }
                    830:
                    831: #ifdef notdef
1.76      matt      832: unp_drain(void)
1.1       cgd       833: {
                    834:
                    835: }
                    836: #endif
                    837:
1.5       andrew    838: int
1.78      jonathan  839: unp_externalize(struct mbuf *rights, struct proc *p)
1.1       cgd       840: {
1.46      augustss  841:        struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
1.47      thorpej   842:        int i, *fdp;
1.46      augustss  843:        struct file **rp;
                    844:        struct file *fp;
1.50      thorpej   845:        int nfds, error = 0;
1.47      thorpej   846:
                    847:        nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
                    848:            sizeof(struct file *);
                    849:        rp = (struct file **)CMSG_DATA(cm);
1.1       cgd       850:
1.50      thorpej   851:        fdp = malloc(nfds * sizeof(int), M_TEMP, M_WAITOK);
                    852:
1.39      sommerfe  853:        /* Make sure the recipient should be able to see the descriptors.. */
1.42      thorpej   854:        if (p->p_cwdi->cwdi_rdir != NULL) {
1.48      thorpej   855:                rp = (struct file **)CMSG_DATA(cm);
1.39      sommerfe  856:                for (i = 0; i < nfds; i++) {
                    857:                        fp = *rp++;
                    858:                        /*
                    859:                         * If we are in a chroot'ed directory, and
                    860:                         * someone wants to pass us a directory, make
                    861:                         * sure it's inside the subtree we're allowed
                    862:                         * to access.
                    863:                         */
                    864:                        if (fp->f_type == DTYPE_VNODE) {
                    865:                                struct vnode *vp = (struct vnode *)fp->f_data;
                    866:                                if ((vp->v_type == VDIR) &&
1.62      fvdl      867:                                    !vn_isunder(vp, p->p_cwdi->cwdi_rdir, p)) {
1.39      sommerfe  868:                                        error = EPERM;
                    869:                                        break;
                    870:                                }
                    871:                        }
                    872:                }
                    873:        }
1.50      thorpej   874:
                    875:  restart:
1.47      thorpej   876:        rp = (struct file **)CMSG_DATA(cm);
1.50      thorpej   877:        if (error != 0) {
1.24      cgd       878:                for (i = 0; i < nfds; i++) {
1.1       cgd       879:                        fp = *rp;
1.39      sommerfe  880:                        /*
                    881:                         * zero the pointer before calling unp_discard,
                    882:                         * since it may end up in unp_gc()..
                    883:                         */
                    884:                        *rp++ = 0;
1.1       cgd       885:                        unp_discard(fp);
                    886:                }
1.50      thorpej   887:                goto out;
1.1       cgd       888:        }
1.50      thorpej   889:
1.24      cgd       890:        /*
1.50      thorpej   891:         * First loop -- allocate file descriptor table slots for the
                    892:         * new descriptors.
1.24      cgd       893:         */
                    894:        for (i = 0; i < nfds; i++) {
1.39      sommerfe  895:                fp = *rp++;
1.50      thorpej   896:                if ((error = fdalloc(p, 0, &fdp[i])) != 0) {
1.49      thorpej   897:                        /*
1.50      thorpej   898:                         * Back out what we've done so far.
1.49      thorpej   899:                         */
1.50      thorpej   900:                        for (--i; i >= 0; i--)
                    901:                                fdremove(p->p_fd, fdp[i]);
                    902:
                    903:                        if (error == ENOSPC) {
                    904:                                fdexpand(p);
                    905:                                error = 0;
                    906:                        } else {
                    907:                                /*
                    908:                                 * This is the error that has historically
                    909:                                 * been returned, and some callers may
                    910:                                 * expect it.
                    911:                                 */
                    912:                                error = EMSGSIZE;
                    913:                        }
                    914:                        goto restart;
1.49      thorpej   915:                }
1.50      thorpej   916:
                    917:                /*
                    918:                 * Make the slot reference the descriptor so that
                    919:                 * fdalloc() works properly.. We finalize it all
                    920:                 * in the loop below.
                    921:                 */
                    922:                p->p_fd->fd_ofiles[fdp[i]] = fp;
1.1       cgd       923:        }
1.24      cgd       924:
                    925:        /*
1.50      thorpej   926:         * Now that adding them has succeeded, update all of the
                    927:         * descriptor passing state.
1.24      cgd       928:         */
1.50      thorpej   929:        rp = (struct file **)CMSG_DATA(cm);
                    930:        for (i = 0; i < nfds; i++) {
                    931:                fp = *rp++;
                    932:                fp->f_msgcount--;
                    933:                unp_rights--;
                    934:        }
                    935:
                    936:        /*
                    937:         * Copy temporary array to message and adjust length, in case of
                    938:         * transition from large struct file pointers to ints.
                    939:         */
                    940:        memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int));
1.47      thorpej   941:        cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
                    942:        rights->m_len = CMSG_SPACE(nfds * sizeof(int));
1.50      thorpej   943:  out:
                    944:        free(fdp, M_TEMP);
                    945:        return (error);
1.1       cgd       946: }
                    947:
1.5       andrew    948: int
1.76      matt      949: unp_internalize(struct mbuf *control, struct proc *p)
1.1       cgd       950: {
1.24      cgd       951:        struct filedesc *fdescp = p->p_fd;
1.73      martin    952:        struct cmsghdr *newcm, *cm = mtod(control, struct cmsghdr *);
                    953:        struct file **rp, **files;
1.46      augustss  954:        struct file *fp;
                    955:        int i, fd, *fdp;
1.24      cgd       956:        int nfds;
                    957:        u_int neededspace;
1.38      thorpej   958:
1.24      cgd       959:        /* Sanity check the control message header */
1.66      jdolecek  960:        if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
1.1       cgd       961:            cm->cmsg_len != control->m_len)
                    962:                return (EINVAL);
1.24      cgd       963:
                    964:        /* Verify that the file descriptors are valid */
1.47      thorpej   965:        nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int);
                    966:        fdp = (int *)CMSG_DATA(cm);
1.24      cgd       967:        for (i = 0; i < nfds; i++) {
                    968:                fd = *fdp++;
1.58      pk        969:                if ((fp = fd_getfile(fdescp, fd)) == NULL)
1.1       cgd       970:                        return (EBADF);
1.58      pk        971:                simple_unlock(&fp->f_slock);
1.1       cgd       972:        }
1.24      cgd       973:
                    974:        /* Make sure we have room for the struct file pointers */
1.47      thorpej   975:        neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) -
                    976:            control->m_len;
1.24      cgd       977:        if (neededspace > M_TRAILINGSPACE(control)) {
                    978:
1.73      martin    979:                /* allocate new space and copy header into it */
                    980:                newcm = malloc(
                    981:                    CMSG_SPACE(nfds * sizeof(struct file *)),
                    982:                    M_MBUF, M_WAITOK);
                    983:                if (newcm == NULL)
1.24      cgd       984:                        return (E2BIG);
1.73      martin    985:                memcpy(newcm, cm, sizeof(struct cmsghdr));
1.80      perry     986:                files = (struct file **)CMSG_DATA(newcm);
1.73      martin    987:        } else {
                    988:                /* we can convert in-place */
                    989:                newcm = NULL;
                    990:                files = (struct file **)CMSG_DATA(cm);
1.24      cgd       991:        }
                    992:
                    993:        /*
                    994:         * Transform the file descriptors into struct file pointers, in
                    995:         * reverse order so that if pointers are bigger than ints, the
                    996:         * int won't get until we're done.
                    997:         */
1.73      martin    998:        fdp = (int *)CMSG_DATA(cm) + nfds - 1;
                    999:        rp = files + nfds - 1;
1.24      cgd      1000:        for (i = 0; i < nfds; i++) {
1.28      christos 1001:                fp = fdescp->fd_ofiles[*fdp--];
1.57      pk       1002:                simple_lock(&fp->f_slock);
                   1003: #ifdef DIAGNOSTIC
                   1004:                if (fp->f_iflags & FIF_WANTCLOSE)
                   1005:                        panic("unp_internalize: file already closed");
                   1006: #endif
1.24      cgd      1007:                *rp-- = fp;
1.1       cgd      1008:                fp->f_count++;
                   1009:                fp->f_msgcount++;
1.57      pk       1010:                simple_unlock(&fp->f_slock);
1.1       cgd      1011:                unp_rights++;
                   1012:        }
1.73      martin   1013:
                   1014:        if (newcm) {
                   1015:                if (control->m_flags & M_EXT)
                   1016:                        MEXTREMOVE(control);
                   1017:                MEXTADD(control, newcm,
                   1018:                    CMSG_SPACE(nfds * sizeof(struct file *)),
                   1019:                    M_MBUF, NULL, NULL);
                   1020:                cm = newcm;
                   1021:        }
                   1022:
                   1023:        /* adjust message & mbuf to note amount of space actually used. */
                   1024:        cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct file *));
                   1025:        control->m_len = CMSG_SPACE(nfds * sizeof(struct file *));
                   1026:
1.1       cgd      1027:        return (0);
1.30      thorpej  1028: }
                   1029:
                   1030: struct mbuf *
1.76      matt     1031: unp_addsockcred(struct proc *p, struct mbuf *control)
1.30      thorpej  1032: {
                   1033:        struct cmsghdr *cmp;
                   1034:        struct sockcred *sc;
                   1035:        struct mbuf *m, *n;
1.47      thorpej  1036:        int len, space, i;
1.30      thorpej  1037:
1.47      thorpej  1038:        len = CMSG_LEN(SOCKCREDSIZE(p->p_ucred->cr_ngroups));
                   1039:        space = CMSG_SPACE(SOCKCREDSIZE(p->p_ucred->cr_ngroups));
1.30      thorpej  1040:
                   1041:        m = m_get(M_WAIT, MT_CONTROL);
1.47      thorpej  1042:        if (space > MLEN) {
                   1043:                if (space > MCLBYTES)
                   1044:                        MEXTMALLOC(m, space, M_WAITOK);
1.30      thorpej  1045:                else
1.59      matt     1046:                        m_clget(m, M_WAIT);
1.30      thorpej  1047:                if ((m->m_flags & M_EXT) == 0) {
                   1048:                        m_free(m);
                   1049:                        return (control);
                   1050:                }
                   1051:        }
                   1052:
1.47      thorpej  1053:        m->m_len = space;
1.30      thorpej  1054:        m->m_next = NULL;
                   1055:        cmp = mtod(m, struct cmsghdr *);
                   1056:        sc = (struct sockcred *)CMSG_DATA(cmp);
                   1057:        cmp->cmsg_len = len;
                   1058:        cmp->cmsg_level = SOL_SOCKET;
                   1059:        cmp->cmsg_type = SCM_CREDS;
                   1060:        sc->sc_uid = p->p_cred->p_ruid;
                   1061:        sc->sc_euid = p->p_ucred->cr_uid;
                   1062:        sc->sc_gid = p->p_cred->p_rgid;
                   1063:        sc->sc_egid = p->p_ucred->cr_gid;
                   1064:        sc->sc_ngroups = p->p_ucred->cr_ngroups;
                   1065:        for (i = 0; i < sc->sc_ngroups; i++)
                   1066:                sc->sc_groups[i] = p->p_ucred->cr_groups[i];
                   1067:
                   1068:        /*
                   1069:         * If a control message already exists, append us to the end.
                   1070:         */
                   1071:        if (control != NULL) {
                   1072:                for (n = control; n->m_next != NULL; n = n->m_next)
                   1073:                        ;
                   1074:                n->m_next = m;
                   1075:        } else
                   1076:                control = m;
                   1077:
                   1078:        return (control);
1.1       cgd      1079: }
                   1080:
                   1081: int    unp_defer, unp_gcing;
                   1082: extern struct domain unixdomain;
                   1083:
1.39      sommerfe 1084: /*
                   1085:  * Comment added long after the fact explaining what's going on here.
                   1086:  * Do a mark-sweep GC of file descriptors on the system, to free up
                   1087:  * any which are caught in flight to an about-to-be-closed socket.
                   1088:  *
                   1089:  * Traditional mark-sweep gc's start at the "root", and mark
                   1090:  * everything reachable from the root (which, in our case would be the
                   1091:  * process table).  The mark bits are cleared during the sweep.
                   1092:  *
                   1093:  * XXX For some inexplicable reason (perhaps because the file
                   1094:  * descriptor tables used to live in the u area which could be swapped
                   1095:  * out and thus hard to reach), we do multiple scans over the set of
                   1096:  * descriptors, using *two* mark bits per object (DEFER and MARK).
                   1097:  * Whenever we find a descriptor which references other descriptors,
                   1098:  * the ones it references are marked with both bits, and we iterate
                   1099:  * over the whole file table until there are no more DEFER bits set.
                   1100:  * We also make an extra pass *before* the GC to clear the mark bits,
                   1101:  * which could have been cleared at almost no cost during the previous
                   1102:  * sweep.
                   1103:  *
                   1104:  * XXX MP: this needs to run with locks such that no other thread of
                   1105:  * control can create or destroy references to file descriptors. it
                   1106:  * may be necessary to defer the GC until later (when the locking
                   1107:  * situation is more hospitable); it may be necessary to push this
                   1108:  * into a separate thread.
                   1109:  */
1.5       andrew   1110: void
1.76      matt     1111: unp_gc(void)
1.1       cgd      1112: {
1.46      augustss 1113:        struct file *fp, *nextfp;
                   1114:        struct socket *so, *so1;
1.8       mycroft  1115:        struct file **extra_ref, **fpp;
                   1116:        int nunref, i;
1.1       cgd      1117:
                                /*
                                 * unp_gcing is a re-entry guard: if a collection is already
                                 * in progress, return immediately.
                                 */
                   1118:        if (unp_gcing)
                   1119:                return;
                   1120:        unp_gcing = 1;
                   1121:        unp_defer = 0;
1.39      sommerfe 1122:
                   1123:        /* Clear mark bits */
1.54      matt     1124:        LIST_FOREACH(fp, &filehead, f_list)
1.1       cgd      1125:                fp->f_flag &= ~(FMARK|FDEFER);
1.39      sommerfe 1126:
                   1127:        /*
                   1128:         * Iterate over the set of descriptors, marking ones believed
                   1129:         * (based on refcount) to be referenced from a process, and
                   1130:         * marking for rescan descriptors which are queued on a socket.
                   1131:         */
1.1       cgd      1132:        do {
1.54      matt     1133:                LIST_FOREACH(fp, &filehead, f_list) {
                                                /*
                                                 * A file flagged FDEFER (set by unp_mark()) is
                                                 * always marked and scanned; clear the flag and
                                                 * drop the pending-rescan count.
                                                 */
1.1       cgd      1134:                        if (fp->f_flag & FDEFER) {
                   1135:                                fp->f_flag &= ~FDEFER;
                   1136:                                unp_defer--;
1.39      sommerfe 1137: #ifdef DIAGNOSTIC
                   1138:                                if (fp->f_count == 0)
                   1139:                                        panic("unp_gc: deferred unreferenced socket");
                   1140: #endif
1.1       cgd      1141:                        } else {
                                                        /*
                                                         * Otherwise skip files that have no
                                                         * references, are already marked, or whose
                                                         * reference count equals f_msgcount.
                                                         */
1.39      sommerfe 1142:                                if (fp->f_count == 0)
                   1143:                                        continue;
1.1       cgd      1144:                                if (fp->f_flag & FMARK)
                   1145:                                        continue;
                   1146:                                if (fp->f_count == fp->f_msgcount)
                   1147:                                        continue;
                   1148:                        }
1.39      sommerfe 1149:                        fp->f_flag |= FMARK;
                   1150:
                                                /*
                                                 * Only sockets in the Unix domain whose protocol
                                                 * has PR_RIGHTS set are scanned for queued files.
                                                 */
1.1       cgd      1151:                        if (fp->f_type != DTYPE_SOCKET ||
                   1152:                            (so = (struct socket *)fp->f_data) == 0)
                   1153:                                continue;
                   1154:                        if (so->so_proto->pr_domain != &unixdomain ||
                   1155:                            (so->so_proto->pr_flags&PR_RIGHTS) == 0)
                   1156:                                continue;
                   1157: #ifdef notdef
                   1158:                        if (so->so_rcv.sb_flags & SB_LOCK) {
                   1159:                                /*
                   1160:                                 * This is problematical; it's not clear
                   1161:                                 * we need to wait for the sockbuf to be
                   1162:                                 * unlocked (on a uniprocessor, at least),
                   1163:                                 * and it's also not clear what to do
                   1164:                                 * if sbwait returns an error due to receipt
                   1165:                                 * of a signal.  If sbwait does return
                   1166:                                 * an error, we'll go into an infinite
                   1167:                                 * loop.  Delete all of this for now.
                   1168:                                 */
                   1169:                                (void) sbwait(&so->so_rcv);
                   1170:                                goto restart;
                   1171:                        }
                   1172: #endif
1.39      sommerfe 1173:                        unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
                   1174:                        /*
                   1175:                         * Mark descriptors referenced from sockets queued on
                                                 * the accept queues (so_q0 and so_q) as well.
                   1176:                         */
                   1177:                        if (so->so_options & SO_ACCEPTCONN) {
1.54      matt     1178:                                TAILQ_FOREACH(so1, &so->so_q0, so_qe) {
1.39      sommerfe 1179:                                        unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
                   1180:                                }
1.54      matt     1181:                                TAILQ_FOREACH(so1, &so->so_q, so_qe) {
1.39      sommerfe 1182:                                        unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
                   1183:                                }
                   1184:                        }
1.80      perry    1185:
1.1       cgd      1186:                }
                   1187:        } while (unp_defer);
1.8       mycroft  1188:        /*
1.39      sommerfe 1189:         * Sweep pass.  Find unmarked descriptors, and free them.
                   1190:         *
1.8       mycroft  1191:         * We grab an extra reference to each of the file table entries
                   1192:         * that are not otherwise accessible and then free the rights
                   1193:         * that are stored in messages on them.
                   1194:         *
1.57      pk       1195:         * The bug in the original code is a little tricky, so I'll describe
1.8       mycroft  1196:         * what's wrong with it here.
                   1197:         *
                   1198:         * It is incorrect to simply unp_discard each entry for f_msgcount
                   1199:         * times -- consider the case of sockets A and B that contain
                   1200:         * references to each other.  On a last close of some other socket,
                   1201:         * we trigger a gc since the number of outstanding rights (unp_rights)
                   1202:         * is non-zero.  If during the sweep phase the gc code unp_discards,
                   1203:         * we end up doing a (full) closef on the descriptor.  A closef on A
                   1204:         * results in the following chain.  Closef calls soo_close, which
                   1205:         * calls soclose.   Soclose calls first (through the switch
                   1206:         * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
                   1207:         * returns because the previous instance had set unp_gcing, and
                   1208:         * we return all the way back to soclose, which marks the socket
                   1209:         * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
                   1210:         * to free up the rights that are queued in messages on the socket A,
                   1211:         * i.e., the reference on B.  The sorflush calls via the dom_dispose
                   1212:         * switch unp_dispose, which unp_scans with unp_discard.  This second
                   1213:         * instance of unp_discard just calls closef on B.
                   1214:         *
                   1215:         * Well, a similar chain occurs on B, resulting in a sorflush on B,
                   1216:         * which results in another closef on A.  Unfortunately, A is already
                   1217:         * being closed, and the descriptor has already been marked with
                   1218:         * SS_NOFDREF, and soclose panics at this point.
                   1219:         *
                   1220:         * Here, we first take an extra reference to each inaccessible
1.39      sommerfe 1221:         * descriptor.  Then, if the inaccessible descriptor is a
                   1222:         * socket, we call sorflush in case it is a Unix domain
                   1223:         * socket.  After we destroy all the rights carried in
                   1224:         * messages, we do a last closef to get rid of our extra
                   1225:         * reference.  This is the last close, and the unp_detach etc
                   1226:         * will shut down the socket.
1.8       mycroft  1227:         *
                   1228:         * 91/09/19, bsy@cs.cmu.edu
                   1229:         */
                                /* extra_ref has room for nfiles file pointers. */
                   1230:        extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
                                /*
                                 * Pass 1: under f_slock, select every file that has
                                 * references, has f_count == f_msgcount, and is not marked;
                                 * record it in extra_ref and take an extra reference.
                                 */
1.54      matt     1231:        for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; fp != 0;
1.11      mycroft  1232:            fp = nextfp) {
1.54      matt     1233:                nextfp = LIST_NEXT(fp, f_list);
1.57      pk       1234:                simple_lock(&fp->f_slock);
                   1235:                if (fp->f_count != 0 &&
                   1236:                    fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
1.8       mycroft  1237:                        *fpp++ = fp;
                   1238:                        nunref++;
                   1239:                        fp->f_count++;
                   1240:                }
1.57      pk       1241:                simple_unlock(&fp->f_slock);
1.1       cgd      1242:        }
                                /*
                                 * Pass 2: call sorflush() on each selected file that is a
                                 * socket.
                                 * NOTE(review): f_slock is taken here with no matching
                                 * simple_unlock() in this function; presumably FILE_USE()
                                 * releases it -- confirm against the macro's definition.
                                 */
1.39      sommerfe 1243:        for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
1.45      thorpej  1244:                fp = *fpp;
1.57      pk       1245:                simple_lock(&fp->f_slock);
1.44      thorpej  1246:                FILE_USE(fp);
1.39      sommerfe 1247:                if (fp->f_type == DTYPE_SOCKET)
                   1248:                        sorflush((struct socket *)fp->f_data);
1.44      thorpej  1249:                FILE_UNUSE(fp, NULL);
1.39      sommerfe 1250:        }
                                /*
                                 * Pass 3: closef() each selected file to drop the extra
                                 * reference taken in pass 1.
                                 */
1.44      thorpej  1251:        for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
1.45      thorpej  1252:                fp = *fpp;
1.57      pk       1253:                simple_lock(&fp->f_slock);
1.44      thorpej  1254:                FILE_USE(fp);
1.62      fvdl     1255:                (void) closef(fp, (struct proc *)0);
1.44      thorpej  1256:        }
1.8       mycroft  1257:        free((caddr_t)extra_ref, M_FILE);
                                /* Collection finished; allow the next unp_gc() call to run. */
1.1       cgd      1258:        unp_gcing = 0;
                   1259: }
                   1260:
1.5       andrew   1261: void
1.76      matt     1262: unp_dispose(struct mbuf *m)
1.1       cgd      1263: {
1.8       mycroft  1264:
1.1       cgd      1265:        if (m)
1.39      sommerfe 1266:                unp_scan(m, unp_discard, 1);
1.1       cgd      1267: }
                   1268:
1.5       andrew   1269: void
1.76      matt     1270: unp_scan(struct mbuf *m0, void (*op)(struct file *), int discard)
1.1       cgd      1271: {
1.46      augustss 1272:        struct mbuf *m;
                   1273:        struct file **rp;
                   1274:        struct cmsghdr *cm;
                   1275:        int i;
1.1       cgd      1276:        int qfds;
                   1277:
                   1278:        while (m0) {
1.48      thorpej  1279:                for (m = m0; m; m = m->m_next) {
1.1       cgd      1280:                        if (m->m_type == MT_CONTROL &&
                   1281:                            m->m_len >= sizeof(*cm)) {
                   1282:                                cm = mtod(m, struct cmsghdr *);
                   1283:                                if (cm->cmsg_level != SOL_SOCKET ||
                   1284:                                    cm->cmsg_type != SCM_RIGHTS)
                   1285:                                        continue;
1.48      thorpej  1286:                                qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm)))
                   1287:                                    / sizeof(struct file *);
                   1288:                                rp = (struct file **)CMSG_DATA(cm);
1.39      sommerfe 1289:                                for (i = 0; i < qfds; i++) {
                   1290:                                        struct file *fp = *rp;
                   1291:                                        if (discard)
                   1292:                                                *rp = 0;
                   1293:                                        (*op)(fp);
                   1294:                                        rp++;
                   1295:                                }
1.1       cgd      1296:                                break;          /* XXX, but saves time */
                   1297:                        }
1.48      thorpej  1298:                }
1.52      thorpej  1299:                m0 = m0->m_nextpkt;
1.1       cgd      1300:        }
                   1301: }
                   1302:
1.5       andrew   1303: void
1.76      matt     1304: unp_mark(struct file *fp)
1.1       cgd      1305: {
1.39      sommerfe 1306:        if (fp == NULL)
                   1307:                return;
1.80      perry    1308:
1.39      sommerfe 1309:        if (fp->f_flag & FMARK)
                   1310:                return;
1.1       cgd      1311:
1.39      sommerfe 1312:        /* If we're already deferred, don't screw up the defer count */
                   1313:        if (fp->f_flag & FDEFER)
1.1       cgd      1314:                return;
1.39      sommerfe 1315:
                   1316:        /*
                   1317:         * Minimize the number of deferrals...  Sockets are the only
                   1318:         * type of descriptor which can hold references to another
                   1319:         * descriptor, so just mark other descriptors, and defer
                   1320:         * unmarked sockets for the next pass.
                   1321:         */
                   1322:        if (fp->f_type == DTYPE_SOCKET) {
                   1323:                unp_defer++;
                   1324:                if (fp->f_count == 0)
                   1325:                        panic("unp_mark: queued unref");
                   1326:                fp->f_flag |= FDEFER;
                   1327:        } else {
                   1328:                fp->f_flag |= FMARK;
                   1329:        }
                   1330:        return;
1.1       cgd      1331: }
                   1332:
1.5       andrew   1333: void
1.76      matt     1334: unp_discard(struct file *fp)
1.1       cgd      1335: {
1.39      sommerfe 1336:        if (fp == NULL)
                   1337:                return;
1.57      pk       1338:        simple_lock(&fp->f_slock);
                   1339:        fp->f_usecount++;       /* i.e. FILE_USE(fp) sans locking */
1.1       cgd      1340:        fp->f_msgcount--;
1.57      pk       1341:        simple_unlock(&fp->f_slock);
1.1       cgd      1342:        unp_rights--;
1.62      fvdl     1343:        (void) closef(fp, (struct proc *)0);
1.1       cgd      1344: }

CVSweb <webmaster@jp.NetBSD.org>