[BACK]Return to uipc_usrreq.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / kern

Annotation of src/sys/kern/uipc_usrreq.c, Revision 1.98

1.98    ! martin      1: /*     $NetBSD: uipc_usrreq.c,v 1.97 2007/04/22 08:30:00 dsl Exp $     */
1.30      thorpej     2:
                      3: /*-
1.77      matt        4:  * Copyright (c) 1998, 2000, 2004 The NetBSD Foundation, Inc.
1.30      thorpej     5:  * All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to The NetBSD Foundation
                      8:  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
                      9:  * NASA Ames Research Center.
                     10:  *
                     11:  * Redistribution and use in source and binary forms, with or without
                     12:  * modification, are permitted provided that the following conditions
                     13:  * are met:
                     14:  * 1. Redistributions of source code must retain the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer.
                     16:  * 2. Redistributions in binary form must reproduce the above copyright
                     17:  *    notice, this list of conditions and the following disclaimer in the
                     18:  *    documentation and/or other materials provided with the distribution.
                     19:  * 3. All advertising materials mentioning features or use of this software
                     20:  *    must display the following acknowledgement:
                     21:  *     This product includes software developed by the NetBSD
                     22:  *     Foundation, Inc. and its contributors.
                     23:  * 4. Neither the name of The NetBSD Foundation nor the names of its
                     24:  *    contributors may be used to endorse or promote products derived
                     25:  *    from this software without specific prior written permission.
                     26:  *
                     27:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     28:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     29:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     30:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     31:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     32:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     33:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     34:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     35:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     36:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     37:  * POSSIBILITY OF SUCH DAMAGE.
                     38:  */
1.10      cgd        39:
1.1       cgd        40: /*
1.8       mycroft    41:  * Copyright (c) 1982, 1986, 1989, 1991, 1993
                     42:  *     The Regents of the University of California.  All rights reserved.
1.1       cgd        43:  *
                     44:  * Redistribution and use in source and binary forms, with or without
                     45:  * modification, are permitted provided that the following conditions
                     46:  * are met:
                     47:  * 1. Redistributions of source code must retain the above copyright
                     48:  *    notice, this list of conditions and the following disclaimer.
                     49:  * 2. Redistributions in binary form must reproduce the above copyright
                     50:  *    notice, this list of conditions and the following disclaimer in the
                     51:  *    documentation and/or other materials provided with the distribution.
1.67      agc        52:  * 3. Neither the name of the University nor the names of its contributors
                     53:  *    may be used to endorse or promote products derived from this software
                     54:  *    without specific prior written permission.
                     55:  *
                     56:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     57:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     58:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     59:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     60:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     61:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     62:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     63:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     64:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     65:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     66:  * SUCH DAMAGE.
                     67:  *
                     68:  *     @(#)uipc_usrreq.c       8.9 (Berkeley) 5/14/95
                     69:  */
                     70:
                     71: /*
                     72:  * Copyright (c) 1997 Christopher G. Demetriou.  All rights reserved.
                     73:  *
                     74:  * Redistribution and use in source and binary forms, with or without
                     75:  * modification, are permitted provided that the following conditions
                     76:  * are met:
                     77:  * 1. Redistributions of source code must retain the above copyright
                     78:  *    notice, this list of conditions and the following disclaimer.
                     79:  * 2. Redistributions in binary form must reproduce the above copyright
                     80:  *    notice, this list of conditions and the following disclaimer in the
                     81:  *    documentation and/or other materials provided with the distribution.
1.1       cgd        82:  * 3. All advertising materials mentioning features or use of this software
                     83:  *    must display the following acknowledgement:
                     84:  *     This product includes software developed by the University of
                     85:  *     California, Berkeley and its contributors.
                     86:  * 4. Neither the name of the University nor the names of its contributors
                     87:  *    may be used to endorse or promote products derived from this software
                     88:  *    without specific prior written permission.
                     89:  *
                     90:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     91:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     92:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     93:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     94:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     95:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     96:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     97:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     98:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     99:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                    100:  * SUCH DAMAGE.
                    101:  *
1.31      fvdl      102:  *     @(#)uipc_usrreq.c       8.9 (Berkeley) 5/14/95
1.1       cgd       103:  */
1.53      lukem     104:
                    105: #include <sys/cdefs.h>
1.98    ! martin    106: __KERNEL_RCSID(0, "$NetBSD: uipc_usrreq.c,v 1.97 2007/04/22 08:30:00 dsl Exp $");
1.1       cgd       107:
1.7       mycroft   108: #include <sys/param.h>
1.8       mycroft   109: #include <sys/systm.h>
1.7       mycroft   110: #include <sys/proc.h>
                    111: #include <sys/filedesc.h>
                    112: #include <sys/domain.h>
                    113: #include <sys/protosw.h>
                    114: #include <sys/socket.h>
                    115: #include <sys/socketvar.h>
                    116: #include <sys/unpcb.h>
                    117: #include <sys/un.h>
                    118: #include <sys/namei.h>
                    119: #include <sys/vnode.h>
                    120: #include <sys/file.h>
                    121: #include <sys/stat.h>
                    122: #include <sys/mbuf.h>
1.91      elad      123: #include <sys/kauth.h>
1.1       cgd       124:
                    125: /*
                    126:  * Unix communications domain.
                    127:  *
                    128:  * TODO:
                    129:  *     SEQPACKET, RDM
                    130:  *     rethink name space problems
                    131:  *     need a proper out-of-band
                    132:  */
1.93      christos  133: const struct sockaddr_un sun_noname = {
                    134:        .sun_len = sizeof(sun_noname),
                    135:        .sun_family = AF_LOCAL,
                    136: };
1.1       cgd       137: ino_t  unp_ino;                        /* prototype for fake inode numbers */
                    138:
1.92      ad        139: struct mbuf *unp_addsockcred(struct lwp *, struct mbuf *);
1.30      thorpej   140:
1.20      mycroft   141: int
1.76      matt      142: unp_output(struct mbuf *m, struct mbuf *control, struct unpcb *unp,
1.92      ad        143:        struct lwp *l)
1.20      mycroft   144: {
                    145:        struct socket *so2;
1.77      matt      146:        const struct sockaddr_un *sun;
1.20      mycroft   147:
                    148:        so2 = unp->unp_conn->unp_socket;
                    149:        if (unp->unp_addr)
                    150:                sun = unp->unp_addr;
                    151:        else
                    152:                sun = &sun_noname;
1.30      thorpej   153:        if (unp->unp_conn->unp_flags & UNP_WANTCRED)
1.92      ad        154:                control = unp_addsockcred(l, control);
1.82      christos  155:        if (sbappendaddr(&so2->so_rcv, (const struct sockaddr *)sun, m,
1.20      mycroft   156:            control) == 0) {
1.98    ! martin    157:                unp_dispose(control);
1.20      mycroft   158:                m_freem(control);
                    159:                m_freem(m);
1.79      darrenr   160:                so2->so_rcv.sb_overflowed++;
1.60      christos  161:                return (ENOBUFS);
1.20      mycroft   162:        } else {
                    163:                sorwakeup(so2);
                    164:                return (0);
                    165:        }
                    166: }
                    167:
                    168: void
1.76      matt      169: unp_setsockaddr(struct unpcb *unp, struct mbuf *nam)
1.20      mycroft   170: {
1.77      matt      171:        const struct sockaddr_un *sun;
1.20      mycroft   172:
                    173:        if (unp->unp_addr)
                    174:                sun = unp->unp_addr;
                    175:        else
                    176:                sun = &sun_noname;
                    177:        nam->m_len = sun->sun_len;
1.56      itojun    178:        if (nam->m_len > MLEN)
1.27      thorpej   179:                MEXTMALLOC(nam, nam->m_len, M_WAITOK);
1.95      christos  180:        memcpy(mtod(nam, void *), sun, (size_t)nam->m_len);
1.20      mycroft   181: }
                    182:
                    183: void
1.76      matt      184: unp_setpeeraddr(struct unpcb *unp, struct mbuf *nam)
1.20      mycroft   185: {
1.77      matt      186:        const struct sockaddr_un *sun;
1.20      mycroft   187:
                    188:        if (unp->unp_conn && unp->unp_conn->unp_addr)
                    189:                sun = unp->unp_conn->unp_addr;
                    190:        else
                    191:                sun = &sun_noname;
                    192:        nam->m_len = sun->sun_len;
1.56      itojun    193:        if (nam->m_len > MLEN)
1.27      thorpej   194:                MEXTMALLOC(nam, nam->m_len, M_WAITOK);
1.95      christos  195:        memcpy(mtod(nam, void *), sun, (size_t)nam->m_len);
1.20      mycroft   196: }
                    197:
1.1       cgd       198: /*ARGSUSED*/
1.5       andrew    199: int
1.76      matt      200: uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
1.86      christos  201:        struct mbuf *control, struct lwp *l)
1.1       cgd       202: {
                    203:        struct unpcb *unp = sotounpcb(so);
1.46      augustss  204:        struct socket *so2;
1.86      christos  205:        struct proc *p;
1.75      christos  206:        u_int newhiwat;
1.46      augustss  207:        int error = 0;
1.1       cgd       208:
                    209:        if (req == PRU_CONTROL)
                    210:                return (EOPNOTSUPP);
1.20      mycroft   211:
1.22      mycroft   212: #ifdef DIAGNOSTIC
                    213:        if (req != PRU_SEND && req != PRU_SENDOOB && control)
                    214:                panic("uipc_usrreq: unexpected control mbuf");
                    215: #endif
1.86      christos  216:        p = l ? l->l_proc : NULL;
1.1       cgd       217:        if (unp == 0 && req != PRU_ATTACH) {
                    218:                error = EINVAL;
                    219:                goto release;
                    220:        }
1.20      mycroft   221:
1.1       cgd       222:        switch (req) {
                    223:
                    224:        case PRU_ATTACH:
1.20      mycroft   225:                if (unp != 0) {
1.1       cgd       226:                        error = EISCONN;
                    227:                        break;
                    228:                }
                    229:                error = unp_attach(so);
                    230:                break;
                    231:
                    232:        case PRU_DETACH:
                    233:                unp_detach(unp);
                    234:                break;
                    235:
                    236:        case PRU_BIND:
1.90      christos  237:                KASSERT(l != NULL);
1.86      christos  238:                error = unp_bind(unp, nam, l);
1.1       cgd       239:                break;
                    240:
                    241:        case PRU_LISTEN:
                    242:                if (unp->unp_vnode == 0)
                    243:                        error = EINVAL;
                    244:                break;
                    245:
                    246:        case PRU_CONNECT:
1.90      christos  247:                KASSERT(l != NULL);
1.86      christos  248:                error = unp_connect(so, nam, l);
1.1       cgd       249:                break;
                    250:
                    251:        case PRU_CONNECT2:
1.72      matt      252:                error = unp_connect2(so, (struct socket *)nam, PRU_CONNECT2);
1.1       cgd       253:                break;
                    254:
                    255:        case PRU_DISCONNECT:
                    256:                unp_disconnect(unp);
                    257:                break;
                    258:
                    259:        case PRU_ACCEPT:
1.20      mycroft   260:                unp_setpeeraddr(unp, nam);
1.72      matt      261:                /*
                    262:                 * Mark the initiating STREAM socket as connected *ONLY*
                    263:                 * after it's been accepted.  This prevents a client from
                    264:                 * overrunning a server and receiving ECONNREFUSED.
                    265:                 */
                    266:                if (unp->unp_conn != NULL &&
                    267:                    (unp->unp_conn->unp_socket->so_state & SS_ISCONNECTING))
                    268:                        soisconnected(unp->unp_conn->unp_socket);
1.1       cgd       269:                break;
                    270:
                    271:        case PRU_SHUTDOWN:
                    272:                socantsendmore(so);
                    273:                unp_shutdown(unp);
                    274:                break;
                    275:
                    276:        case PRU_RCVD:
                    277:                switch (so->so_type) {
                    278:
                    279:                case SOCK_DGRAM:
                    280:                        panic("uipc 1");
                    281:                        /*NOTREACHED*/
                    282:
                    283:                case SOCK_STREAM:
                    284: #define        rcv (&so->so_rcv)
                    285: #define snd (&so2->so_snd)
                    286:                        if (unp->unp_conn == 0)
                    287:                                break;
                    288:                        so2 = unp->unp_conn->unp_socket;
                    289:                        /*
                    290:                         * Adjust backpressure on sender
                    291:                         * and wakeup any waiting to write.
                    292:                         */
                    293:                        snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
                    294:                        unp->unp_mbcnt = rcv->sb_mbcnt;
1.75      christos  295:                        newhiwat = snd->sb_hiwat + unp->unp_cc - rcv->sb_cc;
1.81      christos  296:                        (void)chgsbsize(so2->so_uidinfo,
1.75      christos  297:                            &snd->sb_hiwat, newhiwat, RLIM_INFINITY);
1.1       cgd       298:                        unp->unp_cc = rcv->sb_cc;
                    299:                        sowwakeup(so2);
                    300: #undef snd
                    301: #undef rcv
                    302:                        break;
                    303:
                    304:                default:
                    305:                        panic("uipc 2");
                    306:                }
                    307:                break;
                    308:
                    309:        case PRU_SEND:
1.30      thorpej   310:                /*
                    311:                 * Note: unp_internalize() rejects any control message
                    312:                 * other than SCM_RIGHTS, and only allows one.  This
                    313:                 * has the side-effect of preventing a caller from
                    314:                 * forging SCM_CREDS.
                    315:                 */
1.90      christos  316:                if (control) {
                    317:                        KASSERT(l != NULL);
                    318:                        if ((error = unp_internalize(control, l)) != 0)
                    319:                                goto die;
1.83      yamt      320:                }
1.1       cgd       321:                switch (so->so_type) {
                    322:
                    323:                case SOCK_DGRAM: {
                    324:                        if (nam) {
1.20      mycroft   325:                                if ((so->so_state & SS_ISCONNECTED) != 0) {
1.1       cgd       326:                                        error = EISCONN;
1.21      mycroft   327:                                        goto die;
1.1       cgd       328:                                }
1.90      christos  329:                                KASSERT(l != NULL);
1.86      christos  330:                                error = unp_connect(so, nam, l);
1.20      mycroft   331:                                if (error) {
1.23      mycroft   332:                                die:
1.98    ! martin    333:                                        unp_dispose(control);
1.21      mycroft   334:                                        m_freem(control);
1.20      mycroft   335:                                        m_freem(m);
1.1       cgd       336:                                        break;
1.20      mycroft   337:                                }
1.1       cgd       338:                        } else {
1.20      mycroft   339:                                if ((so->so_state & SS_ISCONNECTED) == 0) {
1.1       cgd       340:                                        error = ENOTCONN;
1.21      mycroft   341:                                        goto die;
1.1       cgd       342:                                }
                    343:                        }
1.89      christos  344:                        KASSERT(p != NULL);
1.92      ad        345:                        error = unp_output(m, control, unp, l);
1.1       cgd       346:                        if (nam)
                    347:                                unp_disconnect(unp);
                    348:                        break;
                    349:                }
                    350:
                    351:                case SOCK_STREAM:
                    352: #define        rcv (&so2->so_rcv)
                    353: #define        snd (&so->so_snd)
1.87      christos  354:                        if (unp->unp_conn == NULL) {
                    355:                                error = ENOTCONN;
                    356:                                break;
                    357:                        }
1.1       cgd       358:                        so2 = unp->unp_conn->unp_socket;
1.30      thorpej   359:                        if (unp->unp_conn->unp_flags & UNP_WANTCRED) {
                    360:                                /*
                    361:                                 * Credentials are passed only once on
                    362:                                 * SOCK_STREAM.
                    363:                                 */
                    364:                                unp->unp_conn->unp_flags &= ~UNP_WANTCRED;
1.92      ad        365:                                control = unp_addsockcred(l, control);
1.30      thorpej   366:                        }
1.1       cgd       367:                        /*
                    368:                         * Send to paired receive port, and then reduce
                    369:                         * send buffer hiwater marks to maintain backpressure.
                    370:                         * Wake up readers.
                    371:                         */
                    372:                        if (control) {
1.98    ! martin    373:                                if (sbappendcontrol(rcv, m, control) == 0) {
        !           374:                                        unp_dispose(control);
1.21      mycroft   375:                                        m_freem(control);
1.98    ! martin    376:                                }
1.1       cgd       377:                        } else
                    378:                                sbappend(rcv, m);
                    379:                        snd->sb_mbmax -=
                    380:                            rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
                    381:                        unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
1.75      christos  382:                        newhiwat = snd->sb_hiwat -
                    383:                            (rcv->sb_cc - unp->unp_conn->unp_cc);
1.81      christos  384:                        (void)chgsbsize(so->so_uidinfo,
1.75      christos  385:                            &snd->sb_hiwat, newhiwat, RLIM_INFINITY);
1.1       cgd       386:                        unp->unp_conn->unp_cc = rcv->sb_cc;
                    387:                        sorwakeup(so2);
                    388: #undef snd
                    389: #undef rcv
                    390:                        break;
                    391:
                    392:                default:
                    393:                        panic("uipc 4");
                    394:                }
                    395:                break;
                    396:
                    397:        case PRU_ABORT:
                    398:                unp_drop(unp, ECONNABORTED);
1.39      sommerfe  399:
1.88      matt      400:                KASSERT(so->so_head == NULL);
1.39      sommerfe  401: #ifdef DIAGNOSTIC
                    402:                if (so->so_pcb == 0)
                    403:                        panic("uipc 5: drop killed pcb");
                    404: #endif
                    405:                unp_detach(unp);
1.1       cgd       406:                break;
                    407:
                    408:        case PRU_SENSE:
                    409:                ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
                    410:                if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
                    411:                        so2 = unp->unp_conn->unp_socket;
                    412:                        ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
                    413:                }
                    414:                ((struct stat *) m)->st_dev = NODEV;
                    415:                if (unp->unp_ino == 0)
                    416:                        unp->unp_ino = unp_ino++;
1.25      kleink    417:                ((struct stat *) m)->st_atimespec =
                    418:                    ((struct stat *) m)->st_mtimespec =
                    419:                    ((struct stat *) m)->st_ctimespec = unp->unp_ctime;
1.1       cgd       420:                ((struct stat *) m)->st_ino = unp->unp_ino;
                    421:                return (0);
                    422:
                    423:        case PRU_RCVOOB:
1.20      mycroft   424:                error = EOPNOTSUPP;
                    425:                break;
1.1       cgd       426:
                    427:        case PRU_SENDOOB:
1.22      mycroft   428:                m_freem(control);
1.20      mycroft   429:                m_freem(m);
1.1       cgd       430:                error = EOPNOTSUPP;
                    431:                break;
                    432:
                    433:        case PRU_SOCKADDR:
1.20      mycroft   434:                unp_setsockaddr(unp, nam);
1.1       cgd       435:                break;
                    436:
                    437:        case PRU_PEERADDR:
1.20      mycroft   438:                unp_setpeeraddr(unp, nam);
1.1       cgd       439:                break;
                    440:
                    441:        default:
                    442:                panic("piusrreq");
                    443:        }
1.20      mycroft   444:
1.1       cgd       445: release:
                    446:        return (error);
                    447: }
                    448:
                    449: /*
1.30      thorpej   450:  * Unix domain socket option processing.
                    451:  */
                    452: int
1.76      matt      453: uipc_ctloutput(int op, struct socket *so, int level, int optname,
                    454:        struct mbuf **mp)
1.30      thorpej   455: {
                    456:        struct unpcb *unp = sotounpcb(so);
                    457:        struct mbuf *m = *mp;
                    458:        int optval = 0, error = 0;
                    459:
                    460:        if (level != 0) {
                    461:                error = EINVAL;
                    462:                if (op == PRCO_SETOPT && m)
                    463:                        (void) m_free(m);
                    464:        } else switch (op) {
                    465:
                    466:        case PRCO_SETOPT:
                    467:                switch (optname) {
                    468:                case LOCAL_CREDS:
1.72      matt      469:                case LOCAL_CONNWAIT:
1.30      thorpej   470:                        if (m == NULL || m->m_len != sizeof(int))
                    471:                                error = EINVAL;
                    472:                        else {
                    473:                                optval = *mtod(m, int *);
                    474:                                switch (optname) {
                    475: #define        OPTSET(bit) \
                    476:        if (optval) \
                    477:                unp->unp_flags |= (bit); \
                    478:        else \
                    479:                unp->unp_flags &= ~(bit);
                    480:
                    481:                                case LOCAL_CREDS:
                    482:                                        OPTSET(UNP_WANTCRED);
                    483:                                        break;
1.72      matt      484:                                case LOCAL_CONNWAIT:
                    485:                                        OPTSET(UNP_CONNWAIT);
                    486:                                        break;
1.30      thorpej   487:                                }
                    488:                        }
                    489:                        break;
                    490: #undef OPTSET
                    491:
                    492:                default:
                    493:                        error = ENOPROTOOPT;
                    494:                        break;
                    495:                }
                    496:                if (m)
                    497:                        (void) m_free(m);
                    498:                break;
                    499:
                    500:        case PRCO_GETOPT:
                    501:                switch (optname) {
                    502:                case LOCAL_CREDS:
                    503:                        *mp = m = m_get(M_WAIT, MT_SOOPTS);
                    504:                        m->m_len = sizeof(int);
                    505:                        switch (optname) {
                    506:
                    507: #define        OPTBIT(bit)     (unp->unp_flags & (bit) ? 1 : 0)
                    508:
                    509:                        case LOCAL_CREDS:
                    510:                                optval = OPTBIT(UNP_WANTCRED);
                    511:                                break;
                    512:                        }
                    513:                        *mtod(m, int *) = optval;
                    514:                        break;
                    515: #undef OPTBIT
                    516:
                    517:                default:
                    518:                        error = ENOPROTOOPT;
                    519:                        break;
                    520:                }
                    521:                break;
                    522:        }
                    523:        return (error);
                    524: }
                    525:
                    526: /*
1.1       cgd       527:  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
                    528:  * for stream sockets, although the total for sender and receiver is
                    529:  * actually only PIPSIZ.
                    530:  * Datagram sockets really use the sendspace as the maximum datagram size,
                    531:  * and don't really want to reserve the sendspace.  Their recvspace should
                    532:  * be large enough for at least one max-size datagram plus address.
                    533:  */
                    534: #define        PIPSIZ  4096
                    535: u_long unpst_sendspace = PIPSIZ;
                    536: u_long unpst_recvspace = PIPSIZ;
                    537: u_long unpdg_sendspace = 2*1024;       /* really max datagram size */
                    538: u_long unpdg_recvspace = 4*1024;
                    539:
                    540: int    unp_rights;                     /* file descriptors in flight */
                    541:
1.5       andrew    542: int
1.76      matt      543: unp_attach(struct socket *so)
1.1       cgd       544: {
1.46      augustss  545:        struct unpcb *unp;
1.1       cgd       546:        int error;
1.80      perry     547:
1.1       cgd       548:        if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
                    549:                switch (so->so_type) {
                    550:
                    551:                case SOCK_STREAM:
                    552:                        error = soreserve(so, unpst_sendspace, unpst_recvspace);
                    553:                        break;
                    554:
                    555:                case SOCK_DGRAM:
                    556:                        error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
                    557:                        break;
1.8       mycroft   558:
                    559:                default:
                    560:                        panic("unp_attach");
1.1       cgd       561:                }
                    562:                if (error)
                    563:                        return (error);
                    564:        }
1.14      mycroft   565:        unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT);
                    566:        if (unp == NULL)
1.1       cgd       567:                return (ENOBUFS);
1.95      christos  568:        memset((void *)unp, 0, sizeof(*unp));
1.14      mycroft   569:        unp->unp_socket = so;
1.15      mycroft   570:        so->so_pcb = unp;
1.85      simonb    571:        nanotime(&unp->unp_ctime);
1.1       cgd       572:        return (0);
                    573: }
                    574:
1.17      pk        575: void
1.76      matt      576: unp_detach(struct unpcb *unp)
1.1       cgd       577: {
1.80      perry     578:
1.1       cgd       579:        if (unp->unp_vnode) {
                    580:                unp->unp_vnode->v_socket = 0;
                    581:                vrele(unp->unp_vnode);
                    582:                unp->unp_vnode = 0;
                    583:        }
                    584:        if (unp->unp_conn)
                    585:                unp_disconnect(unp);
                    586:        while (unp->unp_refs)
                    587:                unp_drop(unp->unp_refs, ECONNRESET);
                    588:        soisdisconnected(unp->unp_socket);
                    589:        unp->unp_socket->so_pcb = 0;
1.20      mycroft   590:        if (unp->unp_addr)
1.26      thorpej   591:                free(unp->unp_addr, M_SONAME);
1.8       mycroft   592:        if (unp_rights) {
                    593:                /*
                    594:                 * Normally the receive buffer is flushed later,
                    595:                 * in sofree, but if our receive buffer holds references
                    596:                 * to descriptors that are now garbage, we will dispose
                    597:                 * of those descriptor references after the garbage collector
                    598:                 * gets them (resulting in a "panic: closef: count < 0").
                    599:                 */
                    600:                sorflush(unp->unp_socket);
1.14      mycroft   601:                free(unp, M_PCB);
1.1       cgd       602:                unp_gc();
1.14      mycroft   603:        } else
                    604:                free(unp, M_PCB);
1.1       cgd       605: }
                    606:
1.5       andrew    607: int
1.86      christos  608: unp_bind(struct unpcb *unp, struct mbuf *nam, struct lwp *l)
1.1       cgd       609: {
1.27      thorpej   610:        struct sockaddr_un *sun;
1.46      augustss  611:        struct vnode *vp;
1.1       cgd       612:        struct vattr vattr;
1.27      thorpej   613:        size_t addrlen;
1.86      christos  614:        struct proc *p;
1.1       cgd       615:        int error;
                    616:        struct nameidata nd;
                    617:
1.20      mycroft   618:        if (unp->unp_vnode != 0)
                    619:                return (EINVAL);
1.27      thorpej   620:
1.86      christos  621:        p = l->l_proc;
1.27      thorpej   622:        /*
                    623:         * Allocate the new sockaddr.  We have to allocate one
                    624:         * extra byte so that we can ensure that the pathname
                    625:         * is nul-terminated.
                    626:         */
                    627:        addrlen = nam->m_len + 1;
                    628:        sun = malloc(addrlen, M_SONAME, M_WAITOK);
1.95      christos  629:        m_copydata(nam, 0, nam->m_len, (void *)sun);
1.27      thorpej   630:        *(((char *)sun) + nam->m_len) = '\0';
                    631:
1.97      dsl       632:        NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT | TRYEMULROOT, UIO_SYSSPACE,
1.86      christos  633:            sun->sun_path, l);
1.27      thorpej   634:
1.1       cgd       635: /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
1.16      christos  636:        if ((error = namei(&nd)) != 0)
1.27      thorpej   637:                goto bad;
1.9       mycroft   638:        vp = nd.ni_vp;
1.96      hannken   639:        if (vp != NULL) {
1.9       mycroft   640:                VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
                    641:                if (nd.ni_dvp == vp)
                    642:                        vrele(nd.ni_dvp);
1.1       cgd       643:                else
1.9       mycroft   644:                        vput(nd.ni_dvp);
1.1       cgd       645:                vrele(vp);
1.96      hannken   646:                error = EADDRINUSE;
                    647:                goto bad;
1.1       cgd       648:        }
                    649:        VATTR_NULL(&vattr);
                    650:        vattr.va_type = VSOCK;
1.84      jmmv      651:        vattr.va_mode = ACCESSPERMS & ~(p->p_cwdi->cwdi_cmask);
1.92      ad        652:        VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1.16      christos  653:        error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
                    654:        if (error)
1.27      thorpej   655:                goto bad;
1.9       mycroft   656:        vp = nd.ni_vp;
1.1       cgd       657:        vp->v_socket = unp->unp_socket;
                    658:        unp->unp_vnode = vp;
1.27      thorpej   659:        unp->unp_addrlen = addrlen;
                    660:        unp->unp_addr = sun;
1.31      fvdl      661:        VOP_UNLOCK(vp, 0);
1.1       cgd       662:        return (0);
1.27      thorpej   663:
                    664:  bad:
                    665:        free(sun, M_SONAME);
                    666:        return (error);
1.1       cgd       667: }
                    668:
/*
 * unp_connect: connect `so' to the socket bound at the path in `nam'.
 *
 * A temporary, nul-terminated copy of the address is used for the
 * lookup and freed before returning.  The looked-up node must be a
 * VSOCK that the caller may write to and that still has a socket of
 * the same type attached.  For protocols with PR_CONNREQUIRED the
 * target must be accepting connections; a new socket is spawned from
 * it with sonewconn(), inherits the listener's address and flags, and
 * becomes the actual peer.
 *
 * Returns 0 or an errno: the namei()/VOP_ACCESS() error, ENOTSOCK,
 * ECONNREFUSED, EPROTOTYPE, or whatever unp_connect2() returns.
 */
int
unp_connect(struct socket *so, struct mbuf *nam, struct lwp *l)
{
	struct nameidata nd;
	struct sockaddr_un *sun;
	struct vnode *vp;
	struct socket *peer, *child;
	struct unpcb *peerpcb, *childpcb;
	size_t sunlen;
	int rc;

	/* One extra byte guarantees the pathname is nul-terminated. */
	sunlen = nam->m_len + 1;
	sun = malloc(sunlen, M_SONAME, M_WAITOK);
	m_copydata(nam, 0, nam->m_len, (void *)sun);
	((char *)sun)[nam->m_len] = '\0';

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_SYSSPACE,
	    sun->sun_path, l);

	rc = namei(&nd);
	if (rc != 0)
		goto free_name;

	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		rc = ENOTSOCK;
		goto put_vnode;
	}

	rc = VOP_ACCESS(vp, VWRITE, l->l_cred, l);
	if (rc != 0)
		goto put_vnode;

	peer = vp->v_socket;
	if (peer == NULL) {
		rc = ECONNREFUSED;
		goto put_vnode;
	}
	if (peer->so_type != so->so_type) {
		rc = EPROTOTYPE;
		goto put_vnode;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((peer->so_options & SO_ACCEPTCONN) == 0) {
			rc = ECONNREFUSED;
			goto put_vnode;
		}
		child = sonewconn(peer, 0);
		if (child == NULL) {
			rc = ECONNREFUSED;
			goto put_vnode;
		}

		/* The new socket takes a copy of the listener's name and flags. */
		peerpcb = sotounpcb(peer);
		childpcb = sotounpcb(child);
		if (peerpcb->unp_addr != NULL) {
			childpcb->unp_addr = malloc(peerpcb->unp_addrlen,
			    M_SONAME, M_WAITOK);
			memcpy(childpcb->unp_addr, peerpcb->unp_addr,
			    peerpcb->unp_addrlen);
			childpcb->unp_addrlen = peerpcb->unp_addrlen;
		}
		childpcb->unp_flags = peerpcb->unp_flags;
		peer = child;
	}

	rc = unp_connect2(so, peer, PRU_CONNECT);

 put_vnode:
	vput(vp);
 free_name:
	free(sun, M_SONAME);
	return (rc);
}
                    736:
1.5       andrew    737: int
1.76      matt      738: unp_connect2(struct socket *so, struct socket *so2, int req)
1.1       cgd       739: {
1.46      augustss  740:        struct unpcb *unp = sotounpcb(so);
                    741:        struct unpcb *unp2;
1.1       cgd       742:
                    743:        if (so2->so_type != so->so_type)
                    744:                return (EPROTOTYPE);
                    745:        unp2 = sotounpcb(so2);
                    746:        unp->unp_conn = unp2;
                    747:        switch (so->so_type) {
                    748:
                    749:        case SOCK_DGRAM:
                    750:                unp->unp_nextref = unp2->unp_refs;
                    751:                unp2->unp_refs = unp;
                    752:                soisconnected(so);
                    753:                break;
                    754:
                    755:        case SOCK_STREAM:
                    756:                unp2->unp_conn = unp;
1.72      matt      757:                if (req == PRU_CONNECT &&
                    758:                    ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT))
                    759:                        soisconnecting(so);
                    760:                else
                    761:                        soisconnected(so);
1.1       cgd       762:                soisconnected(so2);
                    763:                break;
                    764:
                    765:        default:
                    766:                panic("unp_connect2");
                    767:        }
                    768:        return (0);
                    769: }
                    770:
1.5       andrew    771: void
1.76      matt      772: unp_disconnect(struct unpcb *unp)
1.1       cgd       773: {
1.46      augustss  774:        struct unpcb *unp2 = unp->unp_conn;
1.1       cgd       775:
                    776:        if (unp2 == 0)
                    777:                return;
                    778:        unp->unp_conn = 0;
                    779:        switch (unp->unp_socket->so_type) {
                    780:
                    781:        case SOCK_DGRAM:
                    782:                if (unp2->unp_refs == unp)
                    783:                        unp2->unp_refs = unp->unp_nextref;
                    784:                else {
                    785:                        unp2 = unp2->unp_refs;
                    786:                        for (;;) {
                    787:                                if (unp2 == 0)
                    788:                                        panic("unp_disconnect");
                    789:                                if (unp2->unp_nextref == unp)
                    790:                                        break;
                    791:                                unp2 = unp2->unp_nextref;
                    792:                        }
                    793:                        unp2->unp_nextref = unp->unp_nextref;
                    794:                }
                    795:                unp->unp_nextref = 0;
                    796:                unp->unp_socket->so_state &= ~SS_ISCONNECTED;
                    797:                break;
                    798:
                    799:        case SOCK_STREAM:
                    800:                soisdisconnected(unp->unp_socket);
                    801:                unp2->unp_conn = 0;
                    802:                soisdisconnected(unp2->unp_socket);
                    803:                break;
                    804:        }
                    805: }
                    806:
                    807: #ifdef notdef
1.76      matt      808: unp_abort(struct unpcb *unp)
1.1       cgd       809: {
                    810:        unp_detach(unp);
                    811: }
                    812: #endif
                    813:
1.5       andrew    814: void
1.76      matt      815: unp_shutdown(struct unpcb *unp)
1.1       cgd       816: {
                    817:        struct socket *so;
                    818:
                    819:        if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
                    820:            (so = unp->unp_conn->unp_socket))
                    821:                socantrcvmore(so);
                    822: }
                    823:
1.5       andrew    824: void
1.76      matt      825: unp_drop(struct unpcb *unp, int errno)
1.1       cgd       826: {
                    827:        struct socket *so = unp->unp_socket;
                    828:
                    829:        so->so_error = errno;
                    830:        unp_disconnect(unp);
                    831:        if (so->so_head) {
1.15      mycroft   832:                so->so_pcb = 0;
1.14      mycroft   833:                sofree(so);
1.20      mycroft   834:                if (unp->unp_addr)
1.26      thorpej   835:                        free(unp->unp_addr, M_SONAME);
1.14      mycroft   836:                free(unp, M_PCB);
1.1       cgd       837:        }
                    838: }
                    839:
                    840: #ifdef notdef
1.76      matt      841: unp_drain(void)
1.1       cgd       842: {
                    843:
                    844: }
                    845: #endif
                    846:
1.5       andrew    847: int
1.86      christos  848: unp_externalize(struct mbuf *rights, struct lwp *l)
1.1       cgd       849: {
1.46      augustss  850:        struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
1.86      christos  851:        struct proc *p = l->l_proc;
1.47      thorpej   852:        int i, *fdp;
1.46      augustss  853:        struct file **rp;
                    854:        struct file *fp;
1.50      thorpej   855:        int nfds, error = 0;
1.47      thorpej   856:
                    857:        nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
                    858:            sizeof(struct file *);
                    859:        rp = (struct file **)CMSG_DATA(cm);
1.1       cgd       860:
1.50      thorpej   861:        fdp = malloc(nfds * sizeof(int), M_TEMP, M_WAITOK);
                    862:
1.39      sommerfe  863:        /* Make sure the recipient should be able to see the descriptors.. */
1.42      thorpej   864:        if (p->p_cwdi->cwdi_rdir != NULL) {
1.48      thorpej   865:                rp = (struct file **)CMSG_DATA(cm);
1.39      sommerfe  866:                for (i = 0; i < nfds; i++) {
                    867:                        fp = *rp++;
                    868:                        /*
                    869:                         * If we are in a chroot'ed directory, and
                    870:                         * someone wants to pass us a directory, make
                    871:                         * sure it's inside the subtree we're allowed
                    872:                         * to access.
                    873:                         */
                    874:                        if (fp->f_type == DTYPE_VNODE) {
                    875:                                struct vnode *vp = (struct vnode *)fp->f_data;
                    876:                                if ((vp->v_type == VDIR) &&
1.86      christos  877:                                    !vn_isunder(vp, p->p_cwdi->cwdi_rdir, l)) {
1.39      sommerfe  878:                                        error = EPERM;
                    879:                                        break;
                    880:                                }
                    881:                        }
                    882:                }
                    883:        }
1.50      thorpej   884:
                    885:  restart:
1.47      thorpej   886:        rp = (struct file **)CMSG_DATA(cm);
1.50      thorpej   887:        if (error != 0) {
1.24      cgd       888:                for (i = 0; i < nfds; i++) {
1.1       cgd       889:                        fp = *rp;
1.39      sommerfe  890:                        /*
                    891:                         * zero the pointer before calling unp_discard,
                    892:                         * since it may end up in unp_gc()..
                    893:                         */
                    894:                        *rp++ = 0;
1.1       cgd       895:                        unp_discard(fp);
                    896:                }
1.50      thorpej   897:                goto out;
1.1       cgd       898:        }
1.50      thorpej   899:
1.24      cgd       900:        /*
1.50      thorpej   901:         * First loop -- allocate file descriptor table slots for the
                    902:         * new descriptors.
1.24      cgd       903:         */
                    904:        for (i = 0; i < nfds; i++) {
1.39      sommerfe  905:                fp = *rp++;
1.50      thorpej   906:                if ((error = fdalloc(p, 0, &fdp[i])) != 0) {
1.49      thorpej   907:                        /*
1.50      thorpej   908:                         * Back out what we've done so far.
1.49      thorpej   909:                         */
1.50      thorpej   910:                        for (--i; i >= 0; i--)
                    911:                                fdremove(p->p_fd, fdp[i]);
                    912:
                    913:                        if (error == ENOSPC) {
                    914:                                fdexpand(p);
                    915:                                error = 0;
                    916:                        } else {
                    917:                                /*
                    918:                                 * This is the error that has historically
                    919:                                 * been returned, and some callers may
                    920:                                 * expect it.
                    921:                                 */
                    922:                                error = EMSGSIZE;
                    923:                        }
                    924:                        goto restart;
1.49      thorpej   925:                }
1.50      thorpej   926:
                    927:                /*
                    928:                 * Make the slot reference the descriptor so that
                    929:                 * fdalloc() works properly.. We finalize it all
                    930:                 * in the loop below.
                    931:                 */
                    932:                p->p_fd->fd_ofiles[fdp[i]] = fp;
1.1       cgd       933:        }
1.24      cgd       934:
                    935:        /*
1.50      thorpej   936:         * Now that adding them has succeeded, update all of the
                    937:         * descriptor passing state.
1.24      cgd       938:         */
1.50      thorpej   939:        rp = (struct file **)CMSG_DATA(cm);
                    940:        for (i = 0; i < nfds; i++) {
                    941:                fp = *rp++;
                    942:                fp->f_msgcount--;
                    943:                unp_rights--;
                    944:        }
                    945:
                    946:        /*
                    947:         * Copy temporary array to message and adjust length, in case of
                    948:         * transition from large struct file pointers to ints.
                    949:         */
                    950:        memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int));
1.47      thorpej   951:        cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
                    952:        rights->m_len = CMSG_SPACE(nfds * sizeof(int));
1.50      thorpej   953:  out:
                    954:        free(fdp, M_TEMP);
                    955:        return (error);
1.1       cgd       956: }
                    957:
1.5       andrew    958: int
1.86      christos  959: unp_internalize(struct mbuf *control, struct lwp *l)
1.1       cgd       960: {
1.86      christos  961:        struct proc *p = l->l_proc;
1.24      cgd       962:        struct filedesc *fdescp = p->p_fd;
1.73      martin    963:        struct cmsghdr *newcm, *cm = mtod(control, struct cmsghdr *);
                    964:        struct file **rp, **files;
1.46      augustss  965:        struct file *fp;
                    966:        int i, fd, *fdp;
1.24      cgd       967:        int nfds;
                    968:        u_int neededspace;
1.38      thorpej   969:
1.24      cgd       970:        /* Sanity check the control message header */
1.66      jdolecek  971:        if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
1.1       cgd       972:            cm->cmsg_len != control->m_len)
                    973:                return (EINVAL);
1.24      cgd       974:
                    975:        /* Verify that the file descriptors are valid */
1.47      thorpej   976:        nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int);
                    977:        fdp = (int *)CMSG_DATA(cm);
1.24      cgd       978:        for (i = 0; i < nfds; i++) {
                    979:                fd = *fdp++;
1.58      pk        980:                if ((fp = fd_getfile(fdescp, fd)) == NULL)
1.1       cgd       981:                        return (EBADF);
1.58      pk        982:                simple_unlock(&fp->f_slock);
1.1       cgd       983:        }
1.24      cgd       984:
                    985:        /* Make sure we have room for the struct file pointers */
1.47      thorpej   986:        neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) -
                    987:            control->m_len;
1.24      cgd       988:        if (neededspace > M_TRAILINGSPACE(control)) {
                    989:
1.73      martin    990:                /* allocate new space and copy header into it */
                    991:                newcm = malloc(
                    992:                    CMSG_SPACE(nfds * sizeof(struct file *)),
                    993:                    M_MBUF, M_WAITOK);
                    994:                if (newcm == NULL)
1.24      cgd       995:                        return (E2BIG);
1.73      martin    996:                memcpy(newcm, cm, sizeof(struct cmsghdr));
1.80      perry     997:                files = (struct file **)CMSG_DATA(newcm);
1.73      martin    998:        } else {
                    999:                /* we can convert in-place */
                   1000:                newcm = NULL;
                   1001:                files = (struct file **)CMSG_DATA(cm);
1.24      cgd      1002:        }
                   1003:
                   1004:        /*
                   1005:         * Transform the file descriptors into struct file pointers, in
                   1006:         * reverse order so that if pointers are bigger than ints, the
                   1007:         * int won't get until we're done.
                   1008:         */
1.94      cbiere   1009:        fdp = (int *)CMSG_DATA(cm) + nfds;
                   1010:        rp = files + nfds;
1.24      cgd      1011:        for (i = 0; i < nfds; i++) {
1.94      cbiere   1012:                fp = fdescp->fd_ofiles[*--fdp];
1.57      pk       1013:                simple_lock(&fp->f_slock);
                   1014: #ifdef DIAGNOSTIC
                   1015:                if (fp->f_iflags & FIF_WANTCLOSE)
                   1016:                        panic("unp_internalize: file already closed");
                   1017: #endif
1.94      cbiere   1018:                *--rp = fp;
1.1       cgd      1019:                fp->f_count++;
                   1020:                fp->f_msgcount++;
1.57      pk       1021:                simple_unlock(&fp->f_slock);
1.1       cgd      1022:                unp_rights++;
                   1023:        }
1.73      martin   1024:
                   1025:        if (newcm) {
                   1026:                if (control->m_flags & M_EXT)
                   1027:                        MEXTREMOVE(control);
                   1028:                MEXTADD(control, newcm,
                   1029:                    CMSG_SPACE(nfds * sizeof(struct file *)),
                   1030:                    M_MBUF, NULL, NULL);
                   1031:                cm = newcm;
                   1032:        }
                   1033:
                   1034:        /* adjust message & mbuf to note amount of space actually used. */
                   1035:        cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct file *));
                   1036:        control->m_len = CMSG_SPACE(nfds * sizeof(struct file *));
                   1037:
1.1       cgd      1038:        return (0);
1.30      thorpej  1039: }
                   1040:
                   1041: struct mbuf *
1.92      ad       1042: unp_addsockcred(struct lwp *l, struct mbuf *control)
1.30      thorpej  1043: {
                   1044:        struct cmsghdr *cmp;
                   1045:        struct sockcred *sc;
                   1046:        struct mbuf *m, *n;
1.47      thorpej  1047:        int len, space, i;
1.30      thorpej  1048:
1.92      ad       1049:        len = CMSG_LEN(SOCKCREDSIZE(kauth_cred_ngroups(l->l_cred)));
                   1050:        space = CMSG_SPACE(SOCKCREDSIZE(kauth_cred_ngroups(l->l_cred)));
1.30      thorpej  1051:
                   1052:        m = m_get(M_WAIT, MT_CONTROL);
1.47      thorpej  1053:        if (space > MLEN) {
                   1054:                if (space > MCLBYTES)
                   1055:                        MEXTMALLOC(m, space, M_WAITOK);
1.30      thorpej  1056:                else
1.59      matt     1057:                        m_clget(m, M_WAIT);
1.30      thorpej  1058:                if ((m->m_flags & M_EXT) == 0) {
                   1059:                        m_free(m);
                   1060:                        return (control);
                   1061:                }
                   1062:        }
                   1063:
1.47      thorpej  1064:        m->m_len = space;
1.30      thorpej  1065:        m->m_next = NULL;
                   1066:        cmp = mtod(m, struct cmsghdr *);
                   1067:        sc = (struct sockcred *)CMSG_DATA(cmp);
                   1068:        cmp->cmsg_len = len;
                   1069:        cmp->cmsg_level = SOL_SOCKET;
                   1070:        cmp->cmsg_type = SCM_CREDS;
1.92      ad       1071:        sc->sc_uid = kauth_cred_getuid(l->l_cred);
                   1072:        sc->sc_euid = kauth_cred_geteuid(l->l_cred);
                   1073:        sc->sc_gid = kauth_cred_getgid(l->l_cred);
                   1074:        sc->sc_egid = kauth_cred_getegid(l->l_cred);
                   1075:        sc->sc_ngroups = kauth_cred_ngroups(l->l_cred);
1.30      thorpej  1076:        for (i = 0; i < sc->sc_ngroups; i++)
1.92      ad       1077:                sc->sc_groups[i] = kauth_cred_group(l->l_cred, i);
1.30      thorpej  1078:
                   1079:        /*
                   1080:         * If a control message already exists, append us to the end.
                   1081:         */
                   1082:        if (control != NULL) {
                   1083:                for (n = control; n->m_next != NULL; n = n->m_next)
                   1084:                        ;
                   1085:                n->m_next = m;
                   1086:        } else
                   1087:                control = m;
                   1088:
                   1089:        return (control);
1.1       cgd      1090: }
                   1091:
                         /*
                          * State shared by unp_gc() and unp_mark():
                          *  unp_defer - number of files currently flagged FDEFER; incremented
                          *              in unp_mark(), decremented in unp_gc() as each
                          *              deferred file is rescanned.
                          *  unp_gcing - set to 1 for the duration of unp_gc() so that a
                          *              nested call returns immediately.
                          */
                   1092: int    unp_defer, unp_gcing;
                         /* unp_gc() compares a socket's pr_domain against this. */
                   1093: extern struct domain unixdomain;
                   1094:
1.39      sommerfe 1095: /*
                   1096:  * Comment added long after the fact explaining what's going on here.
                   1097:  * Do a mark-sweep GC of file descriptors on the system, to free up
                   1098:  * any which are caught in flight to an about-to-be-closed socket.
                   1099:  *
                   1100:  * Traditional mark-sweep gc's start at the "root", and mark
                   1101:  * everything reachable from the root (which, in our case would be the
                   1102:  * process table).  The mark bits are cleared during the sweep.
                   1103:  *
                   1104:  * XXX For some inexplicable reason (perhaps because the file
                   1105:  * descriptor tables used to live in the u area which could be swapped
                   1106:  * out and thus hard to reach), we do multiple scans over the set of
                   1107:  * descriptors, using *two* mark bits per object (DEFER and MARK).
                   1108:  * Whenever we find a descriptor which references other descriptors,
                   1109:  * the ones it references are marked with both bits, and we iterate
                   1110:  * over the whole file table until there are no more DEFER bits set.
                   1111:  * We also make an extra pass *before* the GC to clear the mark bits,
                   1112:  * which could have been cleared at almost no cost during the previous
                   1113:  * sweep.
                   1114:  *
                   1115:  * XXX MP: this needs to run with locks such that no other thread of
                   1116:  * control can create or destroy references to file descriptors. it
                   1117:  * may be necessary to defer the GC until later (when the locking
                   1118:  * situation is more hospitable); it may be necessary to push this
                   1119:  * into a separate thread.
                   1120:  */
1.5       andrew   1121: void
1.76      matt     1122: unp_gc(void)
1.1       cgd      1123: {
1.46      augustss 1124:        struct file *fp, *nextfp;
                   1125:        struct socket *so, *so1;
1.8       mycroft  1126:        struct file **extra_ref, **fpp;
                   1127:        int nunref, i;
1.1       cgd      1128:
                                /*
                                 * A collection is already in progress (the closef() calls
                                 * below can lead back here); let the outer instance finish.
                                 */
                   1129:        if (unp_gcing)
                   1130:                return;
                   1131:        unp_gcing = 1;
                   1132:        unp_defer = 0;
1.39      sommerfe 1133:
                   1134:        /* Clear mark bits */
1.54      matt     1135:        LIST_FOREACH(fp, &filehead, f_list)
1.1       cgd      1136:                fp->f_flag &= ~(FMARK|FDEFER);
1.39      sommerfe 1137:
                   1138:        /*
                   1139:         * Iterate over the set of descriptors, marking ones believed
                   1140:         * (based on refcount) to be referenced from a process, and
                   1141:         * marking for rescan descriptors which are queued on a socket.
                   1142:         */
1.1       cgd      1143:        do {
1.54      matt     1144:                LIST_FOREACH(fp, &filehead, f_list) {
1.1       cgd      1145:                        if (fp->f_flag & FDEFER) {
                                                        /* Deferred by unp_mark(): rescan it on this pass. */
                   1146:                                fp->f_flag &= ~FDEFER;
                   1147:                                unp_defer--;
1.39      sommerfe 1148: #ifdef DIAGNOSTIC
                   1149:                                if (fp->f_count == 0)
                   1150:                                        panic("unp_gc: deferred unreferenced socket");
                   1151: #endif
1.1       cgd      1152:                        } else {
1.39      sommerfe 1153:                                if (fp->f_count == 0)
                   1154:                                        continue;
1.1       cgd      1155:                                if (fp->f_flag & FMARK)
                   1156:                                        continue;
                                                        /*
                                                         * NOTE(review): presumably f_msgcount counts the
                                                         * references held by queued messages, so equality
                                                         * means no process holds this file directly; it is
                                                         * then marked only if unp_mark() defers it -- confirm.
                                                         */
                   1157:                                if (fp->f_count == fp->f_msgcount)
                   1158:                                        continue;
                   1159:                        }
1.39      sommerfe 1160:                        fp->f_flag |= FMARK;
                   1161:
1.1       cgd      1162:                        if (fp->f_type != DTYPE_SOCKET ||
                   1163:                            (so = (struct socket *)fp->f_data) == 0)
                   1164:                                continue;
                   1165:                        if (so->so_proto->pr_domain != &unixdomain ||
                   1166:                            (so->so_proto->pr_flags&PR_RIGHTS) == 0)
                   1167:                                continue;
                   1168: #ifdef notdef
                   1169:                        if (so->so_rcv.sb_flags & SB_LOCK) {
                   1170:                                /*
                   1171:                                 * This is problematical; it's not clear
                   1172:                                 * we need to wait for the sockbuf to be
                   1173:                                 * unlocked (on a uniprocessor, at least),
                   1174:                                 * and it's also not clear what to do
                   1175:                                 * if sbwait returns an error due to receipt
                   1176:                                 * of a signal.  If sbwait does return
                   1177:                                 * an error, we'll go into an infinite
                   1178:                                 * loop.  Delete all of this for now.
                   1179:                                 */
                   1180:                                (void) sbwait(&so->so_rcv);
                   1181:                                goto restart;
                   1182:                        }
                   1183: #endif
                                                /* Mark or defer every file carried in this socket's receive buffer. */
1.39      sommerfe 1184:                        unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
                   1185:                        /*
                   1186:                         * mark descriptors referenced from sockets queued on the accept queue as well.
                   1187:                         */
                   1188:                        if (so->so_options & SO_ACCEPTCONN) {
1.54      matt     1189:                                TAILQ_FOREACH(so1, &so->so_q0, so_qe) {
1.39      sommerfe 1190:                                        unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
                   1191:                                }
1.54      matt     1192:                                TAILQ_FOREACH(so1, &so->so_q, so_qe) {
1.39      sommerfe 1193:                                        unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
                   1194:                                }
                   1195:                        }
1.80      perry    1196:
1.1       cgd      1197:                }
                   1198:        } while (unp_defer);
1.8       mycroft  1199:        /*
1.39      sommerfe 1200:         * Sweep pass.  Find unmarked descriptors, and free them.
                   1201:         *
1.8       mycroft  1202:         * We grab an extra reference to each of the file table entries
                   1203:         * that are not otherwise accessible and then free the rights
                   1204:         * that are stored in messages on them.
                   1205:         *
1.57      pk       1206:         * The bug in the original code is a little tricky, so I'll describe
1.8       mycroft  1207:         * what's wrong with it here.
                   1208:         *
                   1209:         * It is incorrect to simply unp_discard each entry for f_msgcount
                   1210:         * times -- consider the case of sockets A and B that contain
                   1211:         * references to each other.  On a last close of some other socket,
                   1212:         * we trigger a gc since the number of outstanding rights (unp_rights)
                   1213:         * is non-zero.  If during the sweep phase the gc code unp_discards,
                   1214:         * we end up doing a (full) closef on the descriptor.  A closef on A
                   1215:         * results in the following chain.  Closef calls soo_close, which
                   1216:         * calls soclose.   Soclose calls first (through the switch
                   1217:         * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
                   1218:         * returns because the previous instance had set unp_gcing, and
                   1219:         * we return all the way back to soclose, which marks the socket
                   1220:         * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
                   1221:         * to free up the rights that are queued in messages on the socket A,
                   1222:         * i.e., the reference on B.  The sorflush calls via the dom_dispose
                   1223:         * switch unp_dispose, which unp_scans with unp_discard.  This second
                   1224:         * instance of unp_discard just calls closef on B.
                   1225:         *
                   1226:         * Well, a similar chain occurs on B, resulting in a sorflush on B,
                   1227:         * which results in another closef on A.  Unfortunately, A is already
                   1228:         * being closed, and the descriptor has already been marked with
                   1229:         * SS_NOFDREF, and soclose panics at this point.
                   1230:         *
                   1231:         * Here, we first take an extra reference to each inaccessible
1.39      sommerfe 1232:         * descriptor.  Then, if the inaccessible descriptor is a
                   1233:         * socket, we call sorflush in case it is a Unix domain
                   1234:         * socket.  After we destroy all the rights carried in
                   1235:         * messages, we do a last closef to get rid of our extra
                   1236:         * reference.  This is the last close, and the unp_detach etc
                   1237:         * will shut down the socket.
1.8       mycroft  1238:         *
                   1239:         * 91/09/19, bsy@cs.cmu.edu
                   1240:         */
                                /* Room for nfiles pointers; only the first nunref are filled in. */
                   1241:        extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
1.54      matt     1242:        for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; fp != 0;
1.11      mycroft  1243:            fp = nextfp) {
1.54      matt     1244:                nextfp = LIST_NEXT(fp, f_list);
1.57      pk       1245:                simple_lock(&fp->f_slock);
                                        /*
                                         * Still referenced, every reference accounted for by
                                         * f_msgcount, and never reached by the mark pass:
                                         * remember it and take the extra reference.
                                         */
                   1246:                if (fp->f_count != 0 &&
                   1247:                    fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
1.8       mycroft  1248:                        *fpp++ = fp;
                   1249:                        nunref++;
                   1250:                        fp->f_count++;
                   1251:                }
1.57      pk       1252:                simple_unlock(&fp->f_slock);
1.1       cgd      1253:        }
                                /* First flush the rights queued on each unreachable socket... */
1.39      sommerfe 1254:        for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
1.45      thorpej  1255:                fp = *fpp;
                                        /*
                                         * NOTE(review): there is no explicit simple_unlock() paired
                                         * with this simple_lock(); presumably FILE_USE() releases
                                         * f_slock -- confirm against the macro definition.
                                         */
1.57      pk       1256:                simple_lock(&fp->f_slock);
1.44      thorpej  1257:                FILE_USE(fp);
1.39      sommerfe 1258:                if (fp->f_type == DTYPE_SOCKET)
                   1259:                        sorflush((struct socket *)fp->f_data);
1.44      thorpej  1260:                FILE_UNUSE(fp, NULL);
1.39      sommerfe 1261:        }
                                /* ...then drop the extra reference taken in the sweep loop. */
1.44      thorpej  1262:        for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
1.45      thorpej  1263:                fp = *fpp;
1.57      pk       1264:                simple_lock(&fp->f_slock);
1.44      thorpej  1265:                FILE_USE(fp);
1.86      christos 1266:                (void) closef(fp, (struct lwp *)0);
1.44      thorpej  1267:        }
1.95      christos 1268:        free((void *)extra_ref, M_FILE);
1.1       cgd      1269:        unp_gcing = 0;
                   1270: }
                   1271:
                         /*
                          * Discard every file reference carried in the mbuf chain m.
                          * A NULL chain is ignored.  The scan is run with discard set, so
                          * each file pointer slot is zeroed before unp_discard() is called
                          * on the file it held.
                          */
1.5       andrew   1272: void
1.76      matt     1273: unp_dispose(struct mbuf *m)
1.1       cgd      1274: {
1.8       mycroft  1275:
1.1       cgd      1276:        if (m)
1.39      sommerfe 1277:                unp_scan(m, unp_discard, 1);
1.1       cgd      1278: }
                   1279:
                         /*
                          * Apply op to each file pointer found in SCM_RIGHTS control
                          * messages on a queue of packets.
                          *
                          *  m0      - first packet; packets are linked through m_nextpkt and
                          *            the mbufs of one packet through m_next.
                          *  op      - callback invoked once per file pointer slot; it may be
                          *            handed a NULL pointer if the slot is empty.
                          *  discard - if nonzero, each slot is set to 0 before op is called.
                          *
                          * Within a packet only the first SOL_SOCKET/SCM_RIGHTS control mbuf
                          * is processed; control mbufs of other kinds are skipped.
                          */
1.5       andrew   1280: void
1.76      matt     1281: unp_scan(struct mbuf *m0, void (*op)(struct file *), int discard)
1.1       cgd      1282: {
1.46      augustss 1283:        struct mbuf *m;
                   1284:        struct file **rp;
                   1285:        struct cmsghdr *cm;
                   1286:        int i;
1.1       cgd      1287:        int qfds;
                   1288:
                   1289:        while (m0) {
1.48      thorpej  1290:                for (m = m0; m; m = m->m_next) {
1.1       cgd      1291:                        if (m->m_type == MT_CONTROL &&
                   1292:                            m->m_len >= sizeof(*cm)) {
                   1293:                                cm = mtod(m, struct cmsghdr *);
                   1294:                                if (cm->cmsg_level != SOL_SOCKET ||
                   1295:                                    cm->cmsg_type != SCM_RIGHTS)
                   1296:                                        continue;
                                                        /* Payload after the aligned header is an array of file pointers. */
1.48      thorpej  1297:                                qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm)))
                   1298:                                    / sizeof(struct file *);
                   1299:                                rp = (struct file **)CMSG_DATA(cm);
1.39      sommerfe 1300:                                for (i = 0; i < qfds; i++) {
                   1301:                                        struct file *fp = *rp;
                                                                /* Clear the slot first so the message no longer names fp. */
                   1302:                                        if (discard)
                   1303:                                                *rp = 0;
                   1304:                                        (*op)(fp);
                   1305:                                        rp++;
                   1306:                                }
1.1       cgd      1307:                                break;          /* XXX, but saves time */
                   1308:                        }
1.48      thorpej  1309:                }
1.52      thorpej  1310:                m0 = m0->m_nextpkt;
1.1       cgd      1311:        }
                   1312: }
                   1313:
                         /*
                          * Mark-phase callback handed to unp_scan() by unp_gc().
                          * A NULL pointer, or a file that already has FMARK or FDEFER set, is
                          * left alone.  Otherwise a socket is flagged FDEFER (and unp_defer is
                          * incremented) so that unp_gc() rescans it, while any other kind of
                          * file is flagged FMARK directly.
                          */
1.5       andrew   1314: void
1.76      matt     1315: unp_mark(struct file *fp)
1.1       cgd      1316: {
1.39      sommerfe 1317:        if (fp == NULL)
                   1318:                return;
1.80      perry    1319:
1.39      sommerfe 1320:        if (fp->f_flag & FMARK)
                   1321:                return;
1.1       cgd      1322:
1.39      sommerfe 1323:        /* If we're already deferred, don't screw up the defer count */
                   1324:        if (fp->f_flag & FDEFER)
1.1       cgd      1325:                return;
1.39      sommerfe 1326:
                   1327:        /*
                   1328:         * Minimize the number of deferrals...  Sockets are the only
                   1329:         * type of descriptor which can hold references to another
                   1330:         * descriptor, so just mark other descriptors, and defer
                   1331:         * unmarked sockets for the next pass.
                   1332:         */
                   1333:        if (fp->f_type == DTYPE_SOCKET) {
                   1334:                unp_defer++;
                                        /* A file found in a queued message must still hold a reference. */
                   1335:                if (fp->f_count == 0)
                   1336:                        panic("unp_mark: queued unref");
                   1337:                fp->f_flag |= FDEFER;
                   1338:        } else {
                   1339:                fp->f_flag |= FMARK;
                   1340:        }
                   1341:        return;
1.1       cgd      1342: }
                   1343:
                         /*
                          * Release one in-flight reference to fp.
                          * A NULL pointer is ignored.  Under f_slock the use count is raised
                          * and f_msgcount lowered; then the global unp_rights count is
                          * decremented and the file is handed to closef() with no lwp.
                          */
1.5       andrew   1344: void
1.76      matt     1345: unp_discard(struct file *fp)
1.1       cgd      1346: {
1.39      sommerfe 1347:        if (fp == NULL)
                   1348:                return;
1.57      pk       1349:        simple_lock(&fp->f_slock);
                   1350:        fp->f_usecount++;       /* i.e. FILE_USE(fp) sans locking */
1.1       cgd      1351:        fp->f_msgcount--;
1.57      pk       1352:        simple_unlock(&fp->f_slock);
1.1       cgd      1353:        unp_rights--;
1.86      christos 1354:        (void) closef(fp, (struct lwp *)0);
1.1       cgd      1355: }

CVSweb <webmaster@jp.NetBSD.org>