[BACK]Return to uipc_usrreq.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / kern

Annotation of src/sys/kern/uipc_usrreq.c, Revision 1.95.2.4

1.95.2.4! ad          1: /*     $NetBSD: uipc_usrreq.c,v 1.95.2.3 2007/04/12 23:14:20 ad Exp $  */
1.30      thorpej     2:
                      3: /*-
1.77      matt        4:  * Copyright (c) 1998, 2000, 2004 The NetBSD Foundation, Inc.
1.30      thorpej     5:  * All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to The NetBSD Foundation
                      8:  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
                      9:  * NASA Ames Research Center.
                     10:  *
                     11:  * Redistribution and use in source and binary forms, with or without
                     12:  * modification, are permitted provided that the following conditions
                     13:  * are met:
                     14:  * 1. Redistributions of source code must retain the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer.
                     16:  * 2. Redistributions in binary form must reproduce the above copyright
                     17:  *    notice, this list of conditions and the following disclaimer in the
                     18:  *    documentation and/or other materials provided with the distribution.
                     19:  * 3. All advertising materials mentioning features or use of this software
                     20:  *    must display the following acknowledgement:
                     21:  *     This product includes software developed by the NetBSD
                     22:  *     Foundation, Inc. and its contributors.
                     23:  * 4. Neither the name of The NetBSD Foundation nor the names of its
                     24:  *    contributors may be used to endorse or promote products derived
                     25:  *    from this software without specific prior written permission.
                     26:  *
                     27:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     28:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     29:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     30:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     31:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     32:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     33:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     34:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     35:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     36:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     37:  * POSSIBILITY OF SUCH DAMAGE.
                     38:  */
1.10      cgd        39:
1.1       cgd        40: /*
1.8       mycroft    41:  * Copyright (c) 1982, 1986, 1989, 1991, 1993
                     42:  *     The Regents of the University of California.  All rights reserved.
1.1       cgd        43:  *
                     44:  * Redistribution and use in source and binary forms, with or without
                     45:  * modification, are permitted provided that the following conditions
                     46:  * are met:
                     47:  * 1. Redistributions of source code must retain the above copyright
                     48:  *    notice, this list of conditions and the following disclaimer.
                     49:  * 2. Redistributions in binary form must reproduce the above copyright
                     50:  *    notice, this list of conditions and the following disclaimer in the
                     51:  *    documentation and/or other materials provided with the distribution.
1.67      agc        52:  * 3. Neither the name of the University nor the names of its contributors
                     53:  *    may be used to endorse or promote products derived from this software
                     54:  *    without specific prior written permission.
                     55:  *
                     56:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     57:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     58:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     59:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     60:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     61:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     62:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     63:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     64:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     65:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     66:  * SUCH DAMAGE.
                     67:  *
                     68:  *     @(#)uipc_usrreq.c       8.9 (Berkeley) 5/14/95
                     69:  */
                     70:
                     71: /*
                     72:  * Copyright (c) 1997 Christopher G. Demetriou.  All rights reserved.
                     73:  *
                     74:  * Redistribution and use in source and binary forms, with or without
                     75:  * modification, are permitted provided that the following conditions
                     76:  * are met:
                     77:  * 1. Redistributions of source code must retain the above copyright
                     78:  *    notice, this list of conditions and the following disclaimer.
                     79:  * 2. Redistributions in binary form must reproduce the above copyright
                     80:  *    notice, this list of conditions and the following disclaimer in the
                     81:  *    documentation and/or other materials provided with the distribution.
1.1       cgd        82:  * 3. All advertising materials mentioning features or use of this software
                     83:  *    must display the following acknowledgement:
                     84:  *     This product includes software developed by the University of
                     85:  *     California, Berkeley and its contributors.
                     86:  * 4. Neither the name of the University nor the names of its contributors
                     87:  *    may be used to endorse or promote products derived from this software
                     88:  *    without specific prior written permission.
                     89:  *
                     90:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     91:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     92:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     93:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     94:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     95:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     96:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     97:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     98:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     99:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                    100:  * SUCH DAMAGE.
                    101:  *
1.31      fvdl      102:  *     @(#)uipc_usrreq.c       8.9 (Berkeley) 5/14/95
1.1       cgd       103:  */
1.53      lukem     104:
                    105: #include <sys/cdefs.h>
1.95.2.4! ad        106: __KERNEL_RCSID(0, "$NetBSD: uipc_usrreq.c,v 1.95.2.3 2007/04/12 23:14:20 ad Exp $");
1.1       cgd       107:
1.7       mycroft   108: #include <sys/param.h>
1.8       mycroft   109: #include <sys/systm.h>
1.7       mycroft   110: #include <sys/proc.h>
                    111: #include <sys/filedesc.h>
                    112: #include <sys/domain.h>
                    113: #include <sys/protosw.h>
                    114: #include <sys/socket.h>
                    115: #include <sys/socketvar.h>
                    116: #include <sys/unpcb.h>
                    117: #include <sys/un.h>
                    118: #include <sys/namei.h>
                    119: #include <sys/vnode.h>
                    120: #include <sys/file.h>
                    121: #include <sys/stat.h>
                    122: #include <sys/mbuf.h>
1.91      elad      123: #include <sys/kauth.h>
1.95.2.3  ad        124: #include <sys/kmem.h>
1.1       cgd       125:
                    126: /*
                    127:  * Unix communications domain.
                    128:  *
                    129:  * TODO:
                    130:  *     SEQPACKET, RDM
                    131:  *     rethink name space problems
                    132:  *     need a proper out-of-band
                    133:  */
/*
 * sun_noname: the address used for a socket with no bound name.
 * unp_output(), unp_setsockaddr() and unp_setpeeraddr() fall back to it
 * when the relevant unp_addr pointer is NULL.  Only the length and the
 * AF_LOCAL family are initialized explicitly.
 */
1.93      christos  134: const struct sockaddr_un sun_noname = {
                    135:        .sun_len = sizeof(sun_noname),
                    136:        .sun_family = AF_LOCAL,
                    137: };
/* Counter handed out by PRU_SENSE (unp_ino++) as a socket's st_ino. */
1.1       cgd       138: ino_t  unp_ino;                        /* prototype for fake inode numbers */
                    139:
/* Called on the send paths when the peer has UNP_WANTCRED set. */
1.92      ad        140: struct mbuf *unp_addsockcred(struct lwp *, struct mbuf *);
1.30      thorpej   141:
/*
 * unp_output: append a datagram, tagged with the sender's address, to the
 * receive buffer of the socket that `unp' is connected to.
 *
 * m        data mbuf chain to deliver.
 * control  control mbuf chain or NULL; replaced by the result of
 *          unp_addsockcred() when the peer has UNP_WANTCRED set.
 * unp      sending PCB.  unp->unp_conn is dereferenced without a NULL
 *          check, so the caller must only call this while connected.
 * l        lwp passed through to unp_addsockcred().
 *
 * Returns 0 after waking the peer's readers.  When sbappendaddr() returns
 * 0, both chains are freed, the peer's so_rcv.sb_overflowed counter is
 * incremented and ENOBUFS is returned.
 */
1.20      mycroft   142: int
1.76      matt      143: unp_output(struct mbuf *m, struct mbuf *control, struct unpcb *unp,
1.92      ad        144:        struct lwp *l)
1.20      mycroft   145: {
                    146:        struct socket *so2;
1.77      matt      147:        const struct sockaddr_un *sun;
1.20      mycroft   148:
                    149:        so2 = unp->unp_conn->unp_socket;
                                /* Source address: our bound name, else the nameless address. */
                    150:        if (unp->unp_addr)
                    151:                sun = unp->unp_addr;
                    152:        else
                    153:                sun = &sun_noname;
1.30      thorpej   154:        if (unp->unp_conn->unp_flags & UNP_WANTCRED)
1.92      ad        155:                control = unp_addsockcred(l, control);
1.82      christos  156:        if (sbappendaddr(&so2->so_rcv, (const struct sockaddr *)sun, m,
1.20      mycroft   157:            control) == 0) {
                                        /* Append failed: this function still owns both chains. */
                    158:                m_freem(control);
                    159:                m_freem(m);
1.79      darrenr   160:                so2->so_rcv.sb_overflowed++;
1.60      christos  161:                return (ENOBUFS);
1.20      mycroft   162:        } else {
                    163:                sorwakeup(so2);
                    164:                return (0);
                    165:        }
                    166: }
                    167:
                    /*
                     * unp_setsockaddr: copy the local address of `unp' into
                     * the mbuf `nam'.
                     *
                     * Uses unp->unp_addr when set, otherwise sun_noname.
                     * nam->m_len is set to the address's sun_len; when that
                     * exceeds MLEN, external storage is attached with
                     * MEXTMALLOC(..., M_WAITOK) before the copy.
                     */
                    168: void
1.76      matt      169: unp_setsockaddr(struct unpcb *unp, struct mbuf *nam)
1.20      mycroft   170: {
1.77      matt      171:        const struct sockaddr_un *sun;
1.20      mycroft   172:
                    173:        if (unp->unp_addr)
                    174:                sun = unp->unp_addr;
                    175:        else
                    176:                sun = &sun_noname;
                    177:        nam->m_len = sun->sun_len;
1.56      itojun    178:        if (nam->m_len > MLEN)
1.27      thorpej   179:                MEXTMALLOC(nam, nam->m_len, M_WAITOK);
1.95      christos  180:        memcpy(mtod(nam, void *), sun, (size_t)nam->m_len);
1.20      mycroft   181: }
                    182:
                    /*
                     * unp_setpeeraddr: copy the address of the peer of `unp'
                     * into the mbuf `nam'.
                     *
                     * Uses unp->unp_conn->unp_addr when there is a peer and
                     * it has an address; otherwise (no peer, or an unnamed
                     * peer) sun_noname is used.  nam->m_len is set to the
                     * address's sun_len; when that exceeds MLEN, external
                     * storage is attached with MEXTMALLOC(..., M_WAITOK)
                     * before the copy.
                     */
                    183: void
1.76      matt      184: unp_setpeeraddr(struct unpcb *unp, struct mbuf *nam)
1.20      mycroft   185: {
1.77      matt      186:        const struct sockaddr_un *sun;
1.20      mycroft   187:
                    188:        if (unp->unp_conn && unp->unp_conn->unp_addr)
                    189:                sun = unp->unp_conn->unp_addr;
                    190:        else
                    191:                sun = &sun_noname;
                    192:        nam->m_len = sun->sun_len;
1.56      itojun    193:        if (nam->m_len > MLEN)
1.27      thorpej   194:                MEXTMALLOC(nam, nam->m_len, M_WAITOK);
1.95      christos  195:        memcpy(mtod(nam, void *), sun, (size_t)nam->m_len);
1.20      mycroft   196: }
                    197:
/*
 * uipc_usrreq: user-request entry point for the Unix communications
 * domain.  Dispatches on `req' (PRU_*) against the unpcb attached to `so'.
 *
 * so       socket the request applies to.
 * req      PRU_* request code.
 * m        data mbuf chain for PRU_SEND / PRU_SENDOOB; for PRU_SENSE it is
 *          cast to a struct stat * and filled in.
 * nam      address mbuf for bind, connect, send and the address getters;
 *          for PRU_CONNECT2 it is cast to the second struct socket *.
 * control  control mbuf chain.  With DIAGNOSTIC, a non-NULL value on any
 *          request other than PRU_SEND / PRU_SENDOOB panics.
 * l        calling lwp; asserted non-NULL before bind, connect and
 *          control-message internalization.
 *
 * Returns 0 or an errno value.  PRU_CONTROL is rejected with EOPNOTSUPP,
 * and every request except PRU_ATTACH fails with EINVAL when the socket
 * has no unpcb.
 */
1.1       cgd       198: /*ARGSUSED*/
1.5       andrew    199: int
1.76      matt      200: uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
1.86      christos  201:        struct mbuf *control, struct lwp *l)
1.1       cgd       202: {
                    203:        struct unpcb *unp = sotounpcb(so);
1.46      augustss  204:        struct socket *so2;
1.86      christos  205:        struct proc *p;
1.75      christos  206:        u_int newhiwat;
1.46      augustss  207:        int error = 0;
1.1       cgd       208:
                    209:        if (req == PRU_CONTROL)
                    210:                return (EOPNOTSUPP);
1.20      mycroft   211:
1.22      mycroft   212: #ifdef DIAGNOSTIC
                    213:        if (req != PRU_SEND && req != PRU_SENDOOB && control)
                    214:                panic("uipc_usrreq: unexpected control mbuf");
                    215: #endif
                                /* p is only consulted by the KASSERT in the SOCK_DGRAM send path. */
1.86      christos  216:        p = l ? l->l_proc : NULL;
1.1       cgd       217:        if (unp == 0 && req != PRU_ATTACH) {
                    218:                error = EINVAL;
                    219:                goto release;
                    220:        }
1.20      mycroft   221:
1.1       cgd       222:        switch (req) {
                    223:
                    224:        case PRU_ATTACH:
1.20      mycroft   225:                if (unp != 0) {
1.1       cgd       226:                        error = EISCONN;
                    227:                        break;
                    228:                }
                    229:                error = unp_attach(so);
                    230:                break;
                    231:
                    232:        case PRU_DETACH:
                    233:                unp_detach(unp);
                    234:                break;
                    235:
                    236:        case PRU_BIND:
1.90      christos  237:                KASSERT(l != NULL);
1.86      christos  238:                error = unp_bind(unp, nam, l);
1.1       cgd       239:                break;
                    240:
                    241:        case PRU_LISTEN:
                                        /* Listening requires a vnode on the pcb; EINVAL without one. */
                    242:                if (unp->unp_vnode == 0)
                    243:                        error = EINVAL;
                    244:                break;
                    245:
                    246:        case PRU_CONNECT:
1.90      christos  247:                KASSERT(l != NULL);
1.86      christos  248:                error = unp_connect(so, nam, l);
1.1       cgd       249:                break;
                    250:
                    251:        case PRU_CONNECT2:
1.72      matt      252:                error = unp_connect2(so, (struct socket *)nam, PRU_CONNECT2);
1.1       cgd       253:                break;
                    254:
                    255:        case PRU_DISCONNECT:
                    256:                unp_disconnect(unp);
                    257:                break;
                    258:
                    259:        case PRU_ACCEPT:
1.20      mycroft   260:                unp_setpeeraddr(unp, nam);
1.72      matt      261:                /*
                    262:                 * Mark the initiating STREAM socket as connected *ONLY*
                    263:                 * after it's been accepted.  This prevents a client from
                    264:                 * overrunning a server and receiving ECONNREFUSED.
                    265:                 */
                    266:                if (unp->unp_conn != NULL &&
                    267:                    (unp->unp_conn->unp_socket->so_state & SS_ISCONNECTING))
                    268:                        soisconnected(unp->unp_conn->unp_socket);
1.1       cgd       269:                break;
                    270:
                    271:        case PRU_SHUTDOWN:
                    272:                socantsendmore(so);
                    273:                unp_shutdown(unp);
                    274:                break;
                    275:
                    276:        case PRU_RCVD:
                    277:                switch (so->so_type) {
                    278:
                    279:                case SOCK_DGRAM:
                    280:                        panic("uipc 1");
                    281:                        /*NOTREACHED*/
                    282:
                    283:                case SOCK_STREAM:
                                                /* Here rcv is our receive buffer, snd the peer's send buffer. */
                    284: #define        rcv (&so->so_rcv)
                    285: #define snd (&so2->so_snd)
                    286:                        if (unp->unp_conn == 0)
                    287:                                break;
                    288:                        so2 = unp->unp_conn->unp_socket;
                    289:                        /*
                    290:                         * Adjust backpressure on sender
                    291:                         * and wakeup any waiting to write.
                    292:                         */
                    293:                        snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
                    294:                        unp->unp_mbcnt = rcv->sb_mbcnt;
1.75      christos  295:                        newhiwat = snd->sb_hiwat + unp->unp_cc - rcv->sb_cc;
1.81      christos  296:                        (void)chgsbsize(so2->so_uidinfo,
1.75      christos  297:                            &snd->sb_hiwat, newhiwat, RLIM_INFINITY);
1.1       cgd       298:                        unp->unp_cc = rcv->sb_cc;
                    299:                        sowwakeup(so2);
                    300: #undef snd
                    301: #undef rcv
                    302:                        break;
                    303:
                    304:                default:
                    305:                        panic("uipc 2");
                    306:                }
                    307:                break;
                    308:
                    309:        case PRU_SEND:
1.30      thorpej   310:                /*
                    311:                 * Note: unp_internalize() rejects any control message
                    312:                 * other than SCM_RIGHTS, and only allows one.  This
                    313:                 * has the side-effect of preventing a caller from
                    314:                 * forging SCM_CREDS.
                    315:                 */
1.90      christos  316:                if (control) {
                    317:                        KASSERT(l != NULL);
                                                /* "die" lives inside the SOCK_DGRAM case below. */
                    318:                        if ((error = unp_internalize(control, l)) != 0)
                    319:                                goto die;
1.83      yamt      320:                }
1.1       cgd       321:                switch (so->so_type) {
                    322:
                    323:                case SOCK_DGRAM: {
                                                /*
                                                 * With an address: connect for this one send and
                                                 * disconnect afterwards.  Without one: the socket
                                                 * must already be connected.
                                                 */
                    324:                        if (nam) {
1.20      mycroft   325:                                if ((so->so_state & SS_ISCONNECTED) != 0) {
1.1       cgd       326:                                        error = EISCONN;
1.21      mycroft   327:                                        goto die;
1.1       cgd       328:                                }
1.90      christos  329:                                KASSERT(l != NULL);
1.86      christos  330:                                error = unp_connect(so, nam, l);
1.20      mycroft   331:                                if (error) {
                                                                /*
                                                                 * Shared failure exit for PRU_SEND: free both
                                                                 * chains, then leave the so_type switch.
                                                                 */
1.23      mycroft   332:                                die:
1.21      mycroft   333:                                        m_freem(control);
1.20      mycroft   334:                                        m_freem(m);
1.1       cgd       335:                                        break;
1.20      mycroft   336:                                }
1.1       cgd       337:                        } else {
1.20      mycroft   338:                                if ((so->so_state & SS_ISCONNECTED) == 0) {
1.1       cgd       339:                                        error = ENOTCONN;
1.21      mycroft   340:                                        goto die;
1.1       cgd       341:                                }
                    342:                        }
1.89      christos  343:                        KASSERT(p != NULL);
1.92      ad        344:                        error = unp_output(m, control, unp, l);
1.1       cgd       345:                        if (nam)
                    346:                                unp_disconnect(unp);
                    347:                        break;
                    348:                }
                    349:
                    350:                case SOCK_STREAM:
                                                /* Here rcv is the peer's receive buffer, snd our send buffer. */
                    351: #define        rcv (&so2->so_rcv)
                    352: #define        snd (&so->so_snd)
                                                /*
                                                 * NOTE(review): unlike the "die" path, this exit does
                                                 * not free m or control -- confirm whether the caller
                                                 * releases them or whether this leaks.
                                                 */
1.87      christos  353:                        if (unp->unp_conn == NULL) {
                    354:                                error = ENOTCONN;
                    355:                                break;
                    356:                        }
1.1       cgd       357:                        so2 = unp->unp_conn->unp_socket;
1.30      thorpej   358:                        if (unp->unp_conn->unp_flags & UNP_WANTCRED) {
                    359:                                /*
                    360:                                 * Credentials are passed only once on
                    361:                                 * SOCK_STREAM.
                    362:                                 */
                    363:                                unp->unp_conn->unp_flags &= ~UNP_WANTCRED;
1.92      ad        364:                                control = unp_addsockcred(l, control);
1.30      thorpej   365:                        }
1.1       cgd       366:                        /*
                    367:                         * Send to paired receive port, and then reduce
                    368:                         * send buffer hiwater marks to maintain backpressure.
                    369:                         * Wake up readers.
                    370:                         */
                                                /*
                                                 * NOTE(review): when sbappendcontrol() returns 0 only
                                                 * control is freed and error stays 0; m is not freed
                                                 * here -- confirm the intended ownership of m.
                                                 */
                    371:                        if (control) {
1.21      mycroft   372:                                if (sbappendcontrol(rcv, m, control) == 0)
                    373:                                        m_freem(control);
1.1       cgd       374:                        } else
                    375:                                sbappend(rcv, m);
                    376:                        snd->sb_mbmax -=
                    377:                            rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
                    378:                        unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
1.75      christos  379:                        newhiwat = snd->sb_hiwat -
                    380:                            (rcv->sb_cc - unp->unp_conn->unp_cc);
1.81      christos  381:                        (void)chgsbsize(so->so_uidinfo,
1.75      christos  382:                            &snd->sb_hiwat, newhiwat, RLIM_INFINITY);
1.1       cgd       383:                        unp->unp_conn->unp_cc = rcv->sb_cc;
                    384:                        sorwakeup(so2);
                    385: #undef snd
                    386: #undef rcv
                    387:                        break;
                    388:
                    389:                default:
                    390:                        panic("uipc 4");
                    391:                }
                    392:                break;
                    393:
                    394:        case PRU_ABORT:
                    395:                unp_drop(unp, ECONNABORTED);
1.39      sommerfe  396:
1.88      matt      397:                KASSERT(so->so_head == NULL);
1.39      sommerfe  398: #ifdef DIAGNOSTIC
                    399:                if (so->so_pcb == 0)
                    400:                        panic("uipc 5: drop killed pcb");
                    401: #endif
                    402:                unp_detach(unp);
1.1       cgd       403:                break;
                    404:
                    405:        case PRU_SENSE:
                                        /*
                                         * m is really a struct stat *.  st_blksize is our send
                                         * high-water mark, plus (for a connected stream) the bytes
                                         * queued in the peer's receive buffer.  A fake inode number
                                         * is assigned from unp_ino the first time it is needed.
                                         */
                    406:                ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
                    407:                if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
                    408:                        so2 = unp->unp_conn->unp_socket;
                    409:                        ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
                    410:                }
                    411:                ((struct stat *) m)->st_dev = NODEV;
                    412:                if (unp->unp_ino == 0)
                    413:                        unp->unp_ino = unp_ino++;
1.25      kleink    414:                ((struct stat *) m)->st_atimespec =
                    415:                    ((struct stat *) m)->st_mtimespec =
                    416:                    ((struct stat *) m)->st_ctimespec = unp->unp_ctime;
1.1       cgd       417:                ((struct stat *) m)->st_ino = unp->unp_ino;
                                        /* Returns directly instead of going through "release". */
                    418:                return (0);
                    419:
                    420:        case PRU_RCVOOB:
1.20      mycroft   421:                error = EOPNOTSUPP;
                    422:                break;
1.1       cgd       423:
                    424:        case PRU_SENDOOB:
                                        /* Out-of-band data is unsupported; drop what was handed in. */
1.22      mycroft   425:                m_freem(control);
1.20      mycroft   426:                m_freem(m);
1.1       cgd       427:                error = EOPNOTSUPP;
                    428:                break;
                    429:
                    430:        case PRU_SOCKADDR:
1.20      mycroft   431:                unp_setsockaddr(unp, nam);
1.1       cgd       432:                break;
                    433:
                    434:        case PRU_PEERADDR:
1.20      mycroft   435:                unp_setpeeraddr(unp, nam);
1.1       cgd       436:                break;
                    437:
                    438:        default:
                    439:                panic("piusrreq");
                    440:        }
1.20      mycroft   441:
1.1       cgd       442: release:
                    443:        return (error);
                    444: }
                    445:
                    446: /*
1.30      thorpej   447:  * Unix domain socket option processing.
                          *
                          * Only level 0 is accepted; any other level yields EINVAL, and for
                          * PRCO_SETOPT the option mbuf is freed.
                          *
                          * PRCO_SETOPT: LOCAL_CREDS and LOCAL_CONNWAIT take an mbuf whose
                          * m_len is sizeof(int) and set or clear UNP_WANTCRED / UNP_CONNWAIT
                          * in unp->unp_flags.  A missing or wrongly sized mbuf yields EINVAL;
                          * any other option yields ENOPROTOOPT.  The option mbuf, if any, is
                          * freed before returning.
                          *
                          * PRCO_GETOPT: only LOCAL_CREDS is handled; a new MT_SOOPTS mbuf
                          * holding 0 or 1 is stored through mp.  Any other option yields
                          * ENOPROTOOPT.
                          * NOTE(review): LOCAL_CONNWAIT can be set but not read back here --
                          * confirm whether that asymmetry is intended.
                          *
                          * Any other op matches no case and returns 0.
                    448:  */
                    449: int
1.76      matt      450: uipc_ctloutput(int op, struct socket *so, int level, int optname,
                    451:        struct mbuf **mp)
1.30      thorpej   452: {
                    453:        struct unpcb *unp = sotounpcb(so);
                    454:        struct mbuf *m = *mp;
                    455:        int optval = 0, error = 0;
                    456:
                    457:        if (level != 0) {
                    458:                error = EINVAL;
                    459:                if (op == PRCO_SETOPT && m)
                    460:                        (void) m_free(m);
                    461:        } else switch (op) {
                    462:
                    463:        case PRCO_SETOPT:
                    464:                switch (optname) {
                    465:                case LOCAL_CREDS:
1.72      matt      466:                case LOCAL_CONNWAIT:
1.30      thorpej   467:                        if (m == NULL || m->m_len != sizeof(int))
                    468:                                error = EINVAL;
                    469:                        else {
                    470:                                optval = *mtod(m, int *);
                    471:                                switch (optname) {
                    472: #define        OPTSET(bit) \
                    473:        if (optval) \
                    474:                unp->unp_flags |= (bit); \
                    475:        else \
                    476:                unp->unp_flags &= ~(bit);
                    477:
                    478:                                case LOCAL_CREDS:
                    479:                                        OPTSET(UNP_WANTCRED);
                    480:                                        break;
1.72      matt      481:                                case LOCAL_CONNWAIT:
                    482:                                        OPTSET(UNP_CONNWAIT);
                    483:                                        break;
1.30      thorpej   484:                                }
                    485:                        }
                    486:                        break;
                    487: #undef OPTSET
                    488:
                    489:                default:
                    490:                        error = ENOPROTOOPT;
                    491:                        break;
                    492:                }
                                        /* The set-option mbuf is released here on success and failure. */
                    493:                if (m)
                    494:                        (void) m_free(m);
                    495:                break;
                    496:
                    497:        case PRCO_GETOPT:
                    498:                switch (optname) {
                    499:                case LOCAL_CREDS:
                                                /*
                                                 * The result of m_get() is used without a NULL check.
                                                 * NOTE(review): presumably M_WAIT cannot fail --
                                                 * confirm.
                                                 */
                    500:                        *mp = m = m_get(M_WAIT, MT_SOOPTS);
                    501:                        m->m_len = sizeof(int);
                    502:                        switch (optname) {
                    503:
                    504: #define        OPTBIT(bit)     (unp->unp_flags & (bit) ? 1 : 0)
                    505:
                    506:                        case LOCAL_CREDS:
                    507:                                optval = OPTBIT(UNP_WANTCRED);
                    508:                                break;
                    509:                        }
                    510:                        *mtod(m, int *) = optval;
                    511:                        break;
                    512: #undef OPTBIT
                    513:
                    514:                default:
                    515:                        error = ENOPROTOOPT;
                    516:                        break;
                    517:                }
                    518:                break;
                    519:        }
                    520:        return (error);
                    521: }
                    522:
                    523: /*
1.1       cgd       524:  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
                    525:  * for stream sockets, although the total for sender and receiver is
                    526:  * actually only PIPSIZ.
                    527:  * Datagram sockets really use the sendspace as the maximum datagram size,
                    528:  * and don't really want to reserve the sendspace.  Their recvspace should
                    529:  * be large enough for at least one max-size datagram plus address.
                    530:  */
                    531: #define        PIPSIZ  4096    /* default stream buffer size, in bytes */
                    532: u_long unpst_sendspace = PIPSIZ;       /* stream send reservation used by unp_attach() */
                    533: u_long unpst_recvspace = PIPSIZ;       /* stream receive reservation used by unp_attach() */
                    534: u_long unpdg_sendspace = 2*1024;       /* really max datagram size */
                    535: u_long unpdg_recvspace = 4*1024;       /* datagram receive reservation used by unp_attach() */
                    536:
                    537: int    unp_rights;                     /* file descriptors in flight */
                    538:
/*
 * unp_attach: allocate and install the protocol control block for `so'.
 *
 * If either socket buffer still has a zero high-water mark, buffer space
 * is reserved first with soreserve(), using the stream or datagram
 * defaults according to so->so_type; any other socket type panics.
 * A zero-filled struct unpcb is then allocated without sleeping, linked
 * to the socket in both directions, and stamped with the current time.
 *
 * Returns 0 on success, the error from soreserve(), or ENOBUFS when the
 * control block cannot be allocated.
 */
1.5       andrew    539: int
1.76      matt      540: unp_attach(struct socket *so)
1.1       cgd       541: {
1.46      augustss  542:        struct unpcb *unp;
1.1       cgd       543:        int error;
1.80      perry     544:
                                /* Only reserve space if nobody has sized the buffers yet. */
1.1       cgd       545:        if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
                    546:                switch (so->so_type) {
                    547:
                    548:                case SOCK_STREAM:
                    549:                        error = soreserve(so, unpst_sendspace, unpst_recvspace);
                    550:                        break;
                    551:
                    552:                case SOCK_DGRAM:
                    553:                        error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
                    554:                        break;
1.8       mycroft   555:
                    556:                default:
                    557:                        panic("unp_attach");
1.1       cgd       558:                }
                    559:                if (error)
                    560:                        return (error);
                    561:        }
                                /* M_NOWAIT: the allocation may fail, hence the NULL check. */
1.14      mycroft   562:        unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT);
                    563:        if (unp == NULL)
1.1       cgd       564:                return (ENOBUFS);
1.95      christos  565:        memset((void *)unp, 0, sizeof(*unp));
1.14      mycroft   566:        unp->unp_socket = so;
1.15      mycroft   567:        so->so_pcb = unp;
                                /* Record the creation time of the control block. */
1.85      simonb    568:        nanotime(&unp->unp_ctime);
1.1       cgd       569:        return (0);
                    570: }
                    571:
/*
 * unp_detach: tear down and free the control block `unp'.
 *
 * In order: unhook and release the bound vnode (if any), disconnect from
 * the peer (if any), drop every control block still listed on unp_refs
 * with ECONNRESET, mark the socket disconnected, clear the socket's
 * so_pcb pointer, free the bound address and finally the control block.
 * When descriptors are in flight (unp_rights != 0) the receive buffer is
 * flushed before the free and unp_gc() is run afterwards.
 */
1.17      pk        572: void
1.76      matt      573: unp_detach(struct unpcb *unp)
1.1       cgd       574: {
1.80      perry     575:
1.1       cgd       576:        if (unp->unp_vnode) {
                                        /* Break the vnode -> socket link before dropping our reference. */
                    577:                unp->unp_vnode->v_socket = 0;
                    578:                vrele(unp->unp_vnode);
                    579:                unp->unp_vnode = 0;
                    580:        }
                    581:        if (unp->unp_conn)
                    582:                unp_disconnect(unp);
                                /*
                                 * Each unp_drop() disconnects the head entry, which (for
                                 * datagram sockets, see unp_disconnect()) unlinks it from
                                 * our unp_refs list, so the list shrinks on every pass.
                                 */
                    583:        while (unp->unp_refs)
                    584:                unp_drop(unp->unp_refs, ECONNRESET);
                    585:        soisdisconnected(unp->unp_socket);
                    586:        unp->unp_socket->so_pcb = 0;
1.20      mycroft   587:        if (unp->unp_addr)
1.26      thorpej   588:                free(unp->unp_addr, M_SONAME);
1.8       mycroft   589:        if (unp_rights) {
                    590:                /*
                    591:                 * Normally the receive buffer is flushed later,
                    592:                 * in sofree, but if our receive buffer holds references
                    593:                 * to descriptors that are now garbage, we will dispose
                    594:                 * of those descriptor references after the garbage collector
                    595:                 * gets them (resulting in a "panic: closef: count < 0").
                    596:                 */
                    597:                sorflush(unp->unp_socket);
1.14      mycroft   598:                free(unp, M_PCB);
1.1       cgd       599:                unp_gc();
1.14      mycroft   600:        } else
                    601:                free(unp, M_PCB);
1.1       cgd       602: }
                    603:
/*
 * unp_bind: bind `unp' to the filesystem path carried in the mbuf `nam'
 * by creating a VSOCK vnode at that path on behalf of lwp `l'.
 *
 * The address is copied out of the mbuf into a freshly allocated buffer
 * that is one byte longer than nam->m_len so the path can be
 * NUL-terminated.  On success that buffer becomes unp->unp_addr (with
 * unp_addrlen including the extra byte) and the new vnode and the socket
 * are linked to each other.
 *
 * Returns 0 on success; EINVAL if `unp' already has a vnode; the error
 * from namei(); EADDRINUSE if the path already exists; or the error from
 * VOP_CREATE().  On every failure after the copy, the copied address is
 * freed.
 */
1.5       andrew    604: int
1.86      christos  605: unp_bind(struct unpcb *unp, struct mbuf *nam, struct lwp *l)
1.1       cgd       606: {
1.27      thorpej   607:        struct sockaddr_un *sun;
1.46      augustss  608:        struct vnode *vp;
1.1       cgd       609:        struct vattr vattr;
1.27      thorpej   610:        size_t addrlen;
1.86      christos  611:        struct proc *p;
1.1       cgd       612:        int error;
                    613:        struct nameidata nd;
                    614:
                                /* Already bound to a vnode: refuse a second bind. */
1.20      mycroft   615:        if (unp->unp_vnode != 0)
                    616:                return (EINVAL);
1.27      thorpej   617:
1.86      christos  618:        p = l->l_proc;
1.27      thorpej   619:        /*
                    620:         * Allocate the new sockaddr.  We have to allocate one
                    621:         * extra byte so that we can ensure that the pathname
                    622:         * is nul-terminated.
                    623:         */
                    624:        addrlen = nam->m_len + 1;
                    625:        sun = malloc(addrlen, M_SONAME, M_WAITOK);
1.95      christos  626:        m_copydata(nam, 0, nam->m_len, (void *)sun);
1.27      thorpej   627:        *(((char *)sun) + nam->m_len) = '\0';
                    628:
                                /* The path lives in the kernel copy made above, hence UIO_SYSSPACE. */
1.95.2.4! ad        629:        NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT | TRYEMULROOT, UIO_SYSSPACE,
1.86      christos  630:            sun->sun_path, l);
1.27      thorpej   631:
1.1       cgd       632: /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
1.16      christos  633:        if ((error = namei(&nd)) != 0)
1.27      thorpej   634:                goto bad;
1.9       mycroft   635:        vp = nd.ni_vp;
1.95.2.2  ad        636:        if (vp != NULL) {
                                        /*
                                         * The name already exists: abort the pending create,
                                         * release the parent (vrele when it is the same vnode
                                         * as the one found, vput otherwise) and the found
                                         * vnode, then fail with EADDRINUSE.
                                         */
1.9       mycroft   637:                VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
                    638:                if (nd.ni_dvp == vp)
                    639:                        vrele(nd.ni_dvp);
1.1       cgd       640:                else
1.9       mycroft   641:                        vput(nd.ni_dvp);
1.1       cgd       642:                vrele(vp);
1.95.2.2  ad        643:                error = EADDRINUSE;
                    644:                goto bad;
1.1       cgd       645:        }
                                /* Create a socket node; mode is ACCESSPERMS masked by the process cmask. */
                    646:        VATTR_NULL(&vattr);
                    647:        vattr.va_type = VSOCK;
1.84      jmmv      648:        vattr.va_mode = ACCESSPERMS & ~(p->p_cwdi->cwdi_cmask);
1.92      ad        649:        VOP_LEASE(nd.ni_dvp, l, l->l_cred, LEASE_WRITE);
1.16      christos  650:        error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
                    651:        if (error)
1.27      thorpej   652:                goto bad;
1.9       mycroft   653:        vp = nd.ni_vp;
                                /* Link vnode and socket, and hand the copied address to the PCB. */
1.1       cgd       654:        vp->v_socket = unp->unp_socket;
                    655:        unp->unp_vnode = vp;
1.27      thorpej   656:        unp->unp_addrlen = addrlen;
                    657:        unp->unp_addr = sun;
                                /* NOTE(review): presumably VOP_CREATE() returns the new vnode locked -- confirm. */
1.31      fvdl      658:        VOP_UNLOCK(vp, 0);
1.1       cgd       659:        return (0);
1.27      thorpej   660:
                    661:  bad:
                    662:        free(sun, M_SONAME);
                    663:        return (error);
1.1       cgd       664: }
                    665:
/*
 * unp_connect: connect socket `so' to the socket bound at the filesystem
 * path carried in the mbuf `nam', on behalf of lwp `l'.
 *
 * The path is looked up with the leaf locked.  The vnode must be a VSOCK
 * that `l' may write to and that still has a socket attached, and that
 * socket must have the same type as `so'.  For protocols flagged
 * PR_CONNREQUIRED the target must be accepting connections; a new socket
 * is spawned from it with sonewconn(), given a copy of the listener's
 * address and its flags, and the connection is made to that new socket.
 *
 * Returns the result of unp_connect2() on the success path, otherwise
 * the namei() or VOP_ACCESS() error, ENOTSOCK, ECONNREFUSED or
 * EPROTOTYPE.  The temporary address copy is always freed and, once the
 * lookup has succeeded, the vnode is always released with vput().
 */
1.5       andrew    666: int
1.86      christos  667: unp_connect(struct socket *so, struct mbuf *nam, struct lwp *l)
1.1       cgd       668: {
1.46      augustss  669:        struct sockaddr_un *sun;
                    670:        struct vnode *vp;
                    671:        struct socket *so2, *so3;
1.1       cgd       672:        struct unpcb *unp2, *unp3;
1.27      thorpej   673:        size_t addrlen;
1.1       cgd       674:        int error;
                    675:        struct nameidata nd;
                    676:
1.27      thorpej   677:        /*
                    678:         * Allocate a temporary sockaddr.  We have to allocate one extra
                    679:         * byte so that we can ensure that the pathname is nul-terminated.
                    680:         * When we establish the connection, we copy the other PCB's
                    681:         * sockaddr to our own.
                    682:         */
                    683:        addrlen = nam->m_len + 1;
                    684:        sun = malloc(addrlen, M_SONAME, M_WAITOK);
1.95      christos  685:        m_copydata(nam, 0, nam->m_len, (void *)sun);
1.27      thorpej   686:        *(((char *)sun) + nam->m_len) = '\0';
                    687:
1.95.2.4! ad        688:        NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_SYSSPACE, sun->sun_path, l);
1.27      thorpej   689:
                                /* Lookup failed: there is no vnode to release, so skip the vput(). */
1.16      christos  690:        if ((error = namei(&nd)) != 0)
1.27      thorpej   691:                goto bad2;
1.9       mycroft   692:        vp = nd.ni_vp;
1.1       cgd       693:        if (vp->v_type != VSOCK) {
                    694:                error = ENOTSOCK;
                    695:                goto bad;
                    696:        }
                                /* Connecting requires write permission on the socket node. */
1.92      ad        697:        if ((error = VOP_ACCESS(vp, VWRITE, l->l_cred, l)) != 0)
1.1       cgd       698:                goto bad;
                    699:        so2 = vp->v_socket;
                                /* A socket node with no socket attached has nobody to connect to. */
                    700:        if (so2 == 0) {
                    701:                error = ECONNREFUSED;
                    702:                goto bad;
                    703:        }
                    704:        if (so->so_type != so2->so_type) {
                    705:                error = EPROTOTYPE;
                    706:                goto bad;
                    707:        }
                    708:        if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
                                        /* so3 is only assigned when so2 is accepting connections. */
                    709:                if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
                    710:                    (so3 = sonewconn(so2, 0)) == 0) {
                    711:                        error = ECONNREFUSED;
                    712:                        goto bad;
                    713:                }
                    714:                unp2 = sotounpcb(so2);
                    715:                unp3 = sotounpcb(so3);
                                        /* The new socket gets its own copy of the listener's address. */
1.26      thorpej   716:                if (unp2->unp_addr) {
                    717:                        unp3->unp_addr = malloc(unp2->unp_addrlen,
                    718:                            M_SONAME, M_WAITOK);
1.36      perry     719:                        memcpy(unp3->unp_addr, unp2->unp_addr,
1.26      thorpej   720:                            unp2->unp_addrlen);
                    721:                        unp3->unp_addrlen = unp2->unp_addrlen;
                    722:                }
1.30      thorpej   723:                unp3->unp_flags = unp2->unp_flags;
                                        /* From here on the peer is the newly created socket. */
1.33      thorpej   724:                so2 = so3;
                    725:        }
1.72      matt      726:        error = unp_connect2(so, so2, PRU_CONNECT);
                                /* The success path falls through both labels as well. */
1.27      thorpej   727:  bad:
1.1       cgd       728:        vput(vp);
1.27      thorpej   729:  bad2:
                    730:        free(sun, M_SONAME);
1.1       cgd       731:        return (error);
                    732: }
                    733:
/*
 * unp_connect2: link the control blocks of `so' and `so2'.
 *
 * Returns EPROTOTYPE if the two sockets differ in type, otherwise 0.
 *
 * SOCK_DGRAM:  the link is one-directional.  so's control block points
 *              at the peer and is pushed onto the front of the peer's
 *              unp_refs list; only `so' is marked connected.
 * SOCK_STREAM: both control blocks point at each other.  `so2' is always
 *              marked connected; `so' is marked connecting instead of
 *              connected when `req' is PRU_CONNECT and either side has
 *              UNP_CONNWAIT set.
 * Any other socket type panics.
 */
1.5       andrew    734: int
1.76      matt      735: unp_connect2(struct socket *so, struct socket *so2, int req)
1.1       cgd       736: {
1.46      augustss  737:        struct unpcb *unp = sotounpcb(so);
                    738:        struct unpcb *unp2;
1.1       cgd       739:
                    740:        if (so2->so_type != so->so_type)
                    741:                return (EPROTOTYPE);
                    742:        unp2 = sotounpcb(so2);
                    743:        unp->unp_conn = unp2;
                    744:        switch (so->so_type) {
                    745:
                    746:        case SOCK_DGRAM:
                                        /* Push ourselves onto the head of the peer's reference list. */
                    747:                unp->unp_nextref = unp2->unp_refs;
                    748:                unp2->unp_refs = unp;
                    749:                soisconnected(so);
                    750:                break;
                    751:
                    752:        case SOCK_STREAM:
                    753:                unp2->unp_conn = unp;
1.72      matt      754:                if (req == PRU_CONNECT &&
                    755:                    ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT))
                    756:                        soisconnecting(so);
                    757:                else
                    758:                        soisconnected(so);
1.1       cgd       759:                soisconnected(so2);
                    760:                break;
                    761:
                    762:        default:
                    763:                panic("unp_connect2");
                    764:        }
                    765:        return (0);
                    766: }
                    767:
/*
 * unp_disconnect: break the association between `unp' and its peer.
 *
 * Does nothing if `unp' has no peer.
 *
 * SOCK_DGRAM:  `unp' is removed from the peer's singly linked unp_refs
 *              list (panics if it is not on the list), its unp_nextref
 *              is cleared, and SS_ISCONNECTED is cleared on its socket.
 *              The peer's own state is left alone.
 * SOCK_STREAM: both control blocks forget each other and both sockets
 *              are marked disconnected.
 * Other socket types only have unp_conn cleared.
 */
1.5       andrew    768: void
1.76      matt      769: unp_disconnect(struct unpcb *unp)
1.1       cgd       770: {
1.46      augustss  771:        struct unpcb *unp2 = unp->unp_conn;
1.1       cgd       772:
                    773:        if (unp2 == 0)
                    774:                return;
                    775:        unp->unp_conn = 0;
                    776:        switch (unp->unp_socket->so_type) {
                    777:
                    778:        case SOCK_DGRAM:
                    779:                if (unp2->unp_refs == unp)
                    780:                        unp2->unp_refs = unp->unp_nextref;
                    781:                else {
                                                /* unp2 is reused as the list cursor from here on. */
                    782:                        unp2 = unp2->unp_refs;
                    783:                        for (;;) {
                    784:                                if (unp2 == 0)
                    785:                                        panic("unp_disconnect");
                    786:                                if (unp2->unp_nextref == unp)
                    787:                                        break;
                    788:                                unp2 = unp2->unp_nextref;
                    789:                        }
                                                /* unp2 is now the predecessor of unp: splice unp out. */
                    790:                        unp2->unp_nextref = unp->unp_nextref;
                    791:                }
                    792:                unp->unp_nextref = 0;
                    793:                unp->unp_socket->so_state &= ~SS_ISCONNECTED;
                    794:                break;
                    795:
                    796:        case SOCK_STREAM:
                    797:                soisdisconnected(unp->unp_socket);
                    798:                unp2->unp_conn = 0;
                    799:                soisdisconnected(unp2->unp_socket);
                    800:                break;
                    801:        }
                    802: }
                    803:
                    804: #ifdef notdef
/*
 * unp_abort: compiled out by the enclosing #ifdef notdef.  As written it
 * would simply detach `unp' via unp_detach().  No return type is
 * declared.
 */
1.76      matt      805: unp_abort(struct unpcb *unp)
1.1       cgd       806: {
                    807:        unp_detach(unp);
                    808: }
                    809: #endif
                    810:
/*
 * unp_shutdown: for a stream socket that has a connected peer whose
 * control block still references a socket, call socantrcvmore() on the
 * peer's socket.  In every other case nothing is done.
 */
1.5       andrew    811: void
1.76      matt      812: unp_shutdown(struct unpcb *unp)
1.1       cgd       813: {
                    814:        struct socket *so;
                    815:
                                /* The assignment inside the condition also tests the peer socket for NULL. */
                    816:        if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
                    817:            (so = unp->unp_conn->unp_socket))
                    818:                socantrcvmore(so);
                    819: }
                    820:
/*
 * unp_drop: record `errno' as the pending error on unp's socket and
 * disconnect `unp' from its peer.
 *
 * If the socket has a non-null so_head, the control block is also
 * detached from the socket, the socket is released with sofree(), and
 * the bound address and the control block are freed.
 * NOTE(review): so_head presumably means the socket is still queued on
 * a listening socket and has no other owner -- confirm.
 */
1.5       andrew    821: void
1.76      matt      822: unp_drop(struct unpcb *unp, int errno)
1.1       cgd       823: {
                    824:        struct socket *so = unp->unp_socket;
                    825:
                    826:        so->so_error = errno;
                    827:        unp_disconnect(unp);
                    828:        if (so->so_head) {
                                        /* Clear so_pcb first so the socket no longer points at memory freed below. */
1.15      mycroft   829:                so->so_pcb = 0;
1.14      mycroft   830:                sofree(so);
1.20      mycroft   831:                if (unp->unp_addr)
1.26      thorpej   832:                        free(unp->unp_addr, M_SONAME);
1.14      mycroft   833:                free(unp, M_PCB);
1.1       cgd       834:        }
                    835: }
                    836:
                    837: #ifdef notdef
/*
 * unp_drain: empty placeholder, compiled out by the enclosing
 * #ifdef notdef.  No return type is declared.
 */
1.76      matt      838: unp_drain(void)
1.1       cgd       839: {
                    840:
                    841: }
                    842: #endif
                    843:
/*
 * unp_externalize: convert the control message at the start of the mbuf
 * `rights' from an array of struct file pointers into an array of file
 * descriptors allocated in the process of lwp `l'.
 *
 * The number of entries is derived from cmsg_len.  The process's
 * cwdi_lock is held as a reader for the whole operation.  Steps:
 *   1. If the process has a root directory set (cwdi_rdir), refuse with
 *      EPERM any directory vnode that vn_isunder() says is outside it.
 *   2. Allocate one descriptor slot per file.  If an allocation fails,
 *      the slots taken so far are removed; on ENOSPC the table is
 *      expanded and the whole allocation is retried, any other failure
 *      becomes EMSGSIZE.
 *   3. Decrement f_msgcount on each file and unp_rights once per file.
 *   4. Overwrite the message payload with the descriptor numbers and
 *      shrink cmsg_len and the mbuf length to match.
 *
 * On any error every passed file is handed to unp_discard() and its
 * pointer in the message is zeroed.  Returns 0, EPERM or EMSGSIZE.
 */
1.5       andrew    844: int
1.86      christos  845: unp_externalize(struct mbuf *rights, struct lwp *l)
1.1       cgd       846: {
1.46      augustss  847:        struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
1.86      christos  848:        struct proc *p = l->l_proc;
1.47      thorpej   849:        int i, *fdp;
1.46      augustss  850:        struct file **rp;
                    851:        struct file *fp;
1.50      thorpej   852:        int nfds, error = 0;
1.47      thorpej   853:
                                /* Payload size divided by pointer size gives the number of files. */
                    854:        nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
                    855:            sizeof(struct file *);
                    856:        rp = (struct file **)CMSG_DATA(cm);
1.1       cgd       857:
                                /* Scratch array for the new descriptor numbers; freed at "out". */
1.50      thorpej   858:        fdp = malloc(nfds * sizeof(int), M_TEMP, M_WAITOK);
1.95.2.3  ad        859:        rw_enter(&p->p_cwdi->cwdi_lock, RW_READER);
1.50      thorpej   860:
1.39      sommerfe  861:        /* Make sure the recipient should be able to see the descriptors.. */
1.42      thorpej   862:        if (p->p_cwdi->cwdi_rdir != NULL) {
1.48      thorpej   863:                rp = (struct file **)CMSG_DATA(cm);
1.39      sommerfe  864:                for (i = 0; i < nfds; i++) {
                    865:                        fp = *rp++;
                    866:                        /*
                    867:                         * If we are in a chroot'ed directory, and
                    868:                         * someone wants to pass us a directory, make
                    869:                         * sure it's inside the subtree we're allowed
                    870:                         * to access.
                    871:                         */
                    872:                        if (fp->f_type == DTYPE_VNODE) {
                    873:                                struct vnode *vp = (struct vnode *)fp->f_data;
                    874:                                if ((vp->v_type == VDIR) &&
1.86      christos  875:                                    !vn_isunder(vp, p->p_cwdi->cwdi_rdir, l)) {
1.39      sommerfe  876:                                        error = EPERM;
                    877:                                        break;
                    878:                                }
                    879:                        }
                    880:                }
                    881:        }
1.50      thorpej   882:
                                /*
                                 * Re-entered after a failed allocation below: with error == 0
                                 * (after fdexpand) the allocation loop is retried from the
                                 * start, otherwise the discard path is taken.
                                 */
                    883:  restart:
1.47      thorpej   884:        rp = (struct file **)CMSG_DATA(cm);
1.50      thorpej   885:        if (error != 0) {
1.24      cgd       886:                for (i = 0; i < nfds; i++) {
1.1       cgd       887:                        fp = *rp;
1.39      sommerfe  888:                        /*
                    889:                         * zero the pointer before calling unp_discard,
                    890:                         * since it may end up in unp_gc()..
                    891:                         */
                    892:                        *rp++ = 0;
1.1       cgd       893:                        unp_discard(fp);
                    894:                }
1.50      thorpej   895:                goto out;
1.1       cgd       896:        }
1.50      thorpej   897:
1.24      cgd       898:        /*
1.50      thorpej   899:         * First loop -- allocate file descriptor table slots for the
                    900:         * new descriptors.
1.24      cgd       901:         */
                    902:        for (i = 0; i < nfds; i++) {
1.39      sommerfe  903:                fp = *rp++;
1.50      thorpej   904:                if ((error = fdalloc(p, 0, &fdp[i])) != 0) {
1.49      thorpej   905:                        /*
1.50      thorpej   906:                         * Back out what we've done so far.
1.49      thorpej   907:                         */
1.50      thorpej   908:                        for (--i; i >= 0; i--)
                    909:                                fdremove(p->p_fd, fdp[i]);
                    910:
                    911:                        if (error == ENOSPC) {
                    912:                                fdexpand(p);
                    913:                                error = 0;
                    914:                        } else {
                    915:                                /*
                    916:                                 * This is the error that has historically
                    917:                                 * been returned, and some callers may
                    918:                                 * expect it.
                    919:                                 */
                    920:                                error = EMSGSIZE;
                    921:                        }
                    922:                        goto restart;
1.49      thorpej   923:                }
1.50      thorpej   924:
                    925:                /*
                    926:                 * Make the slot reference the descriptor so that
                    927:                 * fdalloc() works properly.. We finalize it all
                    928:                 * in the loop below.
                    929:                 */
1.95.2.3  ad        930:                rw_enter(&p->p_fd->fd_lock, RW_WRITER);
1.50      thorpej   931:                p->p_fd->fd_ofiles[fdp[i]] = fp;
1.95.2.3  ad        932:                rw_exit(&p->p_fd->fd_lock);
1.1       cgd       933:        }
1.24      cgd       934:
                    935:        /*
1.50      thorpej   936:         * Now that adding them has succeeded, update all of the
                    937:         * descriptor passing state.
1.24      cgd       938:         */
1.50      thorpej   939:        rp = (struct file **)CMSG_DATA(cm);
                    940:        for (i = 0; i < nfds; i++) {
                    941:                fp = *rp++;
                                        /* The file is no longer in flight inside a message. */
                    942:                fp->f_msgcount--;
                    943:                unp_rights--;
                    944:        }
                    945:
                    946:        /*
                    947:         * Copy temporary array to message and adjust length, in case of
                    948:         * transition from large struct file pointers to ints.
                    949:         */
                    950:        memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int));
1.47      thorpej   951:        cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
                    952:        rights->m_len = CMSG_SPACE(nfds * sizeof(int));
                                /* Common exit: drop the cwdi lock and free the scratch array. */
1.50      thorpej   953:  out:
1.95.2.3  ad        954:        rw_exit(&p->p_cwdi->cwdi_lock);
1.50      thorpej   955:        free(fdp, M_TEMP);
                    956:        return (error);
1.1       cgd       957: }
                    958:
/*
 * unp_internalize: convert the control message in the mbuf `control'
 * from an array of file descriptors of the process of lwp `l' into an
 * array of struct file pointers.
 *
 * The header must be SCM_RIGHTS at level SOL_SOCKET and cmsg_len must
 * equal the mbuf length, else EINVAL.  Every descriptor is first checked
 * with fd_getfile(); a failure there returns EBADF.  If the pointer
 * array does not fit in the mbuf's trailing space, a new buffer is
 * allocated and later attached to the mbuf as external storage;
 * otherwise the conversion is done in place.  For every file, f_count
 * and f_msgcount are incremented under the file's lock and unp_rights
 * is incremented.  Finally cmsg_len and the mbuf length are updated for
 * the pointer-sized payload.
 *
 * Returns 0 on success, or EINVAL, EBADF or E2BIG.
 */
1.5       andrew    959: int
1.86      christos  960: unp_internalize(struct mbuf *control, struct lwp *l)
1.1       cgd       961: {
1.86      christos  962:        struct proc *p = l->l_proc;
1.24      cgd       963:        struct filedesc *fdescp = p->p_fd;
1.73      martin    964:        struct cmsghdr *newcm, *cm = mtod(control, struct cmsghdr *);
                    965:        struct file **rp, **files;
1.46      augustss  966:        struct file *fp;
                    967:        int i, fd, *fdp;
1.24      cgd       968:        int nfds;
                    969:        u_int neededspace;
1.38      thorpej   970:
1.24      cgd       971:        /* Sanity check the control message header */
1.66      jdolecek  972:        if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
1.1       cgd       973:            cm->cmsg_len != control->m_len)
                    974:                return (EINVAL);
1.24      cgd       975:
                    976:        /* Verify that the file descriptors are valid */
1.47      thorpej   977:        nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int);
                    978:        fdp = (int *)CMSG_DATA(cm);
1.24      cgd       979:        for (i = 0; i < nfds; i++) {
                    980:                fd = *fdp++;
1.58      pk        981:                if ((fp = fd_getfile(fdescp, fd)) == NULL)
1.1       cgd       982:                        return (EBADF);
                                        /*
                                         * fd_getfile() evidently returns with fp->f_lock held,
                                         * since it is released here without a matching enter
                                         * -- TODO confirm.  No reference is kept on the file
                                         * between this check and the conversion loop below.
                                         */
1.95.2.1  ad        983:                /* XXXSMP grab reference to file */
                    984:                mutex_exit(&fp->f_lock);
1.1       cgd       985:        }
1.24      cgd       986:
                    987:        /* Make sure we have room for the struct file pointers */
1.47      thorpej   988:        neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) -
                    989:            control->m_len;
1.24      cgd       990:        if (neededspace > M_TRAILINGSPACE(control)) {
                    991:
1.73      martin    992:                /* allocate new space and copy header into it */
                    993:                newcm = malloc(
                    994:                    CMSG_SPACE(nfds * sizeof(struct file *)),
                    995:                    M_MBUF, M_WAITOK);
                                        /*
                                         * NOTE(review): the allocation is made with M_WAITOK;
                                         * confirm whether it can return NULL at all here.
                                         */
1.95.2.1  ad        996:                if (newcm == NULL) {
                    997:                        /* XXXSMP drop references to files */
1.24      cgd       998:                        return (E2BIG);
1.95.2.1  ad        999:                }
1.73      martin   1000:                memcpy(newcm, cm, sizeof(struct cmsghdr));
1.80      perry    1001:                files = (struct file **)CMSG_DATA(newcm);
1.73      martin   1002:        } else {
                   1003:                /* we can convert in-place */
                   1004:                newcm = NULL;
                   1005:                files = (struct file **)CMSG_DATA(cm);
1.24      cgd      1006:        }
                   1007:
                   1008:        /*
                   1009:         * Transform the file descriptors into struct file pointers, in
                   1010:         * reverse order so that if pointers are bigger than ints, the
                   1011:         * ints won't get overwritten until we're done.
                   1012:         */
1.95.2.3  ad       1013:        rw_enter(&fdescp->fd_lock, RW_READER);
                                /* Both cursors start one past the end and are pre-decremented. */
1.94      cbiere   1014:        fdp = (int *)CMSG_DATA(cm) + nfds;
                   1015:        rp = files + nfds;
1.24      cgd      1016:        for (i = 0; i < nfds; i++) {
1.94      cbiere   1017:                fp = fdescp->fd_ofiles[*--fdp];
1.95.2.1  ad       1018:                mutex_enter(&fp->f_lock);
1.57      pk       1019: #ifdef DIAGNOSTIC
                   1020:                if (fp->f_iflags & FIF_WANTCLOSE)
                   1021:                        panic("unp_internalize: file already closed");
                   1022: #endif
1.94      cbiere   1023:                *--rp = fp;
                                        /* One more reference, held by the in-flight message. */
1.1       cgd      1024:                fp->f_count++;
                   1025:                fp->f_msgcount++;
1.95.2.1  ad       1026:                mutex_exit(&fp->f_lock);
1.1       cgd      1027:                unp_rights++;
                   1028:        }
1.95.2.3  ad       1029:        rw_exit(&fdescp->fd_lock);
1.73      martin   1030:
                                /* Swap the mbuf's storage for the larger buffer built above. */
                   1031:        if (newcm) {
                   1032:                if (control->m_flags & M_EXT)
                   1033:                        MEXTREMOVE(control);
                   1034:                MEXTADD(control, newcm,
                   1035:                    CMSG_SPACE(nfds * sizeof(struct file *)),
                   1036:                    M_MBUF, NULL, NULL);
                   1037:                cm = newcm;
                   1038:        }
                   1039:
                   1040:        /* adjust message & mbuf to note amount of space actually used. */
                   1041:        cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct file *));
                   1042:        control->m_len = CMSG_SPACE(nfds * sizeof(struct file *));
                   1043:
1.1       cgd      1044:        return (0);
1.30      thorpej  1045: }
                   1046:
                   /*
                    * unp_addsockcred: build an SCM_CREDS control message describing
                    * the credentials of lwp `l' and append it to the mbuf chain
                    * `control'.
                    *
                    * The message holds a struct sockcred sized for the number of
                    * groups in l->l_cred and filled with the real and effective
                    * uid and gid plus the group list.  When the message does not fit
                    * in a plain mbuf, external storage is attached (a cluster, or a
                    * malloc'ed buffer when larger than MCLBYTES).
                    *
                    * Returns the head of the resulting chain: `control' with the new
                    * mbuf linked at its tail, or the new mbuf itself when `control'
                    * is NULL.  If external storage could not be attached, the new
                    * mbuf is freed and `control' is returned unchanged.
                    */
                   1047: struct mbuf *
1.92      ad       1048: unp_addsockcred(struct lwp *l, struct mbuf *control)
1.30      thorpej  1049: {
                   1050:        struct cmsghdr *cmp;
                   1051:        struct sockcred *sc;
                   1052:        struct mbuf *m, *n;
1.47      thorpej  1053:        int len, space, i;
1.30      thorpej  1054:
                                /* len goes into cmsg_len; space is what the mbuf must hold. */
1.92      ad       1055:        len = CMSG_LEN(SOCKCREDSIZE(kauth_cred_ngroups(l->l_cred)));
                   1056:        space = CMSG_SPACE(SOCKCREDSIZE(kauth_cred_ngroups(l->l_cred)));
1.30      thorpej  1057:
                   1058:        m = m_get(M_WAIT, MT_CONTROL);
1.47      thorpej  1059:        if (space > MLEN) {
                   1060:                if (space > MCLBYTES)
                   1061:                        MEXTMALLOC(m, space, M_WAITOK);
1.30      thorpej  1062:                else
1.59      matt     1063:                        m_clget(m, M_WAIT);
                                        /* M_EXT not set means no external storage was attached. */
1.30      thorpej  1064:                if ((m->m_flags & M_EXT) == 0) {
                   1065:                        m_free(m);
                   1066:                        return (control);
                   1067:                }
                   1068:        }
                   1069:
1.47      thorpej  1070:        m->m_len = space;
1.30      thorpej  1071:        m->m_next = NULL;
                   1072:        cmp = mtod(m, struct cmsghdr *);
                   1073:        sc = (struct sockcred *)CMSG_DATA(cmp);
                   1074:        cmp->cmsg_len = len;
                   1075:        cmp->cmsg_level = SOL_SOCKET;
                   1076:        cmp->cmsg_type = SCM_CREDS;
1.92      ad       1077:        sc->sc_uid = kauth_cred_getuid(l->l_cred);
                   1078:        sc->sc_euid = kauth_cred_geteuid(l->l_cred);
                   1079:        sc->sc_gid = kauth_cred_getgid(l->l_cred);
                   1080:        sc->sc_egid = kauth_cred_getegid(l->l_cred);
                   1081:        sc->sc_ngroups = kauth_cred_ngroups(l->l_cred);
1.30      thorpej  1082:        for (i = 0; i < sc->sc_ngroups; i++)
1.92      ad       1083:                sc->sc_groups[i] = kauth_cred_group(l->l_cred, i);
1.30      thorpej  1084:
                   1085:        /*
                   1086:         * If a control message already exists, append us to the end.
                   1087:         */
                   1088:        if (control != NULL) {
                                        /* Walk to the last mbuf of the existing chain. */
                   1089:                for (n = control; n->m_next != NULL; n = n->m_next)
                   1090:                        ;
                   1091:                n->m_next = m;
                   1092:        } else
                   1093:                control = m;
                   1094:
                   1095:        return (control);
1.1       cgd      1096: }
                   1097:
                         /*
                          * State shared by the descriptor garbage collector:
                          *
                          * unp_defer  - number of files currently flagged FDEFER; unp_mark()
                          *              increments it when it defers a socket and unp_gc()
                          *              decrements it when it clears the flag, repeating its
                          *              mark loop while the count is non-zero.
                          * unp_gcing  - non-zero while unp_gc() is running; a nested call to
                          *              unp_gc() sees it set and returns immediately.
                          *
                          * unixdomain is defined elsewhere; unp_gc() compares a socket's
                          * protocol domain against its address to select the sockets it scans.
                          */
                   1098: int    unp_defer, unp_gcing;
                   1099: extern struct domain unixdomain;
                   1100:
1.39      sommerfe 1101: /*
                   1102:  * Comment added long after the fact explaining what's going on here.
                   1103:  * Do a mark-sweep GC of file descriptors on the system, to free up
                   1104:  * any which are caught in flight to an about-to-be-closed socket.
                   1105:  *
                   1106:  * Traditional mark-sweep gc's start at the "root", and mark
                   1107:  * everything reachable from the root (which, in our case would be the
                   1108:  * process table).  The mark bits are cleared during the sweep.
                   1109:  *
                   1110:  * XXX For some inexplicable reason (perhaps because the file
                   1111:  * descriptor tables used to live in the u area which could be swapped
                   1112:  * out and thus hard to reach), we do multiple scans over the set of
                   1113:  * descriptors, using *two* mark bits per object (DEFER and MARK).
                   1114:  * Whenever we find a descriptor which references other descriptors,
                   1115:  * the ones it references are marked with both bits, and we iterate
                   1116:  * over the whole file table until there are no more DEFER bits set.
                   1117:  * We also make an extra pass *before* the GC to clear the mark bits,
                   1118:  * which could have been cleared at almost no cost during the previous
                   1119:  * sweep.
                   1120:  *
                   1121:  * XXX MP: this needs to run with locks such that no other thread of
                   1122:  * control can create or destroy references to file descriptors. it
                   1123:  * may be necessary to defer the GC until later (when the locking
                   1124:  * situation is more hospitable); it may be necessary to push this
                   1125:  * into a separate thread.
                   1126:  */
1.5       andrew   1127: void
1.76      matt     1128: unp_gc(void)
1.1       cgd      1129: {
1.46      augustss 1130:        struct file *fp, *nextfp;
                   1131:        struct socket *so, *so1;
1.8       mycroft  1132:        struct file **extra_ref, **fpp;
                   1133:        int nunref, i;
1.1       cgd      1134:
                   1135:        if (unp_gcing)
                   1136:                return;
                   1137:        unp_gcing = 1;
                   1138:        unp_defer = 0;
1.39      sommerfe 1139:
1.95.2.3  ad       1140:        mutex_enter(&filelist_lock);
                   1141:
1.39      sommerfe 1142:        /* Clear mark bits */
1.54      matt     1143:        LIST_FOREACH(fp, &filehead, f_list)
1.1       cgd      1144:                fp->f_flag &= ~(FMARK|FDEFER);
1.39      sommerfe 1145:
                   1146:        /*
                   1147:         * Iterate over the set of descriptors, marking ones believed
                   1148:         * (based on refcount) to be referenced from a process, and
                   1149:         * marking for rescan descriptors which are queued on a socket.
                   1150:         */
1.1       cgd      1151:        do {
1.54      matt     1152:                LIST_FOREACH(fp, &filehead, f_list) {
1.95.2.3  ad       1153:                        mutex_enter(&fp->f_lock);
                   1154:                        if (fp->f_flag & FDEFER) {
1.1       cgd      1155:                                fp->f_flag &= ~FDEFER;
                   1156:                                unp_defer--;
1.39      sommerfe 1157: #ifdef DIAGNOSTIC
                   1158:                                if (fp->f_count == 0)
                   1159:                                        panic("unp_gc: deferred unreferenced socket");
                   1160: #endif
1.1       cgd      1161:                        } else {
1.95.2.3  ad       1162:                                if (fp->f_count == 0 ||
                   1163:                                    (fp->f_flag & FMARK) ||
                   1164:                                    fp->f_count == fp->f_msgcount) {
                   1165:                                        mutex_exit(&fp->f_lock);
1.1       cgd      1166:                                        continue;
1.95.2.3  ad       1167:                                }
1.1       cgd      1168:                        }
1.39      sommerfe 1169:                        fp->f_flag |= FMARK;
                   1170:
1.1       cgd      1171:                        if (fp->f_type != DTYPE_SOCKET ||
1.95.2.3  ad       1172:                            (so = (struct socket *)fp->f_data) == 0 ||
                   1173:                            so->so_proto->pr_domain != &unixdomain ||
                   1174:                            (so->so_proto->pr_flags&PR_RIGHTS) == 0) {
                   1175:                                mutex_exit(&fp->f_lock);
1.1       cgd      1176:                                continue;
1.95.2.3  ad       1177:                        }
1.1       cgd      1178: #ifdef notdef
                   1179:                        if (so->so_rcv.sb_flags & SB_LOCK) {
                   1180:                                /*
                   1181:                                 * This is problematical; it's not clear
                   1182:                                 * we need to wait for the sockbuf to be
                   1183:                                 * unlocked (on a uniprocessor, at least),
                   1184:                                 * and it's also not clear what to do
                   1185:                                 * if sbwait returns an error due to receipt
                   1186:                                 * of a signal.  If sbwait does return
                   1187:                                 * an error, we'll go into an infinite
                   1188:                                 * loop.  Delete all of this for now.
                   1189:                                 */
                   1190:                                (void) sbwait(&so->so_rcv);
                   1191:                                goto restart;
                   1192:                        }
                   1193: #endif
1.95.2.3  ad       1194:                        mutex_exit(&fp->f_lock);
                   1195:
1.39      sommerfe 1196:                        unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
                   1197:                        /*
                   1198:                         * mark descriptors referenced from sockets queued on the accept queue as well.
                   1199:                         */
                   1200:                        if (so->so_options & SO_ACCEPTCONN) {
1.54      matt     1201:                                TAILQ_FOREACH(so1, &so->so_q0, so_qe) {
1.39      sommerfe 1202:                                        unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
                   1203:                                }
1.54      matt     1204:                                TAILQ_FOREACH(so1, &so->so_q, so_qe) {
1.39      sommerfe 1205:                                        unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
                   1206:                                }
                   1207:                        }
1.1       cgd      1208:                }
                   1209:        } while (unp_defer);
1.95.2.3  ad       1210:
                   1211:        mutex_exit(&filelist_lock);
                   1212:
1.8       mycroft  1213:        /*
1.39      sommerfe 1214:         * Sweep pass.  Find unmarked descriptors, and free them.
                   1215:         *
1.8       mycroft  1216:         * We grab an extra reference to each of the file table entries
                   1217:         * that are not otherwise accessible and then free the rights
                   1218:         * that are stored in messages on them.
                   1219:         *
1.57      pk       1220:         * The bug in the original code is a little tricky, so I'll describe
1.8       mycroft  1221:         * what's wrong with it here.
                   1222:         *
                   1223:         * It is incorrect to simply unp_discard each entry for f_msgcount
                   1224:         * times -- consider the case of sockets A and B that contain
                   1225:         * references to each other.  On a last close of some other socket,
                   1226:         * we trigger a gc since the number of outstanding rights (unp_rights)
                   1227:         * is non-zero.  If during the sweep phase the gc code un_discards,
                   1228:         * we end up doing a (full) closef on the descriptor.  A closef on A
                   1229:         * results in the following chain.  Closef calls soo_close, which
                   1230:         * calls soclose.   Soclose calls first (through the switch
                   1231:         * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
                   1232:         * returns because the previous instance had set unp_gcing, and
                   1233:         * we return all the way back to soclose, which marks the socket
                   1234:         * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
                   1235:         * to free up the rights that are queued in messages on the socket A,
                   1236:         * i.e., the reference on B.  The sorflush calls via the dom_dispose
                   1237:         * switch unp_dispose, which unp_scans with unp_discard.  This second
                   1238:         * instance of unp_discard just calls closef on B.
                   1239:         *
                   1240:         * Well, a similar chain occurs on B, resulting in a sorflush on B,
                   1241:         * which results in another closef on A.  Unfortunately, A is already
                   1242:         * being closed, and the descriptor has already been marked with
                   1243:         * SS_NOFDREF, and soclose panics at this point.
                   1244:         *
                   1245:         * Here, we first take an extra reference to each inaccessible
1.39      sommerfe 1246:         * descriptor.  Then, if the inaccessible descriptor is a
                   1247:         * socket, we call sorflush in case it is a Unix domain
                   1248:         * socket.  After we destroy all the rights carried in
                   1249:         * messages, we do a last closef to get rid of our extra
                   1250:         * reference.  This is the last close, and the unp_detach etc
                   1251:         * will shut down the socket.
1.8       mycroft  1252:         *
                   1253:         * 91/09/19, bsy@cs.cmu.edu
                   1254:         */
1.95.2.3  ad       1255:        extra_ref = kmem_alloc(nfiles * sizeof(struct file *), KM_SLEEP);
                   1256:
                   1257:        mutex_enter(&filelist_lock);
1.54      matt     1258:        for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; fp != 0;
1.11      mycroft  1259:            fp = nextfp) {
1.54      matt     1260:                nextfp = LIST_NEXT(fp, f_list);
1.95.2.1  ad       1261:                mutex_enter(&fp->f_lock);
1.57      pk       1262:                if (fp->f_count != 0 &&
                   1263:                    fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
1.8       mycroft  1264:                        *fpp++ = fp;
                   1265:                        nunref++;
                   1266:                        fp->f_count++;
                   1267:                }
1.95.2.1  ad       1268:                mutex_exit(&fp->f_lock);
1.1       cgd      1269:        }
1.95.2.3  ad       1270:        mutex_exit(&filelist_lock);
                   1271:
1.39      sommerfe 1272:        for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
1.45      thorpej  1273:                fp = *fpp;
1.95.2.1  ad       1274:                mutex_enter(&fp->f_lock);
1.44      thorpej  1275:                FILE_USE(fp);
1.39      sommerfe 1276:                if (fp->f_type == DTYPE_SOCKET)
                   1277:                        sorflush((struct socket *)fp->f_data);
1.44      thorpej  1278:                FILE_UNUSE(fp, NULL);
1.39      sommerfe 1279:        }
1.44      thorpej  1280:        for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
1.45      thorpej  1281:                fp = *fpp;
1.95.2.1  ad       1282:                mutex_enter(&fp->f_lock);
1.44      thorpej  1283:                FILE_USE(fp);
1.86      christos 1284:                (void) closef(fp, (struct lwp *)0);
1.44      thorpej  1285:        }
1.95.2.3  ad       1286:        kmem_free(extra_ref, nfiles * sizeof(struct file *));
1.1       cgd      1287:        unp_gcing = 0;
                   1288: }
                   1289:
1.5       andrew   1290: void
1.76      matt     1291: unp_dispose(struct mbuf *m)
1.1       cgd      1292: {
1.8       mycroft  1293:
1.1       cgd      1294:        if (m)
1.39      sommerfe 1295:                unp_scan(m, unp_discard, 1);
1.1       cgd      1296: }
                   1297:
1.5       andrew   1298: void
1.76      matt     1299: unp_scan(struct mbuf *m0, void (*op)(struct file *), int discard)
1.1       cgd      1300: {
1.46      augustss 1301:        struct mbuf *m;
                   1302:        struct file **rp;
                   1303:        struct cmsghdr *cm;
                   1304:        int i;
1.1       cgd      1305:        int qfds;
                   1306:
                   1307:        while (m0) {
1.48      thorpej  1308:                for (m = m0; m; m = m->m_next) {
1.1       cgd      1309:                        if (m->m_type == MT_CONTROL &&
                   1310:                            m->m_len >= sizeof(*cm)) {
                   1311:                                cm = mtod(m, struct cmsghdr *);
                   1312:                                if (cm->cmsg_level != SOL_SOCKET ||
                   1313:                                    cm->cmsg_type != SCM_RIGHTS)
                   1314:                                        continue;
1.48      thorpej  1315:                                qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm)))
                   1316:                                    / sizeof(struct file *);
                   1317:                                rp = (struct file **)CMSG_DATA(cm);
1.39      sommerfe 1318:                                for (i = 0; i < qfds; i++) {
                   1319:                                        struct file *fp = *rp;
                   1320:                                        if (discard)
                   1321:                                                *rp = 0;
                   1322:                                        (*op)(fp);
                   1323:                                        rp++;
                   1324:                                }
1.1       cgd      1325:                                break;          /* XXX, but saves time */
                   1326:                        }
1.48      thorpej  1327:                }
1.52      thorpej  1328:                m0 = m0->m_nextpkt;
1.1       cgd      1329:        }
                   1330: }
                   1331:
1.5       andrew   1332: void
1.76      matt     1333: unp_mark(struct file *fp)
1.1       cgd      1334: {
1.80      perry    1335:
1.95.2.3  ad       1336:        if (fp == NULL)
1.39      sommerfe 1337:                return;
1.1       cgd      1338:
1.39      sommerfe 1339:        /* If we're already deferred, don't screw up the defer count */
1.95.2.3  ad       1340:        mutex_enter(&fp->f_lock);
                   1341:        if (fp->f_flag & (FMARK | FDEFER)) {
                   1342:                mutex_exit(&fp->f_lock);
1.1       cgd      1343:                return;
1.95.2.3  ad       1344:        }
1.39      sommerfe 1345:
                   1346:        /*
                   1347:         * Minimize the number of deferrals...  Sockets are the only
                   1348:         * type of descriptor which can hold references to another
                   1349:         * descriptor, so just mark other descriptors, and defer
                   1350:         * unmarked sockets for the next pass.
                   1351:         */
                   1352:        if (fp->f_type == DTYPE_SOCKET) {
                   1353:                unp_defer++;
                   1354:                if (fp->f_count == 0)
                   1355:                        panic("unp_mark: queued unref");
                   1356:                fp->f_flag |= FDEFER;
                   1357:        } else {
                   1358:                fp->f_flag |= FMARK;
                   1359:        }
1.95.2.3  ad       1360:        mutex_exit(&fp->f_lock);
1.39      sommerfe 1361:        return;
1.1       cgd      1362: }
                   1363:
1.5       andrew   1364: void
1.76      matt     1365: unp_discard(struct file *fp)
1.1       cgd      1366: {
1.39      sommerfe 1367:        if (fp == NULL)
                   1368:                return;
1.95.2.1  ad       1369:        mutex_enter(&fp->f_lock);
1.57      pk       1370:        fp->f_usecount++;       /* i.e. FILE_USE(fp) sans locking */
1.1       cgd      1371:        fp->f_msgcount--;
1.95.2.1  ad       1372:        mutex_exit(&fp->f_lock);
1.1       cgd      1373:        unp_rights--;
1.86      christos 1374:        (void) closef(fp, (struct lwp *)0);
1.1       cgd      1375: }

CVSweb <webmaster@jp.NetBSD.org>