[BACK]Return to uipc_usrreq.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / kern

Annotation of src/sys/kern/uipc_usrreq.c, Revision 1.102

1.102   ! pooka       1: /*     $NetBSD: uipc_usrreq.c,v 1.101 2007/10/08 15:12:08 ad Exp $     */
1.30      thorpej     2:
                      3: /*-
1.77      matt        4:  * Copyright (c) 1998, 2000, 2004 The NetBSD Foundation, Inc.
1.30      thorpej     5:  * All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to The NetBSD Foundation
                      8:  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
                      9:  * NASA Ames Research Center.
                     10:  *
                     11:  * Redistribution and use in source and binary forms, with or without
                     12:  * modification, are permitted provided that the following conditions
                     13:  * are met:
                     14:  * 1. Redistributions of source code must retain the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer.
                     16:  * 2. Redistributions in binary form must reproduce the above copyright
                     17:  *    notice, this list of conditions and the following disclaimer in the
                     18:  *    documentation and/or other materials provided with the distribution.
                     19:  * 3. All advertising materials mentioning features or use of this software
                     20:  *    must display the following acknowledgement:
                     21:  *     This product includes software developed by the NetBSD
                     22:  *     Foundation, Inc. and its contributors.
                     23:  * 4. Neither the name of The NetBSD Foundation nor the names of its
                     24:  *    contributors may be used to endorse or promote products derived
                     25:  *    from this software without specific prior written permission.
                     26:  *
                     27:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     28:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     29:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     30:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     31:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     32:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     33:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     34:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     35:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     36:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     37:  * POSSIBILITY OF SUCH DAMAGE.
                     38:  */
1.10      cgd        39:
1.1       cgd        40: /*
1.8       mycroft    41:  * Copyright (c) 1982, 1986, 1989, 1991, 1993
                     42:  *     The Regents of the University of California.  All rights reserved.
1.1       cgd        43:  *
                     44:  * Redistribution and use in source and binary forms, with or without
                     45:  * modification, are permitted provided that the following conditions
                     46:  * are met:
                     47:  * 1. Redistributions of source code must retain the above copyright
                     48:  *    notice, this list of conditions and the following disclaimer.
                     49:  * 2. Redistributions in binary form must reproduce the above copyright
                     50:  *    notice, this list of conditions and the following disclaimer in the
                     51:  *    documentation and/or other materials provided with the distribution.
1.67      agc        52:  * 3. Neither the name of the University nor the names of its contributors
                     53:  *    may be used to endorse or promote products derived from this software
                     54:  *    without specific prior written permission.
                     55:  *
                     56:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     57:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     58:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     59:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     60:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     61:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     62:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     63:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     64:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     65:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     66:  * SUCH DAMAGE.
                     67:  *
                     68:  *     @(#)uipc_usrreq.c       8.9 (Berkeley) 5/14/95
                     69:  */
                     70:
                     71: /*
                     72:  * Copyright (c) 1997 Christopher G. Demetriou.  All rights reserved.
                     73:  *
                     74:  * Redistribution and use in source and binary forms, with or without
                     75:  * modification, are permitted provided that the following conditions
                     76:  * are met:
                     77:  * 1. Redistributions of source code must retain the above copyright
                     78:  *    notice, this list of conditions and the following disclaimer.
                     79:  * 2. Redistributions in binary form must reproduce the above copyright
                     80:  *    notice, this list of conditions and the following disclaimer in the
                     81:  *    documentation and/or other materials provided with the distribution.
1.1       cgd        82:  * 3. All advertising materials mentioning features or use of this software
                     83:  *    must display the following acknowledgement:
                     84:  *     This product includes software developed by the University of
                     85:  *     California, Berkeley and its contributors.
                     86:  * 4. Neither the name of the University nor the names of its contributors
                     87:  *    may be used to endorse or promote products derived from this software
                     88:  *    without specific prior written permission.
                     89:  *
                     90:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     91:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     92:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     93:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     94:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     95:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     96:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     97:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     98:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     99:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                    100:  * SUCH DAMAGE.
                    101:  *
1.31      fvdl      102:  *     @(#)uipc_usrreq.c       8.9 (Berkeley) 5/14/95
1.1       cgd       103:  */
1.53      lukem     104:
                    105: #include <sys/cdefs.h>
1.102   ! pooka     106: __KERNEL_RCSID(0, "$NetBSD: uipc_usrreq.c,v 1.101 2007/10/08 15:12:08 ad Exp $");
1.1       cgd       107:
1.7       mycroft   108: #include <sys/param.h>
1.8       mycroft   109: #include <sys/systm.h>
1.7       mycroft   110: #include <sys/proc.h>
                    111: #include <sys/filedesc.h>
                    112: #include <sys/domain.h>
                    113: #include <sys/protosw.h>
                    114: #include <sys/socket.h>
                    115: #include <sys/socketvar.h>
                    116: #include <sys/unpcb.h>
                    117: #include <sys/un.h>
                    118: #include <sys/namei.h>
                    119: #include <sys/vnode.h>
                    120: #include <sys/file.h>
                    121: #include <sys/stat.h>
                    122: #include <sys/mbuf.h>
1.91      elad      123: #include <sys/kauth.h>
1.101     ad        124: #include <sys/kmem.h>
1.1       cgd       125:
                    126: /*
                    127:  * Unix communications domain.
                    128:  *
                    129:  * TODO:
                    130:  *     SEQPACKET, RDM
                    131:  *     rethink name space problems
                    132:  *     need a proper out-of-band
                    133:  */
1.93      christos  134: const struct sockaddr_un sun_noname = {
                    135:        .sun_len = sizeof(sun_noname),
                    136:        .sun_family = AF_LOCAL,
                    137: };
1.1       cgd       138: ino_t  unp_ino;                        /* prototype for fake inode numbers */
                    139:
1.92      ad        140: struct mbuf *unp_addsockcred(struct lwp *, struct mbuf *);
1.30      thorpej   141:
1.20      mycroft   142: int
1.76      matt      143: unp_output(struct mbuf *m, struct mbuf *control, struct unpcb *unp,
1.92      ad        144:        struct lwp *l)
1.20      mycroft   145: {
                    146:        struct socket *so2;
1.77      matt      147:        const struct sockaddr_un *sun;
1.20      mycroft   148:
                    149:        so2 = unp->unp_conn->unp_socket;
                    150:        if (unp->unp_addr)
                    151:                sun = unp->unp_addr;
                    152:        else
                    153:                sun = &sun_noname;
1.30      thorpej   154:        if (unp->unp_conn->unp_flags & UNP_WANTCRED)
1.92      ad        155:                control = unp_addsockcred(l, control);
1.82      christos  156:        if (sbappendaddr(&so2->so_rcv, (const struct sockaddr *)sun, m,
1.20      mycroft   157:            control) == 0) {
1.98      martin    158:                unp_dispose(control);
1.20      mycroft   159:                m_freem(control);
                    160:                m_freem(m);
1.79      darrenr   161:                so2->so_rcv.sb_overflowed++;
1.60      christos  162:                return (ENOBUFS);
1.20      mycroft   163:        } else {
                    164:                sorwakeup(so2);
                    165:                return (0);
                    166:        }
                    167: }
                    168:
                    169: void
1.76      matt      170: unp_setsockaddr(struct unpcb *unp, struct mbuf *nam)
1.20      mycroft   171: {
1.77      matt      172:        const struct sockaddr_un *sun;
1.20      mycroft   173:
                    174:        if (unp->unp_addr)
                    175:                sun = unp->unp_addr;
                    176:        else
                    177:                sun = &sun_noname;
                    178:        nam->m_len = sun->sun_len;
1.56      itojun    179:        if (nam->m_len > MLEN)
1.27      thorpej   180:                MEXTMALLOC(nam, nam->m_len, M_WAITOK);
1.95      christos  181:        memcpy(mtod(nam, void *), sun, (size_t)nam->m_len);
1.20      mycroft   182: }
                    183:
                    184: void
1.76      matt      185: unp_setpeeraddr(struct unpcb *unp, struct mbuf *nam)
1.20      mycroft   186: {
1.77      matt      187:        const struct sockaddr_un *sun;
1.20      mycroft   188:
                    189:        if (unp->unp_conn && unp->unp_conn->unp_addr)
                    190:                sun = unp->unp_conn->unp_addr;
                    191:        else
                    192:                sun = &sun_noname;
                    193:        nam->m_len = sun->sun_len;
1.56      itojun    194:        if (nam->m_len > MLEN)
1.27      thorpej   195:                MEXTMALLOC(nam, nam->m_len, M_WAITOK);
1.95      christos  196:        memcpy(mtod(nam, void *), sun, (size_t)nam->m_len);
1.20      mycroft   197: }
                    198:
1.1       cgd       199: /*ARGSUSED*/
1.5       andrew    200: int
1.76      matt      201: uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
1.86      christos  202:        struct mbuf *control, struct lwp *l)
1.1       cgd       203: {
                    204:        struct unpcb *unp = sotounpcb(so);
1.46      augustss  205:        struct socket *so2;
1.86      christos  206:        struct proc *p;
1.75      christos  207:        u_int newhiwat;
1.46      augustss  208:        int error = 0;
1.1       cgd       209:
                    210:        if (req == PRU_CONTROL)
                    211:                return (EOPNOTSUPP);
1.20      mycroft   212:
1.22      mycroft   213: #ifdef DIAGNOSTIC
                    214:        if (req != PRU_SEND && req != PRU_SENDOOB && control)
                    215:                panic("uipc_usrreq: unexpected control mbuf");
                    216: #endif
1.86      christos  217:        p = l ? l->l_proc : NULL;
1.1       cgd       218:        if (unp == 0 && req != PRU_ATTACH) {
                    219:                error = EINVAL;
                    220:                goto release;
                    221:        }
1.20      mycroft   222:
1.1       cgd       223:        switch (req) {
                    224:
                    225:        case PRU_ATTACH:
1.20      mycroft   226:                if (unp != 0) {
1.1       cgd       227:                        error = EISCONN;
                    228:                        break;
                    229:                }
                    230:                error = unp_attach(so);
                    231:                break;
                    232:
                    233:        case PRU_DETACH:
                    234:                unp_detach(unp);
                    235:                break;
                    236:
                    237:        case PRU_BIND:
1.90      christos  238:                KASSERT(l != NULL);
1.86      christos  239:                error = unp_bind(unp, nam, l);
1.1       cgd       240:                break;
                    241:
                    242:        case PRU_LISTEN:
                    243:                if (unp->unp_vnode == 0)
                    244:                        error = EINVAL;
                    245:                break;
                    246:
                    247:        case PRU_CONNECT:
1.90      christos  248:                KASSERT(l != NULL);
1.86      christos  249:                error = unp_connect(so, nam, l);
1.1       cgd       250:                break;
                    251:
                    252:        case PRU_CONNECT2:
1.72      matt      253:                error = unp_connect2(so, (struct socket *)nam, PRU_CONNECT2);
1.1       cgd       254:                break;
                    255:
                    256:        case PRU_DISCONNECT:
                    257:                unp_disconnect(unp);
                    258:                break;
                    259:
                    260:        case PRU_ACCEPT:
1.20      mycroft   261:                unp_setpeeraddr(unp, nam);
1.72      matt      262:                /*
                    263:                 * Mark the initiating STREAM socket as connected *ONLY*
                    264:                 * after it's been accepted.  This prevents a client from
                    265:                 * overrunning a server and receiving ECONNREFUSED.
                    266:                 */
                    267:                if (unp->unp_conn != NULL &&
                    268:                    (unp->unp_conn->unp_socket->so_state & SS_ISCONNECTING))
                    269:                        soisconnected(unp->unp_conn->unp_socket);
1.1       cgd       270:                break;
                    271:
                    272:        case PRU_SHUTDOWN:
                    273:                socantsendmore(so);
                    274:                unp_shutdown(unp);
                    275:                break;
                    276:
                    277:        case PRU_RCVD:
                    278:                switch (so->so_type) {
                    279:
                    280:                case SOCK_DGRAM:
                    281:                        panic("uipc 1");
                    282:                        /*NOTREACHED*/
                    283:
                    284:                case SOCK_STREAM:
                    285: #define        rcv (&so->so_rcv)
                    286: #define snd (&so2->so_snd)
                    287:                        if (unp->unp_conn == 0)
                    288:                                break;
                    289:                        so2 = unp->unp_conn->unp_socket;
                    290:                        /*
                    291:                         * Adjust backpressure on sender
                    292:                         * and wakeup any waiting to write.
                    293:                         */
                    294:                        snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
                    295:                        unp->unp_mbcnt = rcv->sb_mbcnt;
1.75      christos  296:                        newhiwat = snd->sb_hiwat + unp->unp_cc - rcv->sb_cc;
1.81      christos  297:                        (void)chgsbsize(so2->so_uidinfo,
1.75      christos  298:                            &snd->sb_hiwat, newhiwat, RLIM_INFINITY);
1.1       cgd       299:                        unp->unp_cc = rcv->sb_cc;
                    300:                        sowwakeup(so2);
                    301: #undef snd
                    302: #undef rcv
                    303:                        break;
                    304:
                    305:                default:
                    306:                        panic("uipc 2");
                    307:                }
                    308:                break;
                    309:
                    310:        case PRU_SEND:
1.30      thorpej   311:                /*
                    312:                 * Note: unp_internalize() rejects any control message
                    313:                 * other than SCM_RIGHTS, and only allows one.  This
                    314:                 * has the side-effect of preventing a caller from
                    315:                 * forging SCM_CREDS.
                    316:                 */
1.90      christos  317:                if (control) {
                    318:                        KASSERT(l != NULL);
                    319:                        if ((error = unp_internalize(control, l)) != 0)
                    320:                                goto die;
1.83      yamt      321:                }
1.1       cgd       322:                switch (so->so_type) {
                    323:
                    324:                case SOCK_DGRAM: {
                    325:                        if (nam) {
1.20      mycroft   326:                                if ((so->so_state & SS_ISCONNECTED) != 0) {
1.1       cgd       327:                                        error = EISCONN;
1.21      mycroft   328:                                        goto die;
1.1       cgd       329:                                }
1.90      christos  330:                                KASSERT(l != NULL);
1.86      christos  331:                                error = unp_connect(so, nam, l);
1.20      mycroft   332:                                if (error) {
1.23      mycroft   333:                                die:
1.98      martin    334:                                        unp_dispose(control);
1.21      mycroft   335:                                        m_freem(control);
1.20      mycroft   336:                                        m_freem(m);
1.1       cgd       337:                                        break;
1.20      mycroft   338:                                }
1.1       cgd       339:                        } else {
1.20      mycroft   340:                                if ((so->so_state & SS_ISCONNECTED) == 0) {
1.1       cgd       341:                                        error = ENOTCONN;
1.21      mycroft   342:                                        goto die;
1.1       cgd       343:                                }
                    344:                        }
1.89      christos  345:                        KASSERT(p != NULL);
1.92      ad        346:                        error = unp_output(m, control, unp, l);
1.1       cgd       347:                        if (nam)
                    348:                                unp_disconnect(unp);
                    349:                        break;
                    350:                }
                    351:
                    352:                case SOCK_STREAM:
                    353: #define        rcv (&so2->so_rcv)
                    354: #define        snd (&so->so_snd)
1.87      christos  355:                        if (unp->unp_conn == NULL) {
                    356:                                error = ENOTCONN;
                    357:                                break;
                    358:                        }
1.1       cgd       359:                        so2 = unp->unp_conn->unp_socket;
1.30      thorpej   360:                        if (unp->unp_conn->unp_flags & UNP_WANTCRED) {
                    361:                                /*
                    362:                                 * Credentials are passed only once on
                    363:                                 * SOCK_STREAM.
                    364:                                 */
                    365:                                unp->unp_conn->unp_flags &= ~UNP_WANTCRED;
1.92      ad        366:                                control = unp_addsockcred(l, control);
1.30      thorpej   367:                        }
1.1       cgd       368:                        /*
                    369:                         * Send to paired receive port, and then reduce
                    370:                         * send buffer hiwater marks to maintain backpressure.
                    371:                         * Wake up readers.
                    372:                         */
                    373:                        if (control) {
1.98      martin    374:                                if (sbappendcontrol(rcv, m, control) == 0) {
                    375:                                        unp_dispose(control);
1.21      mycroft   376:                                        m_freem(control);
1.98      martin    377:                                }
1.1       cgd       378:                        } else
                    379:                                sbappend(rcv, m);
                    380:                        snd->sb_mbmax -=
                    381:                            rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
                    382:                        unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
1.75      christos  383:                        newhiwat = snd->sb_hiwat -
                    384:                            (rcv->sb_cc - unp->unp_conn->unp_cc);
1.81      christos  385:                        (void)chgsbsize(so->so_uidinfo,
1.75      christos  386:                            &snd->sb_hiwat, newhiwat, RLIM_INFINITY);
1.1       cgd       387:                        unp->unp_conn->unp_cc = rcv->sb_cc;
                    388:                        sorwakeup(so2);
                    389: #undef snd
                    390: #undef rcv
                    391:                        break;
                    392:
                    393:                default:
                    394:                        panic("uipc 4");
                    395:                }
                    396:                break;
                    397:
                    398:        case PRU_ABORT:
                    399:                unp_drop(unp, ECONNABORTED);
1.39      sommerfe  400:
1.88      matt      401:                KASSERT(so->so_head == NULL);
1.39      sommerfe  402: #ifdef DIAGNOSTIC
                    403:                if (so->so_pcb == 0)
                    404:                        panic("uipc 5: drop killed pcb");
                    405: #endif
                    406:                unp_detach(unp);
1.1       cgd       407:                break;
                    408:
                    409:        case PRU_SENSE:
                    410:                ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
                    411:                if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
                    412:                        so2 = unp->unp_conn->unp_socket;
                    413:                        ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
                    414:                }
                    415:                ((struct stat *) m)->st_dev = NODEV;
                    416:                if (unp->unp_ino == 0)
                    417:                        unp->unp_ino = unp_ino++;
1.25      kleink    418:                ((struct stat *) m)->st_atimespec =
                    419:                    ((struct stat *) m)->st_mtimespec =
                    420:                    ((struct stat *) m)->st_ctimespec = unp->unp_ctime;
1.1       cgd       421:                ((struct stat *) m)->st_ino = unp->unp_ino;
                    422:                return (0);
                    423:
                    424:        case PRU_RCVOOB:
1.20      mycroft   425:                error = EOPNOTSUPP;
                    426:                break;
1.1       cgd       427:
                    428:        case PRU_SENDOOB:
1.22      mycroft   429:                m_freem(control);
1.20      mycroft   430:                m_freem(m);
1.1       cgd       431:                error = EOPNOTSUPP;
                    432:                break;
                    433:
                    434:        case PRU_SOCKADDR:
1.20      mycroft   435:                unp_setsockaddr(unp, nam);
1.1       cgd       436:                break;
                    437:
                    438:        case PRU_PEERADDR:
1.20      mycroft   439:                unp_setpeeraddr(unp, nam);
1.1       cgd       440:                break;
                    441:
                    442:        default:
                    443:                panic("piusrreq");
                    444:        }
1.20      mycroft   445:
1.1       cgd       446: release:
                    447:        return (error);
                    448: }
                    449:
                    450: /*
1.30      thorpej   451:  * Unix domain socket option processing.
                    452:  */
                    453: int
1.76      matt      454: uipc_ctloutput(int op, struct socket *so, int level, int optname,
                    455:        struct mbuf **mp)
1.30      thorpej   456: {
                    457:        struct unpcb *unp = sotounpcb(so);
                    458:        struct mbuf *m = *mp;
                    459:        int optval = 0, error = 0;
                    460:
                    461:        if (level != 0) {
1.100     dyoung    462:                error = ENOPROTOOPT;
1.30      thorpej   463:                if (op == PRCO_SETOPT && m)
                    464:                        (void) m_free(m);
                    465:        } else switch (op) {
                    466:
                    467:        case PRCO_SETOPT:
                    468:                switch (optname) {
                    469:                case LOCAL_CREDS:
1.72      matt      470:                case LOCAL_CONNWAIT:
1.30      thorpej   471:                        if (m == NULL || m->m_len != sizeof(int))
                    472:                                error = EINVAL;
                    473:                        else {
                    474:                                optval = *mtod(m, int *);
                    475:                                switch (optname) {
                    476: #define        OPTSET(bit) \
                    477:        if (optval) \
                    478:                unp->unp_flags |= (bit); \
                    479:        else \
                    480:                unp->unp_flags &= ~(bit);
                    481:
                    482:                                case LOCAL_CREDS:
                    483:                                        OPTSET(UNP_WANTCRED);
                    484:                                        break;
1.72      matt      485:                                case LOCAL_CONNWAIT:
                    486:                                        OPTSET(UNP_CONNWAIT);
                    487:                                        break;
1.30      thorpej   488:                                }
                    489:                        }
                    490:                        break;
                    491: #undef OPTSET
                    492:
                    493:                default:
                    494:                        error = ENOPROTOOPT;
                    495:                        break;
                    496:                }
                    497:                if (m)
                    498:                        (void) m_free(m);
                    499:                break;
                    500:
                    501:        case PRCO_GETOPT:
                    502:                switch (optname) {
1.99      he        503:                case LOCAL_PEEREID:
                    504:                        if (unp->unp_flags & UNP_EIDSVALID) {
                    505:                                *mp = m = m_get(M_WAIT, MT_SOOPTS);
                    506:                                m->m_len = sizeof(struct unpcbid);
                    507:                                *mtod(m, struct unpcbid *) = unp->unp_connid;
                    508:                        } else {
                    509:                                error = EINVAL;
                    510:                        }
                    511:                        break;
1.30      thorpej   512:                case LOCAL_CREDS:
                    513:                        *mp = m = m_get(M_WAIT, MT_SOOPTS);
                    514:                        m->m_len = sizeof(int);
                    515:
                    516: #define        OPTBIT(bit)     (unp->unp_flags & (bit) ? 1 : 0)
                    517:
1.99      he        518:                        optval = OPTBIT(UNP_WANTCRED);
1.30      thorpej   519:                        *mtod(m, int *) = optval;
                    520:                        break;
                    521: #undef OPTBIT
                    522:
                    523:                default:
                    524:                        error = ENOPROTOOPT;
                    525:                        break;
                    526:                }
                    527:                break;
                    528:        }
                    529:        return (error);
                    530: }
                    531:
                    532: /*
1.1       cgd       533:  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
                    534:  * for stream sockets, although the total for sender and receiver is
                    535:  * actually only PIPSIZ.
                    536:  * Datagram sockets really use the sendspace as the maximum datagram size,
                    537:  * and don't really want to reserve the sendspace.  Their recvspace should
                    538:  * be large enough for at least one max-size datagram plus address.
                    539:  */
                    540: #define        PIPSIZ  4096
                    541: u_long unpst_sendspace = PIPSIZ;       /* SOCK_STREAM send space passed to soreserve() in unp_attach() */
                    542: u_long unpst_recvspace = PIPSIZ;       /* SOCK_STREAM receive space passed to soreserve() in unp_attach() */
                    543: u_long unpdg_sendspace = 2*1024;       /* really max datagram size */
                    544: u_long unpdg_recvspace = 4*1024;       /* SOCK_DGRAM receive space passed to soreserve() in unp_attach() */
                    545:
                         /*
                          * Incremented once per file by unp_internalize() and decremented
                          * once per file by unp_externalize(); unp_detach() tests it to
                          * decide whether to flush the receive buffer and run unp_gc().
                          */
                    546: int    unp_rights;                     /* file descriptors in flight */
                    547:
1.5       andrew    548: int
1.76      matt      549: unp_attach(struct socket *so)
1.1       cgd       550: {
1.46      augustss  551:        struct unpcb *unp;
1.1       cgd       552:        int error;
1.80      perry     553:
1.1       cgd       554:        if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
                    555:                switch (so->so_type) {
                    556:
                    557:                case SOCK_STREAM:
                    558:                        error = soreserve(so, unpst_sendspace, unpst_recvspace);
                    559:                        break;
                    560:
                    561:                case SOCK_DGRAM:
                    562:                        error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
                    563:                        break;
1.8       mycroft   564:
                    565:                default:
                    566:                        panic("unp_attach");
1.1       cgd       567:                }
                    568:                if (error)
                    569:                        return (error);
                    570:        }
1.14      mycroft   571:        unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT);
                    572:        if (unp == NULL)
1.1       cgd       573:                return (ENOBUFS);
1.95      christos  574:        memset((void *)unp, 0, sizeof(*unp));
1.14      mycroft   575:        unp->unp_socket = so;
1.15      mycroft   576:        so->so_pcb = unp;
1.85      simonb    577:        nanotime(&unp->unp_ctime);
1.1       cgd       578:        return (0);
                    579: }
                    580:
1.17      pk        581: void
1.76      matt      582: unp_detach(struct unpcb *unp)
1.1       cgd       583: {
1.80      perry     584:
1.1       cgd       585:        if (unp->unp_vnode) {
                    586:                unp->unp_vnode->v_socket = 0;
                    587:                vrele(unp->unp_vnode);
                    588:                unp->unp_vnode = 0;
                    589:        }
                    590:        if (unp->unp_conn)
                    591:                unp_disconnect(unp);
                    592:        while (unp->unp_refs)
                    593:                unp_drop(unp->unp_refs, ECONNRESET);
                    594:        soisdisconnected(unp->unp_socket);
                    595:        unp->unp_socket->so_pcb = 0;
1.20      mycroft   596:        if (unp->unp_addr)
1.26      thorpej   597:                free(unp->unp_addr, M_SONAME);
1.8       mycroft   598:        if (unp_rights) {
                    599:                /*
                    600:                 * Normally the receive buffer is flushed later,
                    601:                 * in sofree, but if our receive buffer holds references
                    602:                 * to descriptors that are now garbage, we will dispose
                    603:                 * of those descriptor references after the garbage collector
                    604:                 * gets them (resulting in a "panic: closef: count < 0").
                    605:                 */
                    606:                sorflush(unp->unp_socket);
1.14      mycroft   607:                free(unp, M_PCB);
1.1       cgd       608:                unp_gc();
1.14      mycroft   609:        } else
                    610:                free(unp, M_PCB);
1.1       cgd       611: }
                    612:
/*
 * Bind the control block `unp' to the filesystem pathname carried in
 * the mbuf `nam' by creating a VSOCK node at that path.
 *
 * Returns EINVAL if the PCB already has a vnode, EADDRINUSE if the
 * path already exists, or the error from namei()/VOP_CREATE().  On
 * success the nul-terminated copy of the address is kept in
 * unp->unp_addr, the binding process's pid/euid/egid are recorded in
 * unp->unp_connid, UNP_EIDSBIND is set, and 0 is returned.
 */
1.5       andrew    613: int
1.86      christos  614: unp_bind(struct unpcb *unp, struct mbuf *nam, struct lwp *l)
1.1       cgd       615: {
1.27      thorpej   616:        struct sockaddr_un *sun;
1.46      augustss  617:        struct vnode *vp;
1.1       cgd       618:        struct vattr vattr;
1.27      thorpej   619:        size_t addrlen;
1.86      christos  620:        struct proc *p;
1.1       cgd       621:        int error;
                    622:        struct nameidata nd;
                    623:
                                /* A PCB that already has a vnode cannot be bound again. */
1.20      mycroft   624:        if (unp->unp_vnode != 0)
                    625:                return (EINVAL);
1.27      thorpej   626:
1.86      christos  627:        p = l->l_proc;
1.27      thorpej   628:        /*
                    629:         * Allocate the new sockaddr.  We have to allocate one
                    630:         * extra byte so that we can ensure that the pathname
                    631:         * is nul-terminated.
                    632:         */
                    633:        addrlen = nam->m_len + 1;
                    634:        sun = malloc(addrlen, M_SONAME, M_WAITOK);
1.95      christos  635:        m_copydata(nam, 0, nam->m_len, (void *)sun);
1.27      thorpej   636:        *(((char *)sun) + nam->m_len) = '\0';
                    637:
1.97      dsl       638:        NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT | TRYEMULROOT, UIO_SYSSPACE,
1.86      christos  639:            sun->sun_path, l);
1.27      thorpej   640:
1.1       cgd       641: /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
1.16      christos  642:        if ((error = namei(&nd)) != 0)
1.27      thorpej   643:                goto bad;
1.9       mycroft   644:        vp = nd.ni_vp;
                                /*
                                 * The path already exists: abort the create operation,
                                 * release the parent and the found vnode, and fail.
                                 */
1.96      hannken   645:        if (vp != NULL) {
1.9       mycroft   646:                VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
                    647:                if (nd.ni_dvp == vp)
                    648:                        vrele(nd.ni_dvp);
1.1       cgd       649:                else
1.9       mycroft   650:                        vput(nd.ni_dvp);
1.1       cgd       651:                vrele(vp);
1.96      hannken   652:                error = EADDRINUSE;
                    653:                goto bad;
1.1       cgd       654:        }
                                /* Create a socket node; its mode is ACCESSPERMS minus the cmask. */
                    655:        VATTR_NULL(&vattr);
                    656:        vattr.va_type = VSOCK;
1.84      jmmv      657:        vattr.va_mode = ACCESSPERMS & ~(p->p_cwdi->cwdi_cmask);
1.102   ! pooka     658:        VOP_LEASE(nd.ni_dvp, l->l_cred, LEASE_WRITE);
1.16      christos  659:        error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
                    660:        if (error)
1.27      thorpej   661:                goto bad;
                                /* Link the new vnode and the socket, and keep the address copy. */
1.9       mycroft   662:        vp = nd.ni_vp;
1.1       cgd       663:        vp->v_socket = unp->unp_socket;
                    664:        unp->unp_vnode = vp;
1.27      thorpej   665:        unp->unp_addrlen = addrlen;
                    666:        unp->unp_addr = sun;
                                /* Record who bound the socket; unp_connect() copies this to connectors. */
1.99      he        667:        unp->unp_connid.unp_pid = p->p_pid;
                    668:        unp->unp_connid.unp_euid = kauth_cred_geteuid(p->p_cred);
                    669:        unp->unp_connid.unp_egid = kauth_cred_getegid(p->p_cred);
                    670:        unp->unp_flags |= UNP_EIDSBIND;
1.31      fvdl      671:        VOP_UNLOCK(vp, 0);
1.1       cgd       672:        return (0);
1.27      thorpej   673:
                                /* Failure: the address copy was never stored in the PCB, so free it. */
                    674:  bad:
                    675:        free(sun, M_SONAME);
                    676:        return (error);
1.1       cgd       677: }
                    678:
/*
 * Connect the socket `so' to the socket bound at the pathname carried
 * in the mbuf `nam'.
 *
 * The path is looked up and must name a VSOCK vnode that passes a
 * VWRITE access check and still has a socket of the same type attached.
 * For protocols with PR_CONNREQUIRED set, the target must be accepting
 * connections; a new socket is created from it with sonewconn() and
 * becomes the actual peer.
 *
 * Returns 0 on success, otherwise the namei()/VOP_ACCESS() error,
 * ENOTSOCK, ECONNREFUSED, EPROTOTYPE, or the unp_connect2() result.
 */
1.5       andrew    679: int
1.86      christos  680: unp_connect(struct socket *so, struct mbuf *nam, struct lwp *l)
1.1       cgd       681: {
1.46      augustss  682:        struct sockaddr_un *sun;
                    683:        struct vnode *vp;
                    684:        struct socket *so2, *so3;
1.99      he        685:        struct unpcb *unp, *unp2, *unp3;
1.27      thorpej   686:        size_t addrlen;
1.99      he        687:        struct proc *p;
1.1       cgd       688:        int error;
                    689:        struct nameidata nd;
                    690:
1.99      he        691:        p = l->l_proc;
1.27      thorpej   692:        /*
                    693:         * Allocate a temporary sockaddr.  We have to allocate one extra
                    694:         * byte so that we can ensure that the pathname is nul-terminated.
                    695:         * When we establish the connection, we copy the other PCB's
                    696:         * sockaddr to our own.
                    697:         */
                    698:        addrlen = nam->m_len + 1;
                    699:        sun = malloc(addrlen, M_SONAME, M_WAITOK);
1.95      christos  700:        m_copydata(nam, 0, nam->m_len, (void *)sun);
1.27      thorpej   701:        *(((char *)sun) + nam->m_len) = '\0';
                    702:
1.97      dsl       703:        NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, UIO_SYSSPACE, sun->sun_path, l);
1.27      thorpej   704:
                                /* Lookup failed: there is no vnode to release, so skip to bad2. */
1.16      christos  705:        if ((error = namei(&nd)) != 0)
1.27      thorpej   706:                goto bad2;
1.9       mycroft   707:        vp = nd.ni_vp;
1.1       cgd       708:        if (vp->v_type != VSOCK) {
                    709:                error = ENOTSOCK;
                    710:                goto bad;
                    711:        }
1.102   ! pooka     712:        if ((error = VOP_ACCESS(vp, VWRITE, l->l_cred)) != 0)
1.1       cgd       713:                goto bad;
                                /* The node exists but no socket is attached to it any more. */
                    714:        so2 = vp->v_socket;
                    715:        if (so2 == 0) {
                    716:                error = ECONNREFUSED;
                    717:                goto bad;
                    718:        }
                    719:        if (so->so_type != so2->so_type) {
                    720:                error = EPROTOTYPE;
                    721:                goto bad;
                    722:        }
                                /*
                                 * Connection-required protocols: so2 must be accepting
                                 * connections, and the peer is a new socket (so3) created
                                 * from it rather than so2 itself.
                                 */
                    723:        if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
                    724:                if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
                    725:                    (so3 = sonewconn(so2, 0)) == 0) {
                    726:                        error = ECONNREFUSED;
                    727:                        goto bad;
                    728:                }
1.99      he        729:                unp = sotounpcb(so);
1.1       cgd       730:                unp2 = sotounpcb(so2);
                    731:                unp3 = sotounpcb(so3);
                                        /* Give the new socket its own copy of so2's bound address. */
1.26      thorpej   732:                if (unp2->unp_addr) {
                    733:                        unp3->unp_addr = malloc(unp2->unp_addrlen,
                    734:                            M_SONAME, M_WAITOK);
1.36      perry     735:                        memcpy(unp3->unp_addr, unp2->unp_addr,
1.26      thorpej   736:                            unp2->unp_addrlen);
                    737:                        unp3->unp_addrlen = unp2->unp_addrlen;
                    738:                }
                                        /*
                                         * The new socket inherits so2's flags and records the
                                         * connecting process's pid/euid/egid.
                                         */
1.30      thorpej   739:                unp3->unp_flags = unp2->unp_flags;
1.99      he        740:                unp3->unp_connid.unp_pid = p->p_pid;
                    741:                unp3->unp_connid.unp_euid = kauth_cred_geteuid(p->p_cred);
                    742:                unp3->unp_connid.unp_egid = kauth_cred_getegid(p->p_cred);
                    743:                unp3->unp_flags |= UNP_EIDSVALID;
                                        /* From here on connect to the new socket, not to so2. */
1.33      thorpej   744:                so2 = so3;
                                        /*
                                         * If ids were recorded on unp2 at bind time, copy them
                                         * to the connecting side.
                                         */
1.99      he        745:                if (unp2->unp_flags & UNP_EIDSBIND) {
                    746:                        unp->unp_connid = unp2->unp_connid;
                    747:                        unp->unp_flags |= UNP_EIDSVALID;
                    748:                }
1.33      thorpej   749:        }
1.72      matt      750:        error = unp_connect2(so, so2, PRU_CONNECT);
                                /* The success path falls through both cleanup labels as well. */
1.27      thorpej   751:  bad:
1.1       cgd       752:        vput(vp);
1.27      thorpej   753:  bad2:
                    754:        free(sun, M_SONAME);
1.1       cgd       755:        return (error);
                    756: }
                    757:
1.5       andrew    758: int
1.76      matt      759: unp_connect2(struct socket *so, struct socket *so2, int req)
1.1       cgd       760: {
1.46      augustss  761:        struct unpcb *unp = sotounpcb(so);
                    762:        struct unpcb *unp2;
1.1       cgd       763:
                    764:        if (so2->so_type != so->so_type)
                    765:                return (EPROTOTYPE);
                    766:        unp2 = sotounpcb(so2);
                    767:        unp->unp_conn = unp2;
                    768:        switch (so->so_type) {
                    769:
                    770:        case SOCK_DGRAM:
                    771:                unp->unp_nextref = unp2->unp_refs;
                    772:                unp2->unp_refs = unp;
                    773:                soisconnected(so);
                    774:                break;
                    775:
                    776:        case SOCK_STREAM:
                    777:                unp2->unp_conn = unp;
1.72      matt      778:                if (req == PRU_CONNECT &&
                    779:                    ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT))
                    780:                        soisconnecting(so);
                    781:                else
                    782:                        soisconnected(so);
1.1       cgd       783:                soisconnected(so2);
                    784:                break;
                    785:
                    786:        default:
                    787:                panic("unp_connect2");
                    788:        }
                    789:        return (0);
                    790: }
                    791:
1.5       andrew    792: void
1.76      matt      793: unp_disconnect(struct unpcb *unp)
1.1       cgd       794: {
1.46      augustss  795:        struct unpcb *unp2 = unp->unp_conn;
1.1       cgd       796:
                    797:        if (unp2 == 0)
                    798:                return;
                    799:        unp->unp_conn = 0;
                    800:        switch (unp->unp_socket->so_type) {
                    801:
                    802:        case SOCK_DGRAM:
                    803:                if (unp2->unp_refs == unp)
                    804:                        unp2->unp_refs = unp->unp_nextref;
                    805:                else {
                    806:                        unp2 = unp2->unp_refs;
                    807:                        for (;;) {
                    808:                                if (unp2 == 0)
                    809:                                        panic("unp_disconnect");
                    810:                                if (unp2->unp_nextref == unp)
                    811:                                        break;
                    812:                                unp2 = unp2->unp_nextref;
                    813:                        }
                    814:                        unp2->unp_nextref = unp->unp_nextref;
                    815:                }
                    816:                unp->unp_nextref = 0;
                    817:                unp->unp_socket->so_state &= ~SS_ISCONNECTED;
                    818:                break;
                    819:
                    820:        case SOCK_STREAM:
                    821:                soisdisconnected(unp->unp_socket);
                    822:                unp2->unp_conn = 0;
                    823:                soisdisconnected(unp2->unp_socket);
                    824:                break;
                    825:        }
                    826: }
                    827:
                    828: #ifdef notdef
                         /*
                          * Compiled only when `notdef' is defined.  Aborting a PCB is
                          * simply a call to unp_detach().
                          */
1.76      matt      829: unp_abort(struct unpcb *unp)
1.1       cgd       830: {
                    831:        unp_detach(unp);
                    832: }
                    833: #endif
                    834:
1.5       andrew    835: void
1.76      matt      836: unp_shutdown(struct unpcb *unp)
1.1       cgd       837: {
                    838:        struct socket *so;
                    839:
                    840:        if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
                    841:            (so = unp->unp_conn->unp_socket))
                    842:                socantrcvmore(so);
                    843: }
                    844:
1.5       andrew    845: void
1.76      matt      846: unp_drop(struct unpcb *unp, int errno)
1.1       cgd       847: {
                    848:        struct socket *so = unp->unp_socket;
                    849:
                    850:        so->so_error = errno;
                    851:        unp_disconnect(unp);
                    852:        if (so->so_head) {
1.15      mycroft   853:                so->so_pcb = 0;
1.14      mycroft   854:                sofree(so);
1.20      mycroft   855:                if (unp->unp_addr)
1.26      thorpej   856:                        free(unp->unp_addr, M_SONAME);
1.14      mycroft   857:                free(unp, M_PCB);
1.1       cgd       858:        }
                    859: }
                    860:
                    861: #ifdef notdef
                         /* Compiled only when `notdef' is defined; intentionally empty. */
1.76      matt      862: unp_drain(void)
1.1       cgd       863: {
                    864:
                    865: }
                    866: #endif
                    867:
/*
 * Convert the control message in `rights' from an array of struct file
 * pointers into an array of file descriptors allocated in the
 * receiving process (l->l_proc).
 *
 * If the receiver has cwdi_rdir set, a passed directory vnode that is
 * not under it yields EPERM.  If descriptor allocation fails with
 * anything other than ENOSPC, EMSGSIZE is returned; on ENOSPC the
 * descriptor table is expanded and the allocation pass is retried.  On
 * any error every passed file is given to unp_discard() and its slot
 * in the message is zeroed.  Returns 0 on success.
 *
 * The receiver's cwdi_lock is held as reader for the whole operation.
 */
1.5       andrew    868: int
1.86      christos  869: unp_externalize(struct mbuf *rights, struct lwp *l)
1.1       cgd       870: {
1.46      augustss  871:        struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
1.86      christos  872:        struct proc *p = l->l_proc;
1.47      thorpej   873:        int i, *fdp;
1.46      augustss  874:        struct file **rp;
                    875:        struct file *fp;
1.50      thorpej   876:        int nfds, error = 0;
1.47      thorpej   877:
                                /* Number of struct file pointers carried after the header. */
                    878:        nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
                    879:            sizeof(struct file *);
                    880:        rp = (struct file **)CMSG_DATA(cm);
1.1       cgd       881:
                                /* Scratch array collecting the new descriptor numbers. */
1.50      thorpej   882:        fdp = malloc(nfds * sizeof(int), M_TEMP, M_WAITOK);
1.101     ad        883:        rw_enter(&p->p_cwdi->cwdi_lock, RW_READER);
1.50      thorpej   884:
1.39      sommerfe  885:        /* Make sure the recipient should be able to see the descriptors.. */
1.42      thorpej   886:        if (p->p_cwdi->cwdi_rdir != NULL) {
1.48      thorpej   887:                rp = (struct file **)CMSG_DATA(cm);
1.39      sommerfe  888:                for (i = 0; i < nfds; i++) {
                    889:                        fp = *rp++;
                    890:                        /*
                    891:                         * If we are in a chroot'ed directory, and
                    892:                         * someone wants to pass us a directory, make
                    893:                         * sure it's inside the subtree we're allowed
                    894:                         * to access.
                    895:                         */
                    896:                        if (fp->f_type == DTYPE_VNODE) {
                    897:                                struct vnode *vp = (struct vnode *)fp->f_data;
                    898:                                if ((vp->v_type == VDIR) &&
1.86      christos  899:                                    !vn_isunder(vp, p->p_cwdi->cwdi_rdir, l)) {
1.39      sommerfe  900:                                        error = EPERM;
                    901:                                        break;
                    902:                                }
                    903:                        }
                    904:                }
                    905:        }
1.50      thorpej   906:
                                /*
                                 * Reached initially and again after a failed allocation
                                 * pass: with error set the files are discarded, with error
                                 * cleared (after fdexpand()) the allocation is retried.
                                 */
                    907:  restart:
1.47      thorpej   908:        rp = (struct file **)CMSG_DATA(cm);
1.50      thorpej   909:        if (error != 0) {
1.24      cgd       910:                for (i = 0; i < nfds; i++) {
1.1       cgd       911:                        fp = *rp;
1.39      sommerfe  912:                        /*
                    913:                         * zero the pointer before calling unp_discard,
                    914:                         * since it may end up in unp_gc()..
                    915:                         */
                    916:                        *rp++ = 0;
1.1       cgd       917:                        unp_discard(fp);
                    918:                }
1.50      thorpej   919:                goto out;
1.1       cgd       920:        }
1.50      thorpej   921:
1.24      cgd       922:        /*
1.50      thorpej   923:         * First loop -- allocate file descriptor table slots for the
                    924:         * new descriptors.
1.24      cgd       925:         */
                    926:        for (i = 0; i < nfds; i++) {
1.39      sommerfe  927:                fp = *rp++;
1.50      thorpej   928:                if ((error = fdalloc(p, 0, &fdp[i])) != 0) {
1.49      thorpej   929:                        /*
1.50      thorpej   930:                         * Back out what we've done so far.
1.49      thorpej   931:                         */
1.50      thorpej   932:                        for (--i; i >= 0; i--)
                    933:                                fdremove(p->p_fd, fdp[i]);
                    934:
                                                /* ENOSPC: grow the table and retry with error cleared. */
                    935:                        if (error == ENOSPC) {
                    936:                                fdexpand(p);
                    937:                                error = 0;
                    938:                        } else {
                    939:                                /*
                    940:                                 * This is the error that has historically
                    941:                                 * been returned, and some callers may
                    942:                                 * expect it.
                    943:                                 */
                    944:                                error = EMSGSIZE;
                    945:                        }
                    946:                        goto restart;
1.49      thorpej   947:                }
1.50      thorpej   948:
                    949:                /*
                    950:                 * Make the slot reference the descriptor so that
                    951:                 * fdalloc() works properly.. We finalize it all
                    952:                 * in the loop below.
                    953:                 */
1.101     ad        954:                rw_enter(&p->p_fd->fd_lock, RW_WRITER);
1.50      thorpej   955:                p->p_fd->fd_ofiles[fdp[i]] = fp;
1.101     ad        956:                rw_exit(&p->p_fd->fd_lock);
1.1       cgd       957:        }
1.24      cgd       958:
                    959:        /*
1.50      thorpej   960:         * Now that adding them has succeeded, update all of the
                    961:         * descriptor passing state.
1.24      cgd       962:         */
1.50      thorpej   963:        rp = (struct file **)CMSG_DATA(cm);
                    964:        for (i = 0; i < nfds; i++) {
                    965:                fp = *rp++;
                    966:                fp->f_msgcount--;
                    967:                unp_rights--;
                    968:        }
                    969:
                    970:        /*
                    971:         * Copy temporary array to message and adjust length, in case of
                    972:         * transition from large struct file pointers to ints.
                    973:         */
                    974:        memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int));
1.47      thorpej   975:        cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
                    976:        rights->m_len = CMSG_SPACE(nfds * sizeof(int));
                                /* Common exit: drop the cwdi lock and free the scratch array. */
1.50      thorpej   977:  out:
1.101     ad        978:        rw_exit(&p->p_cwdi->cwdi_lock);
                    979:        free(fdp, M_TEMP);
                    980:        return (error);
1.1       cgd       981: }
                    982:
1.5       andrew    983: int
1.86      christos  984: unp_internalize(struct mbuf *control, struct lwp *l)
1.1       cgd       985: {
1.86      christos  986:        struct proc *p = l->l_proc;
1.24      cgd       987:        struct filedesc *fdescp = p->p_fd;
1.73      martin    988:        struct cmsghdr *newcm, *cm = mtod(control, struct cmsghdr *);
                    989:        struct file **rp, **files;
1.46      augustss  990:        struct file *fp;
                    991:        int i, fd, *fdp;
1.24      cgd       992:        int nfds;
                    993:        u_int neededspace;
1.38      thorpej   994:
1.24      cgd       995:        /* Sanity check the control message header */
1.66      jdolecek  996:        if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
1.1       cgd       997:            cm->cmsg_len != control->m_len)
                    998:                return (EINVAL);
1.24      cgd       999:
                   1000:        /* Verify that the file descriptors are valid */
1.47      thorpej  1001:        nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int);
                   1002:        fdp = (int *)CMSG_DATA(cm);
1.24      cgd      1003:        for (i = 0; i < nfds; i++) {
                   1004:                fd = *fdp++;
1.58      pk       1005:                if ((fp = fd_getfile(fdescp, fd)) == NULL)
1.1       cgd      1006:                        return (EBADF);
1.101     ad       1007:                /* XXXSMP grab reference to file */
                   1008:                mutex_exit(&fp->f_lock);
1.1       cgd      1009:        }
1.24      cgd      1010:
                   1011:        /* Make sure we have room for the struct file pointers */
1.47      thorpej  1012:        neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) -
                   1013:            control->m_len;
1.24      cgd      1014:        if (neededspace > M_TRAILINGSPACE(control)) {
                   1015:
1.73      martin   1016:                /* allocate new space and copy header into it */
                   1017:                newcm = malloc(
                   1018:                    CMSG_SPACE(nfds * sizeof(struct file *)),
                   1019:                    M_MBUF, M_WAITOK);
1.101     ad       1020:                if (newcm == NULL) {
                   1021:                        /* XXXSMP drop references to files */
1.24      cgd      1022:                        return (E2BIG);
1.101     ad       1023:                }
1.73      martin   1024:                memcpy(newcm, cm, sizeof(struct cmsghdr));
1.80      perry    1025:                files = (struct file **)CMSG_DATA(newcm);
1.73      martin   1026:        } else {
                   1027:                /* we can convert in-place */
                   1028:                newcm = NULL;
                   1029:                files = (struct file **)CMSG_DATA(cm);
1.24      cgd      1030:        }
                   1031:
                   1032:        /*
                   1033:         * Transform the file descriptors into struct file pointers, in
                   1034:         * reverse order so that if pointers are bigger than ints, the
                   1035:         * int won't get until we're done.
                   1036:         */
1.101     ad       1037:        rw_enter(&fdescp->fd_lock, RW_READER);
1.94      cbiere   1038:        fdp = (int *)CMSG_DATA(cm) + nfds;
                   1039:        rp = files + nfds;
1.24      cgd      1040:        for (i = 0; i < nfds; i++) {
1.94      cbiere   1041:                fp = fdescp->fd_ofiles[*--fdp];
1.101     ad       1042:                mutex_enter(&fp->f_lock);
1.57      pk       1043: #ifdef DIAGNOSTIC
                   1044:                if (fp->f_iflags & FIF_WANTCLOSE)
                   1045:                        panic("unp_internalize: file already closed");
                   1046: #endif
1.94      cbiere   1047:                *--rp = fp;
1.1       cgd      1048:                fp->f_count++;
                   1049:                fp->f_msgcount++;
1.101     ad       1050:                mutex_exit(&fp->f_lock);
1.1       cgd      1051:                unp_rights++;
                   1052:        }
1.101     ad       1053:        rw_exit(&fdescp->fd_lock);
1.73      martin   1054:
                   1055:        if (newcm) {
                   1056:                if (control->m_flags & M_EXT)
                   1057:                        MEXTREMOVE(control);
                   1058:                MEXTADD(control, newcm,
                   1059:                    CMSG_SPACE(nfds * sizeof(struct file *)),
                   1060:                    M_MBUF, NULL, NULL);
                   1061:                cm = newcm;
                   1062:        }
                   1063:
                   1064:        /* adjust message & mbuf to note amount of space actually used. */
                   1065:        cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct file *));
                   1066:        control->m_len = CMSG_SPACE(nfds * sizeof(struct file *));
                   1067:
1.1       cgd      1068:        return (0);
1.30      thorpej  1069: }
                   1070:
                   1071: struct mbuf *
1.92      ad       1072: unp_addsockcred(struct lwp *l, struct mbuf *control)
1.30      thorpej  1073: {
                   1074:        struct cmsghdr *cmp;
                   1075:        struct sockcred *sc;
                   1076:        struct mbuf *m, *n;
1.47      thorpej  1077:        int len, space, i;
1.30      thorpej  1078:
1.92      ad       1079:        len = CMSG_LEN(SOCKCREDSIZE(kauth_cred_ngroups(l->l_cred)));
                   1080:        space = CMSG_SPACE(SOCKCREDSIZE(kauth_cred_ngroups(l->l_cred)));
1.30      thorpej  1081:
                   1082:        m = m_get(M_WAIT, MT_CONTROL);
1.47      thorpej  1083:        if (space > MLEN) {
                   1084:                if (space > MCLBYTES)
                   1085:                        MEXTMALLOC(m, space, M_WAITOK);
1.30      thorpej  1086:                else
1.59      matt     1087:                        m_clget(m, M_WAIT);
1.30      thorpej  1088:                if ((m->m_flags & M_EXT) == 0) {
                   1089:                        m_free(m);
                   1090:                        return (control);
                   1091:                }
                   1092:        }
                   1093:
1.47      thorpej  1094:        m->m_len = space;
1.30      thorpej  1095:        m->m_next = NULL;
                   1096:        cmp = mtod(m, struct cmsghdr *);
                   1097:        sc = (struct sockcred *)CMSG_DATA(cmp);
                   1098:        cmp->cmsg_len = len;
                   1099:        cmp->cmsg_level = SOL_SOCKET;
                   1100:        cmp->cmsg_type = SCM_CREDS;
1.92      ad       1101:        sc->sc_uid = kauth_cred_getuid(l->l_cred);
                   1102:        sc->sc_euid = kauth_cred_geteuid(l->l_cred);
                   1103:        sc->sc_gid = kauth_cred_getgid(l->l_cred);
                   1104:        sc->sc_egid = kauth_cred_getegid(l->l_cred);
                   1105:        sc->sc_ngroups = kauth_cred_ngroups(l->l_cred);
1.30      thorpej  1106:        for (i = 0; i < sc->sc_ngroups; i++)
1.92      ad       1107:                sc->sc_groups[i] = kauth_cred_group(l->l_cred, i);
1.30      thorpej  1108:
                   1109:        /*
                   1110:         * If a control message already exists, append us to the end.
                   1111:         */
                   1112:        if (control != NULL) {
                   1113:                for (n = control; n->m_next != NULL; n = n->m_next)
                   1114:                        ;
                   1115:                n->m_next = m;
                   1116:        } else
                   1117:                control = m;
                   1118:
                   1119:        return (control);
1.1       cgd      1120: }
                   1121:
                   1122: int    unp_defer, unp_gcing;
                   1123: extern struct domain unixdomain;
                   1124:
1.39      sommerfe 1125: /*
                   1126:  * Comment added long after the fact explaining what's going on here.
                   1127:  * Do a mark-sweep GC of file descriptors on the system, to free up
                   1128:  * any which are caught in flight to an about-to-be-closed socket.
                   1129:  *
                   1130:  * Traditional mark-sweep gc's start at the "root", and mark
                   1131:  * everything reachable from the root (which, in our case would be the
                   1132:  * process table).  The mark bits are cleared during the sweep.
                   1133:  *
                   1134:  * XXX For some inexplicable reason (perhaps because the file
                   1135:  * descriptor tables used to live in the u area which could be swapped
                   1136:  * out and thus hard to reach), we do multiple scans over the set of
                   1137:  * descriptors, using use *two* mark bits per object (DEFER and MARK).
                   1138:  * Whenever we find a descriptor which references other descriptors,
                   1139:  * the ones it references are marked with both bits, and we iterate
                   1140:  * over the whole file table until there are no more DEFER bits set.
                   1141:  * We also make an extra pass *before* the GC to clear the mark bits,
                   1142:  * which could have been cleared at almost no cost during the previous
                   1143:  * sweep.
                   1144:  *
                   1145:  * XXX MP: this needs to run with locks such that no other thread of
                   1146:  * control can create or destroy references to file descriptors. it
                   1147:  * may be necessary to defer the GC until later (when the locking
                   1148:  * situation is more hospitable); it may be necessary to push this
                   1149:  * into a separate thread.
                   1150:  */
1.5       andrew   1151: void
1.76      matt     1152: unp_gc(void)
1.1       cgd      1153: {
1.46      augustss 1154:        struct file *fp, *nextfp;
                   1155:        struct socket *so, *so1;
1.8       mycroft  1156:        struct file **extra_ref, **fpp;
                   1157:        int nunref, i;
1.1       cgd      1158:
                   1159:        if (unp_gcing)
                   1160:                return;
                   1161:        unp_gcing = 1;
                   1162:        unp_defer = 0;
1.39      sommerfe 1163:
1.101     ad       1164:        mutex_enter(&filelist_lock);
                   1165:
1.39      sommerfe 1166:        /* Clear mark bits */
1.54      matt     1167:        LIST_FOREACH(fp, &filehead, f_list)
1.1       cgd      1168:                fp->f_flag &= ~(FMARK|FDEFER);
1.39      sommerfe 1169:
                   1170:        /*
                   1171:         * Iterate over the set of descriptors, marking ones believed
                   1172:         * (based on refcount) to be referenced from a process, and
                   1173:         * marking for rescan descriptors which are queued on a socket.
                   1174:         */
1.1       cgd      1175:        do {
1.54      matt     1176:                LIST_FOREACH(fp, &filehead, f_list) {
1.101     ad       1177:                        mutex_enter(&fp->f_lock);
                   1178:                        if (fp->f_flag & FDEFER) {
1.1       cgd      1179:                                fp->f_flag &= ~FDEFER;
                   1180:                                unp_defer--;
1.39      sommerfe 1181: #ifdef DIAGNOSTIC
                   1182:                                if (fp->f_count == 0)
                   1183:                                        panic("unp_gc: deferred unreferenced socket");
                   1184: #endif
1.1       cgd      1185:                        } else {
1.101     ad       1186:                                if (fp->f_count == 0 ||
                   1187:                                    (fp->f_flag & FMARK) ||
                   1188:                                    fp->f_count == fp->f_msgcount) {
                   1189:                                        mutex_exit(&fp->f_lock);
1.1       cgd      1190:                                        continue;
1.101     ad       1191:                                }
1.1       cgd      1192:                        }
1.39      sommerfe 1193:                        fp->f_flag |= FMARK;
                   1194:
1.1       cgd      1195:                        if (fp->f_type != DTYPE_SOCKET ||
1.101     ad       1196:                            (so = (struct socket *)fp->f_data) == 0 ||
                   1197:                            so->so_proto->pr_domain != &unixdomain ||
                   1198:                            (so->so_proto->pr_flags&PR_RIGHTS) == 0) {
                   1199:                                mutex_exit(&fp->f_lock);
1.1       cgd      1200:                                continue;
1.101     ad       1201:                        }
1.1       cgd      1202: #ifdef notdef
                   1203:                        if (so->so_rcv.sb_flags & SB_LOCK) {
                   1204:                                /*
                   1205:                                 * This is problematical; it's not clear
                   1206:                                 * we need to wait for the sockbuf to be
                   1207:                                 * unlocked (on a uniprocessor, at least),
                   1208:                                 * and it's also not clear what to do
                   1209:                                 * if sbwait returns an error due to receipt
                   1210:                                 * of a signal.  If sbwait does return
                   1211:                                 * an error, we'll go into an infinite
                   1212:                                 * loop.  Delete all of this for now.
                   1213:                                 */
                   1214:                                (void) sbwait(&so->so_rcv);
                   1215:                                goto restart;
                   1216:                        }
                   1217: #endif
1.101     ad       1218:                        mutex_exit(&fp->f_lock);
                   1219:
1.39      sommerfe 1220:                        unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
                   1221:                        /*
                   1222:                         * mark descriptors referenced from sockets queued on the accept queue as well.
                   1223:                         */
                   1224:                        if (so->so_options & SO_ACCEPTCONN) {
1.54      matt     1225:                                TAILQ_FOREACH(so1, &so->so_q0, so_qe) {
1.39      sommerfe 1226:                                        unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
                   1227:                                }
1.54      matt     1228:                                TAILQ_FOREACH(so1, &so->so_q, so_qe) {
1.39      sommerfe 1229:                                        unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
                   1230:                                }
                   1231:                        }
1.1       cgd      1232:                }
                   1233:        } while (unp_defer);
1.101     ad       1234:
                   1235:        mutex_exit(&filelist_lock);
                   1236:
1.8       mycroft  1237:        /*
1.39      sommerfe 1238:         * Sweep pass.  Find unmarked descriptors, and free them.
                   1239:         *
1.8       mycroft  1240:         * We grab an extra reference to each of the file table entries
                   1241:         * that are not otherwise accessible and then free the rights
                   1242:         * that are stored in messages on them.
                   1243:         *
1.57      pk       1244:         * The bug in the original code is a little tricky, so I'll describe
1.8       mycroft  1245:         * what's wrong with it here.
                   1246:         *
                   1247:         * It is incorrect to simply unp_discard each entry for f_msgcount
                   1248:         * times -- consider the case of sockets A and B that contain
                   1249:         * references to each other.  On a last close of some other socket,
                   1250:         * we trigger a gc since the number of outstanding rights (unp_rights)
                   1251:         * is non-zero.  If during the sweep phase the gc code un_discards,
                   1252:         * we end up doing a (full) closef on the descriptor.  A closef on A
                   1253:         * results in the following chain.  Closef calls soo_close, which
                   1254:         * calls soclose.   Soclose calls first (through the switch
                   1255:         * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
                   1256:         * returns because the previous instance had set unp_gcing, and
                   1257:         * we return all the way back to soclose, which marks the socket
                   1258:         * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
                   1259:         * to free up the rights that are queued in messages on the socket A,
                   1260:         * i.e., the reference on B.  The sorflush calls via the dom_dispose
                   1261:         * switch unp_dispose, which unp_scans with unp_discard.  This second
                   1262:         * instance of unp_discard just calls closef on B.
                   1263:         *
                   1264:         * Well, a similar chain occurs on B, resulting in a sorflush on B,
                   1265:         * which results in another closef on A.  Unfortunately, A is already
                   1266:         * being closed, and the descriptor has already been marked with
                   1267:         * SS_NOFDREF, and soclose panics at this point.
                   1268:         *
                   1269:         * Here, we first take an extra reference to each inaccessible
1.39      sommerfe 1270:         * descriptor.  Then, if the inaccessible descriptor is a
                   1271:         * socket, we call sorflush in case it is a Unix domain
                   1272:         * socket.  After we destroy all the rights carried in
                   1273:         * messages, we do a last closef to get rid of our extra
                   1274:         * reference.  This is the last close, and the unp_detach etc
                   1275:         * will shut down the socket.
1.8       mycroft  1276:         *
                   1277:         * 91/09/19, bsy@cs.cmu.edu
                   1278:         */
1.101     ad       1279:        extra_ref = kmem_alloc(nfiles * sizeof(struct file *), KM_SLEEP);
                   1280:
                   1281:        mutex_enter(&filelist_lock);
1.54      matt     1282:        for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; fp != 0;
1.11      mycroft  1283:            fp = nextfp) {
1.54      matt     1284:                nextfp = LIST_NEXT(fp, f_list);
1.101     ad       1285:                mutex_enter(&fp->f_lock);
1.57      pk       1286:                if (fp->f_count != 0 &&
                   1287:                    fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
1.8       mycroft  1288:                        *fpp++ = fp;
                   1289:                        nunref++;
                   1290:                        fp->f_count++;
                   1291:                }
1.101     ad       1292:                mutex_exit(&fp->f_lock);
1.1       cgd      1293:        }
1.101     ad       1294:        mutex_exit(&filelist_lock);
                   1295:
1.39      sommerfe 1296:        for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
1.45      thorpej  1297:                fp = *fpp;
1.101     ad       1298:                mutex_enter(&fp->f_lock);
1.44      thorpej  1299:                FILE_USE(fp);
1.39      sommerfe 1300:                if (fp->f_type == DTYPE_SOCKET)
                   1301:                        sorflush((struct socket *)fp->f_data);
1.44      thorpej  1302:                FILE_UNUSE(fp, NULL);
1.39      sommerfe 1303:        }
1.44      thorpej  1304:        for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
1.45      thorpej  1305:                fp = *fpp;
1.101     ad       1306:                mutex_enter(&fp->f_lock);
1.44      thorpej  1307:                FILE_USE(fp);
1.86      christos 1308:                (void) closef(fp, (struct lwp *)0);
1.44      thorpej  1309:        }
1.101     ad       1310:        kmem_free(extra_ref, nfiles * sizeof(struct file *));
1.1       cgd      1311:        unp_gcing = 0;
                   1312: }
                   1313:
1.5       andrew   1314: void
1.76      matt     1315: unp_dispose(struct mbuf *m)
1.1       cgd      1316: {
1.8       mycroft  1317:
1.1       cgd      1318:        if (m)
1.39      sommerfe 1319:                unp_scan(m, unp_discard, 1);
1.1       cgd      1320: }
                   1321:
1.5       andrew   1322: void
1.76      matt     1323: unp_scan(struct mbuf *m0, void (*op)(struct file *), int discard)
1.1       cgd      1324: {
1.46      augustss 1325:        struct mbuf *m;
                   1326:        struct file **rp;
                   1327:        struct cmsghdr *cm;
                   1328:        int i;
1.1       cgd      1329:        int qfds;
                   1330:
                   1331:        while (m0) {
1.48      thorpej  1332:                for (m = m0; m; m = m->m_next) {
1.1       cgd      1333:                        if (m->m_type == MT_CONTROL &&
                   1334:                            m->m_len >= sizeof(*cm)) {
                   1335:                                cm = mtod(m, struct cmsghdr *);
                   1336:                                if (cm->cmsg_level != SOL_SOCKET ||
                   1337:                                    cm->cmsg_type != SCM_RIGHTS)
                   1338:                                        continue;
1.48      thorpej  1339:                                qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm)))
                   1340:                                    / sizeof(struct file *);
                   1341:                                rp = (struct file **)CMSG_DATA(cm);
1.39      sommerfe 1342:                                for (i = 0; i < qfds; i++) {
                   1343:                                        struct file *fp = *rp;
                   1344:                                        if (discard)
                   1345:                                                *rp = 0;
                   1346:                                        (*op)(fp);
                   1347:                                        rp++;
                   1348:                                }
1.1       cgd      1349:                                break;          /* XXX, but saves time */
                   1350:                        }
1.48      thorpej  1351:                }
1.52      thorpej  1352:                m0 = m0->m_nextpkt;
1.1       cgd      1353:        }
                   1354: }
                   1355:
1.5       andrew   1356: void
1.76      matt     1357: unp_mark(struct file *fp)
1.1       cgd      1358: {
1.101     ad       1359:
1.39      sommerfe 1360:        if (fp == NULL)
                   1361:                return;
1.80      perry    1362:
1.39      sommerfe 1363:        /* If we're already deferred, don't screw up the defer count */
1.101     ad       1364:        mutex_enter(&fp->f_lock);
                   1365:        if (fp->f_flag & (FMARK | FDEFER)) {
                   1366:                mutex_exit(&fp->f_lock);
1.1       cgd      1367:                return;
1.101     ad       1368:        }
1.39      sommerfe 1369:
                   1370:        /*
                   1371:         * Minimize the number of deferrals...  Sockets are the only
                   1372:         * type of descriptor which can hold references to another
                   1373:         * descriptor, so just mark other descriptors, and defer
                   1374:         * unmarked sockets for the next pass.
                   1375:         */
                   1376:        if (fp->f_type == DTYPE_SOCKET) {
                   1377:                unp_defer++;
                   1378:                if (fp->f_count == 0)
                   1379:                        panic("unp_mark: queued unref");
                   1380:                fp->f_flag |= FDEFER;
                   1381:        } else {
                   1382:                fp->f_flag |= FMARK;
                   1383:        }
1.101     ad       1384:        mutex_exit(&fp->f_lock);
1.39      sommerfe 1385:        return;
1.1       cgd      1386: }
                   1387:
1.5       andrew   1388: void
1.76      matt     1389: unp_discard(struct file *fp)
1.1       cgd      1390: {
1.39      sommerfe 1391:        if (fp == NULL)
                   1392:                return;
1.101     ad       1393:        mutex_enter(&fp->f_lock);
1.57      pk       1394:        fp->f_usecount++;       /* i.e. FILE_USE(fp) sans locking */
1.1       cgd      1395:        fp->f_msgcount--;
1.101     ad       1396:        mutex_exit(&fp->f_lock);
1.1       cgd      1397:        unp_rights--;
1.86      christos 1398:        (void) closef(fp, (struct lwp *)0);
1.1       cgd      1399: }

CVSweb <webmaster@jp.NetBSD.org>