[BACK]Return to uipc_usrreq.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / kern

Annotation of src/sys/kern/uipc_usrreq.c, Revision 1.139.2.2

1.139.2.2! tls         1: /*     $NetBSD: uipc_usrreq.c,v 1.139.2.1 2012/11/20 03:02:44 tls Exp $        */
1.30      thorpej     2:
                      3: /*-
1.121     mrg         4:  * Copyright (c) 1998, 2000, 2004, 2008, 2009 The NetBSD Foundation, Inc.
1.30      thorpej     5:  * All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to The NetBSD Foundation
                      8:  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
1.121     mrg         9:  * NASA Ames Research Center, and by Andrew Doran.
1.30      thorpej    10:  *
                     11:  * Redistribution and use in source and binary forms, with or without
                     12:  * modification, are permitted provided that the following conditions
                     13:  * are met:
                     14:  * 1. Redistributions of source code must retain the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer.
                     16:  * 2. Redistributions in binary form must reproduce the above copyright
                     17:  *    notice, this list of conditions and the following disclaimer in the
                     18:  *    documentation and/or other materials provided with the distribution.
                     19:  *
                     20:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     21:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     22:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     23:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     24:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     25:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     26:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     27:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     28:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     29:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     30:  * POSSIBILITY OF SUCH DAMAGE.
                     31:  */
1.10      cgd        32:
1.1       cgd        33: /*
1.8       mycroft    34:  * Copyright (c) 1982, 1986, 1989, 1991, 1993
                     35:  *     The Regents of the University of California.  All rights reserved.
1.1       cgd        36:  *
                     37:  * Redistribution and use in source and binary forms, with or without
                     38:  * modification, are permitted provided that the following conditions
                     39:  * are met:
                     40:  * 1. Redistributions of source code must retain the above copyright
                     41:  *    notice, this list of conditions and the following disclaimer.
                     42:  * 2. Redistributions in binary form must reproduce the above copyright
                     43:  *    notice, this list of conditions and the following disclaimer in the
                     44:  *    documentation and/or other materials provided with the distribution.
1.67      agc        45:  * 3. Neither the name of the University nor the names of its contributors
                     46:  *    may be used to endorse or promote products derived from this software
                     47:  *    without specific prior written permission.
                     48:  *
                     49:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     50:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     51:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     52:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     53:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     54:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     55:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     56:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     57:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     58:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     59:  * SUCH DAMAGE.
                     60:  *
                     61:  *     @(#)uipc_usrreq.c       8.9 (Berkeley) 5/14/95
                     62:  */
                     63:
                     64: /*
                     65:  * Copyright (c) 1997 Christopher G. Demetriou.  All rights reserved.
                     66:  *
                     67:  * Redistribution and use in source and binary forms, with or without
                     68:  * modification, are permitted provided that the following conditions
                     69:  * are met:
                     70:  * 1. Redistributions of source code must retain the above copyright
                     71:  *    notice, this list of conditions and the following disclaimer.
                     72:  * 2. Redistributions in binary form must reproduce the above copyright
                     73:  *    notice, this list of conditions and the following disclaimer in the
                     74:  *    documentation and/or other materials provided with the distribution.
1.1       cgd        75:  * 3. All advertising materials mentioning features or use of this software
                     76:  *    must display the following acknowledgement:
                     77:  *     This product includes software developed by the University of
                     78:  *     California, Berkeley and its contributors.
                     79:  * 4. Neither the name of the University nor the names of its contributors
                     80:  *    may be used to endorse or promote products derived from this software
                     81:  *    without specific prior written permission.
                     82:  *
                     83:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     84:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     85:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     86:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     87:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     88:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     89:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     90:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     91:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     92:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     93:  * SUCH DAMAGE.
                     94:  *
1.31      fvdl       95:  *     @(#)uipc_usrreq.c       8.9 (Berkeley) 5/14/95
1.1       cgd        96:  */
1.53      lukem      97:
                     98: #include <sys/cdefs.h>
1.139.2.2! tls        99: __KERNEL_RCSID(0, "$NetBSD: uipc_usrreq.c,v 1.139.2.1 2012/11/20 03:02:44 tls Exp $");
1.1       cgd       100:
1.7       mycroft   101: #include <sys/param.h>
1.8       mycroft   102: #include <sys/systm.h>
1.7       mycroft   103: #include <sys/proc.h>
                    104: #include <sys/filedesc.h>
                    105: #include <sys/domain.h>
                    106: #include <sys/protosw.h>
                    107: #include <sys/socket.h>
                    108: #include <sys/socketvar.h>
                    109: #include <sys/unpcb.h>
                    110: #include <sys/un.h>
                    111: #include <sys/namei.h>
                    112: #include <sys/vnode.h>
                    113: #include <sys/file.h>
                    114: #include <sys/stat.h>
                    115: #include <sys/mbuf.h>
1.91      elad      116: #include <sys/kauth.h>
1.101     ad        117: #include <sys/kmem.h>
1.106     ad        118: #include <sys/atomic.h>
1.119     pooka     119: #include <sys/uidinfo.h>
1.121     mrg       120: #include <sys/kernel.h>
                    121: #include <sys/kthread.h>
1.1       cgd       122:
                    123: /*
                    124:  * Unix communications domain.
                    125:  *
                    126:  * TODO:
1.134     manu      127:  *     RDM
1.1       cgd       128:  *     rethink name space problems
                    129:  *     need a proper out-of-band
1.112     ad        130:  *
                    131:  * Notes on locking:
                    132:  *
                    133:  * The generic rules noted in uipc_socket2.c apply.  In addition:
                    134:  *
                    135:  * o We have a global lock, uipc_lock.
                    136:  *
                    137:  * o All datagram sockets are locked by uipc_lock.
                    138:  *
                    139:  * o For stream socketpairs, the two endpoints are created sharing the same
                    140:  *   independent lock.  Sockets presented to PRU_CONNECT2 must already have
                    141:  *   matching locks.
                    142:  *
                    143:  * o Stream sockets created via socket() start life with their own
                    144:  *   independent lock.
                    145:  *
                    146:  * o Stream connections to a named endpoint are slightly more complicated.
                    147:  *   Sockets that have called listen() have their lock pointer mutated to
                    148:  *   the global uipc_lock.  When establishing a connection, the connecting
                    149:  *   socket also has its lock mutated to uipc_lock, which matches the head
                    150:  *   (listening socket).  We create a new socket for accept() to return, and
                    151:  *   that also shares the head's lock.  Until the connection is completely
                    152:  *   done on both ends, all three sockets are locked by uipc_lock.  Once the
                    153:  *   connection is complete, the association with the head's lock is broken.
                    154:  *   The connecting socket and the socket returned from accept() have their
                    155:  *   lock pointers mutated away from uipc_lock, and back to the connecting
                    156:  *   socket's original, independent lock.  The head continues to be locked
                    157:  *   by uipc_lock.
                    158:  *
                    159:  * o If uipc_lock is determined to be a significant source of contention,
                    160:  *   it could easily be hashed out.  It is difficult to simply make it an
                    161:  *   independent lock because of visibility / garbage collection issues:
                    162:  *   if a socket has been associated with a lock at any point, that lock
                    163:  *   must remain valid until the socket is no longer visible in the system.
                    164:  *   The lock must not be freed or otherwise destroyed until any sockets
                    165:  *   that had referenced it have also been destroyed.
1.1       cgd       166:  */
1.93      christos  167: const struct sockaddr_un sun_noname = {
                    168:        .sun_len = sizeof(sun_noname),
                    169:        .sun_family = AF_LOCAL,
                    170: };
1.1       cgd       171: ino_t  unp_ino;                        /* prototype for fake inode numbers */
                    172:
1.92      ad        173: struct mbuf *unp_addsockcred(struct lwp *, struct mbuf *);
1.121     mrg       174: static void unp_mark(file_t *);
                    175: static void unp_scan(struct mbuf *, void (*)(file_t *), int);
                    176: static void unp_discard_now(file_t *);
                    177: static void unp_discard_later(file_t *);
                    178: static void unp_thread(void *);
                    179: static void unp_thread_kick(void);
1.112     ad        180: static kmutex_t *uipc_lock;
                    181:
1.121     mrg       182: static kcondvar_t unp_thread_cv;
                    183: static lwp_t *unp_thread_lwp;
                    184: static SLIST_HEAD(,file) unp_thread_discard;
                    185: static int unp_defer;
                    186:
1.112     ad        187: /*
                    188:  * Initialize Unix protocols.
                          *
                          * Allocates the domain-wide uipc_lock, initializes the condition
                          * variable unp_thread_cv and creates the "unpgc" kernel thread
                          * running unp_thread(), recording its lwp in unp_thread_lwp.
                          * Panics if the thread cannot be created.
                    189:  */
                    190: void
                    191: uipc_init(void)
                    192: {
1.121     mrg       193:        int error;
1.112     ad        194:
                    195:        uipc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
1.121     mrg       196:        cv_init(&unp_thread_cv, "unpgc");
                    197:
                    198:        error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, unp_thread,
                    199:            NULL, &unp_thread_lwp, "unpgc");
                    200:        if (error != 0)
                    201:                panic("uipc_init %d", error);
1.112     ad        202: }
                    203:
                    204: /*
                    205:  * A connection succeeded: disassociate both endpoints from the head's
                    206:  * lock, and make them share their own lock.  There is a race here: for
                    207:  * a very brief time one endpoint will be locked by a different lock
                    208:  * than the other end.  However, since the current thread holds the old
                    209:  * lock (the listening socket's lock, the head) access can still only be
                    210:  * made to one side of the connection.
                          *
                          * so is the endpoint that still owns a private stream lock
                          * (unp_streamlock != NULL); so2 is the endpoint that has none.
                          * so must have no so_head, so's current lock must be uipc_lock,
                          * and the caller must already hold so's stream lock; all of
                          * these are asserted below.  Nothing is changed while
                          * so2->so_head is still set.
                    211:  */
                    212: static void
                    213: unp_setpeerlocks(struct socket *so, struct socket *so2)
                    214: {
                    215:        struct unpcb *unp;
                    216:        kmutex_t *lock;
                    217:
                    218:        KASSERT(solocked2(so, so2));
                    219:
                    220:        /*
                    221:         * Bail out if either end of the socket is not yet fully
                    222:         * connected or accepted.  We only break the lock association
                    223:         * with the head when the pair of sockets stand completely
                    224:         * on their own.
                    225:         */
1.125     yamt      226:        KASSERT(so->so_head == NULL);
                    227:        if (so2->so_head != NULL)
1.112     ad        228:                return;
                    229:
                    230:        /*
                    231:         * Drop references to old lock.  A third reference (from the
                    232:         * queue head) must be held as we still hold its lock.  Bonus:
                    233:         * we don't need to worry about garbage collecting the lock.
                                 * The two mutex_obj_free() calls below release one
                                 * reference for each of the two endpoints.
                    234:         */
                    235:        lock = so->so_lock;
                    236:        KASSERT(lock == uipc_lock);
                    237:        mutex_obj_free(lock);
                    238:        mutex_obj_free(lock);
                    239:
                    240:        /*
                    241:         * Grab stream lock from the initiator and share between the two
                    242:         * endpoints.  Issue memory barrier to ensure all modifications
                    243:         * become globally visible before the lock change.  so2 is
                    244:         * assumed not to have a stream lock, because it was created
                    245:         * purely for the server side to accept this connection and
                    246:         * started out life using the domain-wide lock.
                                 *
                                 * so's unp_streamlock pointer is cleared and one extra
                                 * reference is taken with mutex_obj_hold() before both
                                 * sockets are pointed at the lock.
                    247:         */
                    248:        unp = sotounpcb(so);
                    249:        KASSERT(unp->unp_streamlock != NULL);
                    250:        KASSERT(sotounpcb(so2)->unp_streamlock == NULL);
                    251:        lock = unp->unp_streamlock;
                    252:        unp->unp_streamlock = NULL;
                    253:        mutex_obj_hold(lock);
                    254:        membar_exit();
1.127     bouyer    255:        /*
                    256:         * possible race if lock is not held - see comment in
                    257:         * uipc_usrreq(PRU_ACCEPT).
                    258:         */
                    259:        KASSERT(mutex_owned(lock));
1.115     ad        260:        solockreset(so, lock);
                    261:        solockreset(so2, lock);
1.112     ad        262: }
                    263:
                    264: /*
                    265:  * Reset a socket's lock back to the domain-wide lock.
                          *
                          * No-op when the socket already uses uipc_lock.  Otherwise the
                          * current lock is parked in unp_streamlock (which must be empty),
                          * a reference is taken on uipc_lock, and the socket is switched
                          * over.  The caller enters holding the old lock and returns
                          * holding uipc_lock instead.
                    266:  */
                    267: static void
                    268: unp_resetlock(struct socket *so)
                    269: {
                    270:        kmutex_t *olock, *nlock;
                    271:        struct unpcb *unp;
                    272:
                    273:        KASSERT(solocked(so));
                    274:
                    275:        olock = so->so_lock;
                    276:        nlock = uipc_lock;
                    277:        if (olock == nlock)
                    278:                return;
                    279:        unp = sotounpcb(so);
                    280:        KASSERT(unp->unp_streamlock == NULL);
                    281:        unp->unp_streamlock = olock;
                    282:        mutex_obj_hold(nlock);
                                /* Take the new lock before releasing the old one. */
                    283:        mutex_enter(nlock);
1.115     ad        284:        solockreset(so, nlock);
1.112     ad        285:        mutex_exit(olock);
                    286: }
                    287:
                    288: static void
                    289: unp_free(struct unpcb *unp)
                    290: {
                    291:
                    292:        if (unp->unp_addr)
                    293:                free(unp->unp_addr, M_SONAME);
                    294:        if (unp->unp_streamlock != NULL)
                    295:                mutex_obj_free(unp->unp_streamlock);
                    296:        free(unp, M_PCB);
                    297: }
1.30      thorpej   298:
1.20      mycroft   299: int
1.76      matt      300: unp_output(struct mbuf *m, struct mbuf *control, struct unpcb *unp,
1.92      ad        301:        struct lwp *l)
1.20      mycroft   302: {
                                /*
                                 * Append m (and control, if any) to the receive buffer
                                 * of the socket unp is connected to, tagged with the
                                 * sender's address (sun_noname when unp has no address).
                                 *
                                 * unp->unp_conn is dereferenced unconditionally, so the
                                 * caller must only call this on a connected pcb, and the
                                 * peer socket must be locked.  l is handed to
                                 * unp_addsockcred() when the peer has UNP_WANTCRED set.
                                 *
                                 * Returns 0 after waking the peer's readers.  If the
                                 * append fails, the overflow counter is bumped, control
                                 * is passed to unp_dispose(), both mbuf chains are
                                 * freed and ENOBUFS is returned.
                                 */
                    303:        struct socket *so2;
1.77      matt      304:        const struct sockaddr_un *sun;
1.20      mycroft   305:
                    306:        so2 = unp->unp_conn->unp_socket;
1.112     ad        307:
                    308:        KASSERT(solocked(so2));
                    309:
1.20      mycroft   310:        if (unp->unp_addr)
                    311:                sun = unp->unp_addr;
                    312:        else
                    313:                sun = &sun_noname;
1.30      thorpej   314:        if (unp->unp_conn->unp_flags & UNP_WANTCRED)
1.92      ad        315:                control = unp_addsockcred(l, control);
1.82      christos  316:        if (sbappendaddr(&so2->so_rcv, (const struct sockaddr *)sun, m,
1.20      mycroft   317:            control) == 0) {
1.112     ad        318:                so2->so_rcv.sb_overflowed++;
1.98      martin    319:                unp_dispose(control);
1.20      mycroft   320:                m_freem(control);
                    321:                m_freem(m);
1.60      christos  322:                return (ENOBUFS);
1.20      mycroft   323:        } else {
                    324:                sorwakeup(so2);
                    325:                return (0);
                    326:        }
                    327: }
                    328:
                    329: void
1.112     ad        330: unp_setaddr(struct socket *so, struct mbuf *nam, bool peeraddr)
1.20      mycroft   331: {
                                /*
                                 * Copy an address into nam: the connected peer's when
                                 * peeraddr is true, otherwise the socket's own.
                                 * sun_noname is used when the chosen address is absent.
                                 *
                                 * The socket must be locked on entry.  The lock is
                                 * dropped and retaken around the external-storage
                                 * allocation, so the address is looked up again on the
                                 * next pass through the loop rather than reused.
                                 */
1.77      matt      332:        const struct sockaddr_un *sun;
1.112     ad        333:        struct unpcb *unp;
                    334:        bool ext;
1.20      mycroft   335:
1.127     bouyer    336:        KASSERT(solocked(so));
1.112     ad        337:        unp = sotounpcb(so);
                    338:        ext = false;
1.20      mycroft   339:
1.112     ad        340:        for (;;) {
                    341:                sun = NULL;
                    342:                if (peeraddr) {
                    343:                        if (unp->unp_conn && unp->unp_conn->unp_addr)
                    344:                                sun = unp->unp_conn->unp_addr;
                    345:                } else {
                    346:                        if (unp->unp_addr)
                    347:                                sun = unp->unp_addr;
                    348:                }
                    349:                if (sun == NULL)
                    350:                        sun = &sun_noname;
                    351:                nam->m_len = sun->sun_len;
                                        /*
                                         * Address longer than MLEN and no external
                                         * storage attached yet: attach it once (ext
                                         * guards against a second attempt), then go
                                         * round again.
                                         */
                    352:                if (nam->m_len > MLEN && !ext) {
                    353:                        sounlock(so);
                    354:                        MEXTMALLOC(nam, MAXPATHLEN * 2, M_WAITOK);
                    355:                        solock(so);
                    356:                        ext = true;
                    357:                } else {
                    358:                        KASSERT(nam->m_len <= MAXPATHLEN * 2);
                    359:                        memcpy(mtod(nam, void *), sun, (size_t)nam->m_len);
                    360:                        break;
                    361:                }
                    362:        }
1.20      mycroft   363: }
                    364:
1.1       cgd       365: /*ARGSUSED*/
1.5       andrew    366: int
1.76      matt      367: uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
1.86      christos  368:        struct mbuf *control, struct lwp *l)
1.1       cgd       369: {
                    370:        struct unpcb *unp = sotounpcb(so);
1.46      augustss  371:        struct socket *so2;
1.86      christos  372:        struct proc *p;
1.75      christos  373:        u_int newhiwat;
1.46      augustss  374:        int error = 0;
1.1       cgd       375:
                    376:        if (req == PRU_CONTROL)
                    377:                return (EOPNOTSUPP);
1.20      mycroft   378:
1.22      mycroft   379: #ifdef DIAGNOSTIC
                    380:        if (req != PRU_SEND && req != PRU_SENDOOB && control)
                    381:                panic("uipc_usrreq: unexpected control mbuf");
                    382: #endif
1.86      christos  383:        p = l ? l->l_proc : NULL;
1.112     ad        384:        if (req != PRU_ATTACH) {
1.122     yamt      385:                if (unp == NULL) {
1.112     ad        386:                        error = EINVAL;
                    387:                        goto release;
                    388:                }
                    389:                KASSERT(solocked(so));
1.1       cgd       390:        }
1.20      mycroft   391:
1.1       cgd       392:        switch (req) {
                    393:
                    394:        case PRU_ATTACH:
1.122     yamt      395:                if (unp != NULL) {
1.1       cgd       396:                        error = EISCONN;
                    397:                        break;
                    398:                }
                    399:                error = unp_attach(so);
                    400:                break;
                    401:
                    402:        case PRU_DETACH:
                    403:                unp_detach(unp);
                    404:                break;
                    405:
                    406:        case PRU_BIND:
1.90      christos  407:                KASSERT(l != NULL);
1.112     ad        408:                error = unp_bind(so, nam, l);
1.1       cgd       409:                break;
                    410:
                    411:        case PRU_LISTEN:
1.112     ad        412:                /*
                    413:                 * If the socket can accept a connection, it must be
                    414:                 * locked by uipc_lock.
                    415:                 */
                    416:                unp_resetlock(so);
1.122     yamt      417:                if (unp->unp_vnode == NULL)
1.1       cgd       418:                        error = EINVAL;
                    419:                break;
                    420:
                    421:        case PRU_CONNECT:
1.90      christos  422:                KASSERT(l != NULL);
1.86      christos  423:                error = unp_connect(so, nam, l);
1.1       cgd       424:                break;
                    425:
                    426:        case PRU_CONNECT2:
1.72      matt      427:                error = unp_connect2(so, (struct socket *)nam, PRU_CONNECT2);
1.1       cgd       428:                break;
                    429:
                    430:        case PRU_DISCONNECT:
                    431:                unp_disconnect(unp);
                    432:                break;
                    433:
                    434:        case PRU_ACCEPT:
1.112     ad        435:                KASSERT(so->so_lock == uipc_lock);
1.72      matt      436:                /*
                    437:                 * Mark the initiating STREAM socket as connected *ONLY*
                    438:                 * after it's been accepted.  This prevents a client from
                    439:                 * overrunning a server and receiving ECONNREFUSED.
                    440:                 */
1.112     ad        441:                if (unp->unp_conn == NULL)
                    442:                        break;
                    443:                so2 = unp->unp_conn->unp_socket;
                    444:                if (so2->so_state & SS_ISCONNECTING) {
                    445:                        KASSERT(solocked2(so, so->so_head));
                    446:                        KASSERT(solocked2(so2, so->so_head));
                    447:                        soisconnected(so2);
                    448:                }
                    449:                /*
                    450:                 * If the connection is fully established, break the
                    451:                 * association with uipc_lock and give the connected
                    452:                 * pair a separate lock to share.
1.127     bouyer    453:                 * There is a race here: sotounpcb(so2)->unp_streamlock
                    454:                 * is not locked, so when changing so2->so_lock
                    455:                 * another thread can grab it while so->so_lock is still
                    456:                 * pointing to the (locked) uipc_lock.
1.129     wiz       457:                 * this should be harmless, except that this makes
1.127     bouyer    458:                 * solocked2() and solocked() unreliable.
                    459:                 * Another problem is that unp_setaddr() expects the
                    460:                 * socket locked. Grabbing sotounpcb(so2)->unp_streamlock
                    461:                 * fixes both issues.
1.112     ad        462:                 */
1.127     bouyer    463:                mutex_enter(sotounpcb(so2)->unp_streamlock);
1.112     ad        464:                unp_setpeerlocks(so2, so);
                    465:                /*
                    466:                 * Only now return peer's address, as we may need to
                    467:                 * block in order to allocate memory.
                    468:                 *
                    469:                 * XXX Minor race: connection can be broken while
                    470:                 * lock is dropped in unp_setaddr().  We will return
                    471:                 * error == 0 and sun_noname as the peer address.
                    472:                 */
                    473:                unp_setaddr(so, nam, true);
1.127     bouyer    474:                /* so_lock now points to unp_streamlock */
                    475:                mutex_exit(so2->so_lock);
1.1       cgd       476:                break;
                    477:
                    478:        case PRU_SHUTDOWN:
                    479:                socantsendmore(so);
                    480:                unp_shutdown(unp);
                    481:                break;
                    482:
                    483:        case PRU_RCVD:
                    484:                switch (so->so_type) {
                    485:
                    486:                case SOCK_DGRAM:
                    487:                        panic("uipc 1");
                    488:                        /*NOTREACHED*/
                    489:
1.134     manu      490:                case SOCK_SEQPACKET: /* FALLTHROUGH */
1.1       cgd       491:                case SOCK_STREAM:
                    492: #define        rcv (&so->so_rcv)
                    493: #define snd (&so2->so_snd)
                    494:                        if (unp->unp_conn == 0)
                    495:                                break;
                    496:                        so2 = unp->unp_conn->unp_socket;
1.112     ad        497:                        KASSERT(solocked2(so, so2));
1.1       cgd       498:                        /*
                    499:                         * Adjust backpressure on sender
                    500:                         * and wakeup any waiting to write.
                    501:                         */
                    502:                        snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
                    503:                        unp->unp_mbcnt = rcv->sb_mbcnt;
1.75      christos  504:                        newhiwat = snd->sb_hiwat + unp->unp_cc - rcv->sb_cc;
1.81      christos  505:                        (void)chgsbsize(so2->so_uidinfo,
1.75      christos  506:                            &snd->sb_hiwat, newhiwat, RLIM_INFINITY);
1.1       cgd       507:                        unp->unp_cc = rcv->sb_cc;
                    508:                        sowwakeup(so2);
                    509: #undef snd
                    510: #undef rcv
                    511:                        break;
                    512:
                    513:                default:
                    514:                        panic("uipc 2");
                    515:                }
                    516:                break;
                    517:
                    518:        case PRU_SEND:
1.30      thorpej   519:                /*
                    520:                 * Note: unp_internalize() rejects any control message
                    521:                 * other than SCM_RIGHTS, and only allows one.  This
                    522:                 * has the side-effect of preventing a caller from
                    523:                 * forging SCM_CREDS.
                    524:                 */
1.90      christos  525:                if (control) {
1.112     ad        526:                        sounlock(so);
                    527:                        error = unp_internalize(&control);
                    528:                        solock(so);
                    529:                        if (error != 0) {
1.111     mlelstv   530:                                m_freem(control);
                    531:                                m_freem(m);
                    532:                                break;
                    533:                        }
1.83      yamt      534:                }
1.1       cgd       535:                switch (so->so_type) {
                    536:
                    537:                case SOCK_DGRAM: {
1.112     ad        538:                        KASSERT(so->so_lock == uipc_lock);
1.1       cgd       539:                        if (nam) {
1.111     mlelstv   540:                                if ((so->so_state & SS_ISCONNECTED) != 0)
1.1       cgd       541:                                        error = EISCONN;
1.111     mlelstv   542:                                else {
1.112     ad        543:                                        /*
                    544:                                         * Note: once connected, the
                    545:                                         * socket's lock must not be
                    546:                                         * dropped until we have sent
                    547:                                         * the message and disconnected.
                    548:                                         * This is necessary to prevent
                    549:                                         * intervening control ops, like
                    550:                                         * another connection.
                    551:                                         */
1.111     mlelstv   552:                                        error = unp_connect(so, nam, l);
1.20      mycroft   553:                                }
1.1       cgd       554:                        } else {
1.111     mlelstv   555:                                if ((so->so_state & SS_ISCONNECTED) == 0)
1.1       cgd       556:                                        error = ENOTCONN;
1.111     mlelstv   557:                        }
                    558:                        if (error) {
                    559:                                unp_dispose(control);
                    560:                                m_freem(control);
                    561:                                m_freem(m);
                    562:                                break;
1.1       cgd       563:                        }
1.89      christos  564:                        KASSERT(p != NULL);
1.92      ad        565:                        error = unp_output(m, control, unp, l);
1.1       cgd       566:                        if (nam)
                    567:                                unp_disconnect(unp);
                    568:                        break;
                    569:                }
                    570:
1.134     manu      571:                case SOCK_SEQPACKET: /* FALLTHROUGH */
1.1       cgd       572:                case SOCK_STREAM:
                    573: #define        rcv (&so2->so_rcv)
                    574: #define        snd (&so->so_snd)
1.87      christos  575:                        if (unp->unp_conn == NULL) {
                    576:                                error = ENOTCONN;
                    577:                                break;
                    578:                        }
1.1       cgd       579:                        so2 = unp->unp_conn->unp_socket;
1.112     ad        580:                        KASSERT(solocked2(so, so2));
1.30      thorpej   581:                        if (unp->unp_conn->unp_flags & UNP_WANTCRED) {
                    582:                                /*
                    583:                                 * Credentials are passed only once on
1.134     manu      584:                                 * SOCK_STREAM and SOCK_SEQPACKET.
1.30      thorpej   585:                                 */
                    586:                                unp->unp_conn->unp_flags &= ~UNP_WANTCRED;
1.92      ad        587:                                control = unp_addsockcred(l, control);
1.30      thorpej   588:                        }
1.1       cgd       589:                        /*
                    590:                         * Send to paired receive port, and then reduce
                    591:                         * send buffer hiwater marks to maintain backpressure.
                    592:                         * Wake up readers.
                    593:                         */
                    594:                        if (control) {
1.112     ad        595:                                if (sbappendcontrol(rcv, m, control) != 0)
                    596:                                        control = NULL;
1.134     manu      597:                        } else {
                    598:                                switch(so->so_type) {
                    599:                                case SOCK_SEQPACKET:
                    600:                                        sbappendrecord(rcv, m);
                    601:                                        break;
                    602:                                case SOCK_STREAM:
                    603:                                        sbappend(rcv, m);
                    604:                                        break;
                    605:                                default:
                    606:                                        panic("uipc_usrreq");
                    607:                                        break;
                    608:                                }
                    609:                        }
1.1       cgd       610:                        snd->sb_mbmax -=
                    611:                            rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
                    612:                        unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
1.75      christos  613:                        newhiwat = snd->sb_hiwat -
                    614:                            (rcv->sb_cc - unp->unp_conn->unp_cc);
1.81      christos  615:                        (void)chgsbsize(so->so_uidinfo,
1.75      christos  616:                            &snd->sb_hiwat, newhiwat, RLIM_INFINITY);
1.1       cgd       617:                        unp->unp_conn->unp_cc = rcv->sb_cc;
                    618:                        sorwakeup(so2);
                    619: #undef snd
                    620: #undef rcv
1.112     ad        621:                        if (control != NULL) {
                    622:                                unp_dispose(control);
                    623:                                m_freem(control);
                    624:                        }
1.1       cgd       625:                        break;
                    626:
                    627:                default:
                    628:                        panic("uipc 4");
                    629:                }
                    630:                break;
                    631:
                    632:        case PRU_ABORT:
1.112     ad        633:                (void)unp_drop(unp, ECONNABORTED);
1.39      sommerfe  634:
1.88      matt      635:                KASSERT(so->so_head == NULL);
1.39      sommerfe  636: #ifdef DIAGNOSTIC
1.122     yamt      637:                if (so->so_pcb == NULL)
1.39      sommerfe  638:                        panic("uipc 5: drop killed pcb");
                    639: #endif
                    640:                unp_detach(unp);
1.1       cgd       641:                break;
                    642:
                    643:        case PRU_SENSE:
                    644:                ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
1.134     manu      645:                switch (so->so_type) {
                    646:                case SOCK_SEQPACKET: /* FALLTHROUGH */
                    647:                case SOCK_STREAM:
                    648:                        if (unp->unp_conn == 0)
                    649:                                break;
                    650:
1.1       cgd       651:                        so2 = unp->unp_conn->unp_socket;
1.112     ad        652:                        KASSERT(solocked2(so, so2));
1.1       cgd       653:                        ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
1.134     manu      654:                        break;
                    655:                default:
                    656:                        break;
1.1       cgd       657:                }
                    658:                ((struct stat *) m)->st_dev = NODEV;
                    659:                if (unp->unp_ino == 0)
                    660:                        unp->unp_ino = unp_ino++;
1.25      kleink    661:                ((struct stat *) m)->st_atimespec =
                    662:                    ((struct stat *) m)->st_mtimespec =
                    663:                    ((struct stat *) m)->st_ctimespec = unp->unp_ctime;
1.1       cgd       664:                ((struct stat *) m)->st_ino = unp->unp_ino;
                    665:                return (0);
                    666:
                    667:        case PRU_RCVOOB:
1.20      mycroft   668:                error = EOPNOTSUPP;
                    669:                break;
1.1       cgd       670:
                    671:        case PRU_SENDOOB:
1.22      mycroft   672:                m_freem(control);
1.20      mycroft   673:                m_freem(m);
1.1       cgd       674:                error = EOPNOTSUPP;
                    675:                break;
                    676:
                    677:        case PRU_SOCKADDR:
1.112     ad        678:                unp_setaddr(so, nam, false);
1.1       cgd       679:                break;
                    680:
                    681:        case PRU_PEERADDR:
1.112     ad        682:                unp_setaddr(so, nam, true);
1.1       cgd       683:                break;
                    684:
                    685:        default:
                    686:                panic("piusrreq");
                    687:        }
1.20      mycroft   688:
1.1       cgd       689: release:
                    690:        return (error);
                    691: }
                    692:
                    693: /*
1.30      thorpej   694:  * Unix domain socket option processing.
                    695:  */
                    696: int
1.118     plunky    697: uipc_ctloutput(int op, struct socket *so, struct sockopt *sopt)
1.30      thorpej   698: {
                    699:        struct unpcb *unp = sotounpcb(so);
                    700:        int optval = 0, error = 0;
                    701:
1.112     ad        702:        KASSERT(solocked(so));
                    703:
1.118     plunky    704:        if (sopt->sopt_level != 0) {
1.100     dyoung    705:                error = ENOPROTOOPT;
1.30      thorpej   706:        } else switch (op) {
                    707:
                    708:        case PRCO_SETOPT:
1.118     plunky    709:                switch (sopt->sopt_name) {
1.30      thorpej   710:                case LOCAL_CREDS:
1.72      matt      711:                case LOCAL_CONNWAIT:
1.118     plunky    712:                        error = sockopt_getint(sopt, &optval);
                    713:                        if (error)
                    714:                                break;
                    715:                        switch (sopt->sopt_name) {
1.30      thorpej   716: #define        OPTSET(bit) \
                    717:        if (optval) \
                    718:                unp->unp_flags |= (bit); \
                    719:        else \
                    720:                unp->unp_flags &= ~(bit);
                    721:
1.118     plunky    722:                        case LOCAL_CREDS:
                    723:                                OPTSET(UNP_WANTCRED);
                    724:                                break;
                    725:                        case LOCAL_CONNWAIT:
                    726:                                OPTSET(UNP_CONNWAIT);
                    727:                                break;
1.30      thorpej   728:                        }
                    729:                        break;
                    730: #undef OPTSET
                    731:
                    732:                default:
                    733:                        error = ENOPROTOOPT;
                    734:                        break;
                    735:                }
                    736:                break;
                    737:
                    738:        case PRCO_GETOPT:
1.112     ad        739:                sounlock(so);
1.118     plunky    740:                switch (sopt->sopt_name) {
1.99      he        741:                case LOCAL_PEEREID:
                    742:                        if (unp->unp_flags & UNP_EIDSVALID) {
1.118     plunky    743:                                error = sockopt_set(sopt,
                    744:                                    &unp->unp_connid, sizeof(unp->unp_connid));
1.99      he        745:                        } else {
                    746:                                error = EINVAL;
                    747:                        }
                    748:                        break;
1.30      thorpej   749:                case LOCAL_CREDS:
                    750: #define        OPTBIT(bit)     (unp->unp_flags & (bit) ? 1 : 0)
                    751:
1.99      he        752:                        optval = OPTBIT(UNP_WANTCRED);
1.118     plunky    753:                        error = sockopt_setint(sopt, optval);
1.30      thorpej   754:                        break;
                    755: #undef OPTBIT
                    756:
                    757:                default:
                    758:                        error = ENOPROTOOPT;
                    759:                        break;
                    760:                }
1.112     ad        761:                solock(so);
1.30      thorpej   762:                break;
                    763:        }
                    764:        return (error);
                    765: }
                    766:
                    767: /*
1.1       cgd       768:  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
                    769:  * for stream sockets, although the total for sender and receiver is
                    770:  * actually only PIPSIZ.
                    771:  * Datagram sockets really use the sendspace as the maximum datagram size,
                    772:  * and don't really want to reserve the sendspace.  Their recvspace should
                    773:  * be large enough for at least one max-size datagram plus address.
                    774:  */
                    775: #define        PIPSIZ  4096
                    776: u_long unpst_sendspace = PIPSIZ;
                    777: u_long unpst_recvspace = PIPSIZ;
                    778: u_long unpdg_sendspace = 2*1024;       /* really max datagram size */
                    779: u_long unpdg_recvspace = 4*1024;
                    780:
1.121     mrg       781: u_int  unp_rights;                     /* files in flight */
                    782: u_int  unp_rights_ratio = 2;           /* limit, fraction of maxfiles */
1.1       cgd       783:
1.5       andrew    784: int
1.76      matt      785: unp_attach(struct socket *so)
1.1       cgd       786: {
1.46      augustss  787:        struct unpcb *unp;
1.1       cgd       788:        int error;
1.80      perry     789:
1.112     ad        790:        switch (so->so_type) {
1.134     manu      791:        case SOCK_SEQPACKET: /* FALLTHROUGH */
1.112     ad        792:        case SOCK_STREAM:
                    793:                if (so->so_lock == NULL) {
                    794:                        /*
                    795:                         * XXX Assuming that no socket locks are held,
                    796:                         * as this call may sleep.
                    797:                         */
                    798:                        so->so_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
                    799:                        solock(so);
                    800:                }
                    801:                if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
1.1       cgd       802:                        error = soreserve(so, unpst_sendspace, unpst_recvspace);
1.112     ad        803:                        if (error != 0)
                    804:                                return (error);
                    805:                }
                    806:                break;
1.1       cgd       807:
1.112     ad        808:        case SOCK_DGRAM:
                    809:                if (so->so_lock == NULL) {
                    810:                        mutex_obj_hold(uipc_lock);
                    811:                        so->so_lock = uipc_lock;
                    812:                        solock(so);
                    813:                }
                    814:                if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
1.1       cgd       815:                        error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
1.112     ad        816:                        if (error != 0)
                    817:                                return (error);
                    818:                }
                    819:                break;
1.8       mycroft   820:
1.112     ad        821:        default:
                    822:                panic("unp_attach");
1.1       cgd       823:        }
1.112     ad        824:        KASSERT(solocked(so));
1.14      mycroft   825:        unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT);
                    826:        if (unp == NULL)
1.1       cgd       827:                return (ENOBUFS);
1.123     yamt      828:        memset(unp, 0, sizeof(*unp));
1.14      mycroft   829:        unp->unp_socket = so;
1.15      mycroft   830:        so->so_pcb = unp;
1.85      simonb    831:        nanotime(&unp->unp_ctime);
1.1       cgd       832:        return (0);
                    833: }
                    834:
1.17      pk        835: void
1.76      matt      836: unp_detach(struct unpcb *unp)
1.1       cgd       837: {
1.112     ad        838:        struct socket *so;
                    839:        vnode_t *vp;
                    840:
                    841:        so = unp->unp_socket;
1.80      perry     842:
1.112     ad        843:  retry:
                    844:        if ((vp = unp->unp_vnode) != NULL) {
                    845:                sounlock(so);
                    846:                /* Acquire v_interlock to protect against unp_connect(). */
1.113     ad        847:                /* XXXAD racy */
1.135     rmind     848:                mutex_enter(vp->v_interlock);
1.112     ad        849:                vp->v_socket = NULL;
                    850:                vrelel(vp, 0);
                    851:                solock(so);
                    852:                unp->unp_vnode = NULL;
1.1       cgd       853:        }
                    854:        if (unp->unp_conn)
                    855:                unp_disconnect(unp);
1.112     ad        856:        while (unp->unp_refs) {
                    857:                KASSERT(solocked2(so, unp->unp_refs->unp_socket));
                    858:                if (unp_drop(unp->unp_refs, ECONNRESET)) {
                    859:                        solock(so);
                    860:                        goto retry;
                    861:                }
                    862:        }
                    863:        soisdisconnected(so);
                    864:        so->so_pcb = NULL;
1.8       mycroft   865:        if (unp_rights) {
                    866:                /*
1.121     mrg       867:                 * Normally the receive buffer is flushed later, in sofree,
                    868:                 * but if our receive buffer holds references to files that
                    869:                 * are now garbage, we will enqueue those file references to
                    870:                 * the garbage collector and kick it into action.
1.8       mycroft   871:                 */
1.112     ad        872:                sorflush(so);
                    873:                unp_free(unp);
1.121     mrg       874:                unp_thread_kick();
1.14      mycroft   875:        } else
1.112     ad        876:                unp_free(unp);
1.1       cgd       877: }
                    878:
1.5       andrew    879: int
1.112     ad        880: unp_bind(struct socket *so, struct mbuf *nam, struct lwp *l)
1.1       cgd       881: {
1.27      thorpej   882:        struct sockaddr_un *sun;
1.112     ad        883:        struct unpcb *unp;
1.106     ad        884:        vnode_t *vp;
1.1       cgd       885:        struct vattr vattr;
1.27      thorpej   886:        size_t addrlen;
1.1       cgd       887:        int error;
1.133     dholland  888:        struct pathbuf *pb;
1.1       cgd       889:        struct nameidata nd;
1.112     ad        890:        proc_t *p;
1.1       cgd       891:
1.112     ad        892:        unp = sotounpcb(so);
                    893:        if (unp->unp_vnode != NULL)
1.20      mycroft   894:                return (EINVAL);
1.109     ad        895:        if ((unp->unp_flags & UNP_BUSY) != 0) {
                    896:                /*
                    897:                 * EALREADY may not be strictly accurate, but since this
                    898:                 * is a major application error it's hardly a big deal.
                    899:                 */
                    900:                return (EALREADY);
                    901:        }
                    902:        unp->unp_flags |= UNP_BUSY;
1.112     ad        903:        sounlock(so);
1.109     ad        904:
1.27      thorpej   905:        /*
                    906:         * Allocate the new sockaddr.  We have to allocate one
                    907:         * extra byte so that we can ensure that the pathname
                    908:         * is nul-terminated.
                    909:         */
1.112     ad        910:        p = l->l_proc;
1.27      thorpej   911:        addrlen = nam->m_len + 1;
                    912:        sun = malloc(addrlen, M_SONAME, M_WAITOK);
1.95      christos  913:        m_copydata(nam, 0, nam->m_len, (void *)sun);
1.27      thorpej   914:        *(((char *)sun) + nam->m_len) = '\0';
                    915:
1.133     dholland  916:        pb = pathbuf_create(sun->sun_path);
                    917:        if (pb == NULL) {
                    918:                error = ENOMEM;
                    919:                goto bad;
                    920:        }
                    921:        NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT | TRYEMULROOT, pb);
1.27      thorpej   922:
1.1       cgd       923: /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
1.133     dholland  924:        if ((error = namei(&nd)) != 0) {
                    925:                pathbuf_destroy(pb);
1.27      thorpej   926:                goto bad;
1.133     dholland  927:        }
1.9       mycroft   928:        vp = nd.ni_vp;
1.96      hannken   929:        if (vp != NULL) {
1.9       mycroft   930:                VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
                    931:                if (nd.ni_dvp == vp)
                    932:                        vrele(nd.ni_dvp);
1.1       cgd       933:                else
1.9       mycroft   934:                        vput(nd.ni_dvp);
1.1       cgd       935:                vrele(vp);
1.133     dholland  936:                pathbuf_destroy(pb);
1.96      hannken   937:                error = EADDRINUSE;
                    938:                goto bad;
1.1       cgd       939:        }
1.128     pooka     940:        vattr_null(&vattr);
1.1       cgd       941:        vattr.va_type = VSOCK;
1.84      jmmv      942:        vattr.va_mode = ACCESSPERMS & ~(p->p_cwdi->cwdi_cmask);
1.16      christos  943:        error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1.133     dholland  944:        if (error) {
                    945:                pathbuf_destroy(pb);
1.27      thorpej   946:                goto bad;
1.133     dholland  947:        }
1.9       mycroft   948:        vp = nd.ni_vp;
1.112     ad        949:        solock(so);
1.1       cgd       950:        vp->v_socket = unp->unp_socket;
                    951:        unp->unp_vnode = vp;
1.27      thorpej   952:        unp->unp_addrlen = addrlen;
                    953:        unp->unp_addr = sun;
1.99      he        954:        unp->unp_connid.unp_pid = p->p_pid;
1.112     ad        955:        unp->unp_connid.unp_euid = kauth_cred_geteuid(l->l_cred);
                    956:        unp->unp_connid.unp_egid = kauth_cred_getegid(l->l_cred);
1.99      he        957:        unp->unp_flags |= UNP_EIDSBIND;
1.130     hannken   958:        VOP_UNLOCK(vp);
1.109     ad        959:        unp->unp_flags &= ~UNP_BUSY;
1.133     dholland  960:        pathbuf_destroy(pb);
1.1       cgd       961:        return (0);
1.27      thorpej   962:
                    963:  bad:
                    964:        free(sun, M_SONAME);
1.112     ad        965:        solock(so);
1.109     ad        966:        unp->unp_flags &= ~UNP_BUSY;
1.27      thorpej   967:        return (error);
1.1       cgd       968: }
                    969:
1.5       andrew    970: int
1.86      christos  971: unp_connect(struct socket *so, struct mbuf *nam, struct lwp *l)
1.1       cgd       972: {
1.46      augustss  973:        struct sockaddr_un *sun;
1.106     ad        974:        vnode_t *vp;
1.46      augustss  975:        struct socket *so2, *so3;
1.99      he        976:        struct unpcb *unp, *unp2, *unp3;
1.27      thorpej   977:        size_t addrlen;
1.1       cgd       978:        int error;
1.133     dholland  979:        struct pathbuf *pb;
1.1       cgd       980:        struct nameidata nd;
                    981:
1.109     ad        982:        unp = sotounpcb(so);
                    983:        if ((unp->unp_flags & UNP_BUSY) != 0) {
                    984:                /*
                    985:                 * EALREADY may not be strictly accurate, but since this
                    986:                 * is a major application error it's hardly a big deal.
                    987:                 */
                    988:                return (EALREADY);
                    989:        }
                    990:        unp->unp_flags |= UNP_BUSY;
1.112     ad        991:        sounlock(so);
1.109     ad        992:
1.27      thorpej   993:        /*
                    994:         * Allocate a temporary sockaddr.  We have to allocate one extra
                    995:         * byte so that we can ensure that the pathname is nul-terminated.
                    996:         * When we establish the connection, we copy the other PCB's
                    997:         * sockaddr to our own.
                    998:         */
                    999:        addrlen = nam->m_len + 1;
                   1000:        sun = malloc(addrlen, M_SONAME, M_WAITOK);
1.95      christos 1001:        m_copydata(nam, 0, nam->m_len, (void *)sun);
1.27      thorpej  1002:        *(((char *)sun) + nam->m_len) = '\0';
                   1003:
1.133     dholland 1004:        pb = pathbuf_create(sun->sun_path);
                   1005:        if (pb == NULL) {
                   1006:                error = ENOMEM;
                   1007:                goto bad2;
                   1008:        }
1.27      thorpej  1009:
1.133     dholland 1010:        NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
                   1011:
                   1012:        if ((error = namei(&nd)) != 0) {
                   1013:                pathbuf_destroy(pb);
1.27      thorpej  1014:                goto bad2;
1.133     dholland 1015:        }
1.9       mycroft  1016:        vp = nd.ni_vp;
1.1       cgd      1017:        if (vp->v_type != VSOCK) {
                   1018:                error = ENOTSOCK;
                   1019:                goto bad;
                   1020:        }
1.133     dholland 1021:        pathbuf_destroy(pb);
1.102     pooka    1022:        if ((error = VOP_ACCESS(vp, VWRITE, l->l_cred)) != 0)
1.1       cgd      1023:                goto bad;
1.112     ad       1024:        /* Acquire v_interlock to protect against unp_detach(). */
1.135     rmind    1025:        mutex_enter(vp->v_interlock);
1.1       cgd      1026:        so2 = vp->v_socket;
1.112     ad       1027:        if (so2 == NULL) {
1.135     rmind    1028:                mutex_exit(vp->v_interlock);
1.1       cgd      1029:                error = ECONNREFUSED;
                   1030:                goto bad;
                   1031:        }
                   1032:        if (so->so_type != so2->so_type) {
1.135     rmind    1033:                mutex_exit(vp->v_interlock);
1.1       cgd      1034:                error = EPROTOTYPE;
                   1035:                goto bad;
                   1036:        }
1.112     ad       1037:        solock(so);
                   1038:        unp_resetlock(so);
1.135     rmind    1039:        mutex_exit(vp->v_interlock);
1.112     ad       1040:        if ((so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) {
                   1041:                /*
                   1042:                 * This may seem somewhat fragile but is OK: if we can
                   1043:                 * see SO_ACCEPTCONN set on the endpoint, then it must
                   1044:                 * be locked by the domain-wide uipc_lock.
                   1045:                 */
1.132     yamt     1046:                KASSERT((so2->so_options & SO_ACCEPTCONN) == 0 ||
1.112     ad       1047:                    so2->so_lock == uipc_lock);
1.1       cgd      1048:                if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
1.124     yamt     1049:                    (so3 = sonewconn(so2, 0)) == NULL) {
1.1       cgd      1050:                        error = ECONNREFUSED;
1.112     ad       1051:                        sounlock(so);
1.1       cgd      1052:                        goto bad;
                   1053:                }
                   1054:                unp2 = sotounpcb(so2);
                   1055:                unp3 = sotounpcb(so3);
1.26      thorpej  1056:                if (unp2->unp_addr) {
                   1057:                        unp3->unp_addr = malloc(unp2->unp_addrlen,
                   1058:                            M_SONAME, M_WAITOK);
1.36      perry    1059:                        memcpy(unp3->unp_addr, unp2->unp_addr,
1.26      thorpej  1060:                            unp2->unp_addrlen);
                   1061:                        unp3->unp_addrlen = unp2->unp_addrlen;
                   1062:                }
1.30      thorpej  1063:                unp3->unp_flags = unp2->unp_flags;
1.112     ad       1064:                unp3->unp_connid.unp_pid = l->l_proc->p_pid;
                   1065:                unp3->unp_connid.unp_euid = kauth_cred_geteuid(l->l_cred);
                   1066:                unp3->unp_connid.unp_egid = kauth_cred_getegid(l->l_cred);
1.99      he       1067:                unp3->unp_flags |= UNP_EIDSVALID;
                   1068:                if (unp2->unp_flags & UNP_EIDSBIND) {
                   1069:                        unp->unp_connid = unp2->unp_connid;
                   1070:                        unp->unp_flags |= UNP_EIDSVALID;
                   1071:                }
1.112     ad       1072:                so2 = so3;
1.33      thorpej  1073:        }
1.72      matt     1074:        error = unp_connect2(so, so2, PRU_CONNECT);
1.112     ad       1075:        sounlock(so);
1.27      thorpej  1076:  bad:
1.1       cgd      1077:        vput(vp);
1.27      thorpej  1078:  bad2:
                   1079:        free(sun, M_SONAME);
1.112     ad       1080:        solock(so);
1.109     ad       1081:        unp->unp_flags &= ~UNP_BUSY;
1.1       cgd      1082:        return (error);
                   1083: }
                   1084:
1.5       andrew   1085: int
1.76      matt     1086: unp_connect2(struct socket *so, struct socket *so2, int req)
1.1       cgd      1087: {
1.46      augustss 1088:        struct unpcb *unp = sotounpcb(so);
                   1089:        struct unpcb *unp2;
1.1       cgd      1090:
                   1091:        if (so2->so_type != so->so_type)
                   1092:                return (EPROTOTYPE);
1.112     ad       1093:
                   1094:        /*
                   1095:         * All three sockets involved must be locked by same lock:
                   1096:         *
                   1097:         * local endpoint (so)
                   1098:         * remote endpoint (so2)
1.131     yamt     1099:         * queue head (so2->so_head, only if PR_CONNREQUIRED)
1.112     ad       1100:         */
                   1101:        KASSERT(solocked2(so, so2));
1.125     yamt     1102:        KASSERT(so->so_head == NULL);
                   1103:        if (so2->so_head != NULL) {
                   1104:                KASSERT(so2->so_lock == uipc_lock);
                   1105:                KASSERT(solocked2(so2, so2->so_head));
1.112     ad       1106:        }
                   1107:
1.1       cgd      1108:        unp2 = sotounpcb(so2);
                   1109:        unp->unp_conn = unp2;
                   1110:        switch (so->so_type) {
                   1111:
                   1112:        case SOCK_DGRAM:
                   1113:                unp->unp_nextref = unp2->unp_refs;
                   1114:                unp2->unp_refs = unp;
                   1115:                soisconnected(so);
                   1116:                break;
                   1117:
1.134     manu     1118:        case SOCK_SEQPACKET: /* FALLTHROUGH */
1.1       cgd      1119:        case SOCK_STREAM:
                   1120:                unp2->unp_conn = unp;
1.72      matt     1121:                if (req == PRU_CONNECT &&
                   1122:                    ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT))
                   1123:                        soisconnecting(so);
                   1124:                else
                   1125:                        soisconnected(so);
1.1       cgd      1126:                soisconnected(so2);
1.112     ad       1127:                /*
                   1128:                 * If the connection is fully established, break the
                   1129:                 * association with uipc_lock and give the connected
                   1130:                 * pair a seperate lock to share.  For CONNECT2, we
                   1131:                 * require that the locks already match (the sockets
                   1132:                 * are created that way).
                   1133:                 */
1.125     yamt     1134:                if (req == PRU_CONNECT) {
                   1135:                        KASSERT(so2->so_head != NULL);
1.112     ad       1136:                        unp_setpeerlocks(so, so2);
1.125     yamt     1137:                }
1.1       cgd      1138:                break;
                   1139:
                   1140:        default:
                   1141:                panic("unp_connect2");
                   1142:        }
                   1143:        return (0);
                   1144: }
                   1145:
1.5       andrew   1146: void
1.76      matt     1147: unp_disconnect(struct unpcb *unp)
1.1       cgd      1148: {
1.46      augustss 1149:        struct unpcb *unp2 = unp->unp_conn;
1.112     ad       1150:        struct socket *so;
1.1       cgd      1151:
                   1152:        if (unp2 == 0)
                   1153:                return;
                   1154:        unp->unp_conn = 0;
1.112     ad       1155:        so = unp->unp_socket;
                   1156:        switch (so->so_type) {
1.1       cgd      1157:        case SOCK_DGRAM:
                   1158:                if (unp2->unp_refs == unp)
                   1159:                        unp2->unp_refs = unp->unp_nextref;
                   1160:                else {
                   1161:                        unp2 = unp2->unp_refs;
                   1162:                        for (;;) {
1.112     ad       1163:                                KASSERT(solocked2(so, unp2->unp_socket));
1.1       cgd      1164:                                if (unp2 == 0)
                   1165:                                        panic("unp_disconnect");
                   1166:                                if (unp2->unp_nextref == unp)
                   1167:                                        break;
                   1168:                                unp2 = unp2->unp_nextref;
                   1169:                        }
                   1170:                        unp2->unp_nextref = unp->unp_nextref;
                   1171:                }
                   1172:                unp->unp_nextref = 0;
1.112     ad       1173:                so->so_state &= ~SS_ISCONNECTED;
1.1       cgd      1174:                break;
                   1175:
1.134     manu     1176:        case SOCK_SEQPACKET: /* FALLTHROUGH */
1.1       cgd      1177:        case SOCK_STREAM:
1.112     ad       1178:                KASSERT(solocked2(so, unp2->unp_socket));
                   1179:                soisdisconnected(so);
1.1       cgd      1180:                unp2->unp_conn = 0;
                   1181:                soisdisconnected(unp2->unp_socket);
                   1182:                break;
                   1183:        }
                   1184: }
                   1185:
                   1186: #ifdef notdef
1.76      matt     1187: unp_abort(struct unpcb *unp)
1.1       cgd      1188: {
                   1189:        unp_detach(unp);
                   1190: }
                   1191: #endif
                   1192:
1.5       andrew   1193: void
1.76      matt     1194: unp_shutdown(struct unpcb *unp)
1.1       cgd      1195: {
                   1196:        struct socket *so;
                   1197:
1.134     manu     1198:        switch(unp->unp_socket->so_type) {
                   1199:        case SOCK_SEQPACKET: /* FALLTHROUGH */
                   1200:        case SOCK_STREAM:
                   1201:                if (unp->unp_conn && (so = unp->unp_conn->unp_socket))
                   1202:                        socantrcvmore(so);
                   1203:                break;
                   1204:        default:
                   1205:                break;
                   1206:        }
1.1       cgd      1207: }
                   1208:
1.112     ad       1209: bool
1.76      matt     1210: unp_drop(struct unpcb *unp, int errno)
1.1       cgd      1211: {
                   1212:        struct socket *so = unp->unp_socket;
                   1213:
1.112     ad       1214:        KASSERT(solocked(so));
                   1215:
1.1       cgd      1216:        so->so_error = errno;
                   1217:        unp_disconnect(unp);
                   1218:        if (so->so_head) {
1.112     ad       1219:                so->so_pcb = NULL;
                   1220:                /* sofree() drops the socket lock */
1.14      mycroft  1221:                sofree(so);
1.112     ad       1222:                unp_free(unp);
                   1223:                return true;
1.1       cgd      1224:        }
1.112     ad       1225:        return false;
1.1       cgd      1226: }
                   1227:
                   1228: #ifdef notdef
1.76      matt     1229: unp_drain(void)
1.1       cgd      1230: {
                   1231:
                   1232: }
                   1233: #endif
                   1234:
1.5       andrew   1235: int
1.136     christos 1236: unp_externalize(struct mbuf *rights, struct lwp *l, int flags)
1.1       cgd      1237: {
1.138     christos 1238:        struct cmsghdr * const cm = mtod(rights, struct cmsghdr *);
                   1239:        struct proc * const p = l->l_proc;
1.106     ad       1240:        file_t **rp;
1.138     christos 1241:        int error = 0;
1.47      thorpej  1242:
1.138     christos 1243:        const size_t nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
1.106     ad       1244:            sizeof(file_t *);
1.1       cgd      1245:
1.138     christos 1246:        int * const fdp = kmem_alloc(nfds * sizeof(int), KM_SLEEP);
1.101     ad       1247:        rw_enter(&p->p_cwdi->cwdi_lock, RW_READER);
1.50      thorpej  1248:
1.121     mrg      1249:        /* Make sure the recipient should be able to see the files.. */
1.139.2.1  tls      1250:        rp = (file_t **)CMSG_DATA(cm);
                   1251:        for (size_t i = 0; i < nfds; i++) {
                   1252:                file_t * const fp = *rp++;
                   1253:                if (fp == NULL) {
                   1254:                        error = EINVAL;
                   1255:                        goto out;
                   1256:                }
                   1257:                /*
                   1258:                 * If we are in a chroot'ed directory, and
                   1259:                 * someone wants to pass us a directory, make
                   1260:                 * sure it's inside the subtree we're allowed
                   1261:                 * to access.
                   1262:                 */
                   1263:                if (p->p_cwdi->cwdi_rdir != NULL && fp->f_type == DTYPE_VNODE) {
                   1264:                        vnode_t *vp = (vnode_t *)fp->f_data;
                   1265:                        if ((vp->v_type == VDIR) &&
                   1266:                            !vn_isunder(vp, p->p_cwdi->cwdi_rdir, l)) {
                   1267:                                error = EPERM;
                   1268:                                goto out;
1.39      sommerfe 1269:                        }
                   1270:                }
                   1271:        }
1.50      thorpej  1272:
                   1273:  restart:
1.24      cgd      1274:        /*
1.50      thorpej  1275:         * First loop -- allocate file descriptor table slots for the
1.121     mrg      1276:         * new files.
1.24      cgd      1277:         */
1.138     christos 1278:        for (size_t i = 0; i < nfds; i++) {
1.106     ad       1279:                if ((error = fd_alloc(p, 0, &fdp[i])) != 0) {
1.49      thorpej  1280:                        /*
1.50      thorpej  1281:                         * Back out what we've done so far.
1.49      thorpej  1282:                         */
1.138     christos 1283:                        while (i-- > 0) {
1.106     ad       1284:                                fd_abort(p, NULL, fdp[i]);
                   1285:                        }
1.50      thorpej  1286:                        if (error == ENOSPC) {
1.106     ad       1287:                                fd_tryexpand(p);
1.50      thorpej  1288:                                error = 0;
1.138     christos 1289:                                goto restart;
1.50      thorpej  1290:                        }
1.138     christos 1291:                        /*
                   1292:                         * This is the error that has historically
                   1293:                         * been returned, and some callers may
                   1294:                         * expect it.
                   1295:                         */
                   1296:                        error = EMSGSIZE;
                   1297:                        goto out;
1.49      thorpej  1298:                }
1.1       cgd      1299:        }
1.24      cgd      1300:
                   1301:        /*
1.50      thorpej  1302:         * Now that adding them has succeeded, update all of the
1.121     mrg      1303:         * file passing state and affix the descriptors.
1.112     ad       1304:         */
1.106     ad       1305:        rp = (file_t **)CMSG_DATA(cm);
1.138     christos 1306:        int *ofdp = (int *)CMSG_DATA(cm);
                   1307:        for (size_t i = 0; i < nfds; i++) {
                   1308:                file_t * const fp = *rp++;
                   1309:                const int fd = fdp[i];
1.106     ad       1310:                atomic_dec_uint(&unp_rights);
1.136     christos 1311:                fd_set_exclose(l, fd, (flags & O_CLOEXEC) != 0);
                   1312:                fd_affix(p, fp, fd);
1.138     christos 1313:                /*
                   1314:                 * Done with this file pointer, replace it with a fd;
                   1315:                 */
                   1316:                *ofdp++ = fd;
1.106     ad       1317:                mutex_enter(&fp->f_lock);
1.50      thorpej  1318:                fp->f_msgcount--;
1.106     ad       1319:                mutex_exit(&fp->f_lock);
                   1320:                /*
                   1321:                 * Note that fd_affix() adds a reference to the file.
                   1322:                 * The file may already have been closed by another
                   1323:                 * LWP in the process, so we must drop the reference
                   1324:                 * added by unp_internalize() with closef().
                   1325:                 */
                   1326:                closef(fp);
1.50      thorpej  1327:        }
                   1328:
                   1329:        /*
1.138     christos 1330:         * Adjust length, in case of transition from large file_t
                   1331:         * pointers to ints.
1.50      thorpej  1332:         */
1.138     christos 1333:        if (sizeof(file_t *) != sizeof(int)) {
                   1334:                cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
                   1335:                rights->m_len = CMSG_SPACE(nfds * sizeof(int));
                   1336:        }
1.50      thorpej  1337:  out:
1.138     christos 1338:        if (__predict_false(error != 0)) {
1.139.2.2! tls      1339:                file_t **const fpp = (file_t **)CMSG_DATA(cm);
        !          1340:                for (size_t i = 0; i < nfds; i++)
        !          1341:                        unp_discard_now(fpp[i]);
        !          1342:                /*
        !          1343:                 * Truncate the array so that nobody will try to interpret
        !          1344:                 * what is now garbage in it.
        !          1345:                 */
        !          1346:                cm->cmsg_len = CMSG_LEN(0);
        !          1347:                rights->m_len = CMSG_SPACE(0);
1.138     christos 1348:        }
                   1349:
1.139.2.2! tls      1350:        /*
        !          1351:         * Don't disclose kernel memory in the alignment space.
        !          1352:         */
        !          1353:        KASSERT(cm->cmsg_len <= rights->m_len);
        !          1354:        memset(&mtod(rights, char *)[cm->cmsg_len], 0, rights->m_len -
        !          1355:            cm->cmsg_len);
        !          1356:
1.101     ad       1357:        rw_exit(&p->p_cwdi->cwdi_lock);
1.138     christos 1358:        kmem_free(fdp, nfds * sizeof(int));
1.139     christos 1359:        return error;
1.1       cgd      1360: }
                   1361:
1.5       andrew   1362: int
1.112     ad       1363: unp_internalize(struct mbuf **controlp)
1.1       cgd      1364: {
1.121     mrg      1365:        filedesc_t *fdescp = curlwp->l_fd;
1.108     yamt     1366:        struct mbuf *control = *controlp;
1.73      martin   1367:        struct cmsghdr *newcm, *cm = mtod(control, struct cmsghdr *);
1.106     ad       1368:        file_t **rp, **files;
                   1369:        file_t *fp;
1.46      augustss 1370:        int i, fd, *fdp;
1.106     ad       1371:        int nfds, error;
1.121     mrg      1372:        u_int maxmsg;
1.106     ad       1373:
                   1374:        error = 0;
                   1375:        newcm = NULL;
1.38      thorpej  1376:
1.106     ad       1377:        /* Sanity check the control message header. */
1.66      jdolecek 1378:        if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
1.117     christos 1379:            cm->cmsg_len > control->m_len ||
                   1380:            cm->cmsg_len < CMSG_ALIGN(sizeof(*cm)))
1.1       cgd      1381:                return (EINVAL);
1.24      cgd      1382:
1.106     ad       1383:        /*
                   1384:         * Verify that the file descriptors are valid, and acquire
                   1385:         * a reference to each.
                   1386:         */
1.47      thorpej  1387:        nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int);
                   1388:        fdp = (int *)CMSG_DATA(cm);
1.121     mrg      1389:        maxmsg = maxfiles / unp_rights_ratio;
1.24      cgd      1390:        for (i = 0; i < nfds; i++) {
                   1391:                fd = *fdp++;
1.121     mrg      1392:                if (atomic_inc_uint_nv(&unp_rights) > maxmsg) {
                   1393:                        atomic_dec_uint(&unp_rights);
                   1394:                        nfds = i;
                   1395:                        error = EAGAIN;
                   1396:                        goto out;
                   1397:                }
1.137     martin   1398:                if ((fp = fd_getfile(fd)) == NULL
                   1399:                    || fp->f_type == DTYPE_KQUEUE) {
                   1400:                        if (fp)
                   1401:                                fd_putfile(fd);
1.121     mrg      1402:                        atomic_dec_uint(&unp_rights);
1.120     pooka    1403:                        nfds = i;
1.106     ad       1404:                        error = EBADF;
                   1405:                        goto out;
1.101     ad       1406:                }
1.24      cgd      1407:        }
                   1408:
1.106     ad       1409:        /* Allocate new space and copy header into it. */
                   1410:        newcm = malloc(CMSG_SPACE(nfds * sizeof(file_t *)), M_MBUF, M_WAITOK);
                   1411:        if (newcm == NULL) {
                   1412:                error = E2BIG;
                   1413:                goto out;
                   1414:        }
                   1415:        memcpy(newcm, cm, sizeof(struct cmsghdr));
                   1416:        files = (file_t **)CMSG_DATA(newcm);
                   1417:
1.24      cgd      1418:        /*
1.106     ad       1419:         * Transform the file descriptors into file_t pointers, in
1.24      cgd      1420:         * reverse order so that if pointers are bigger than ints, the
1.106     ad       1421:         * int won't get until we're done.  No need to lock, as we have
                   1422:         * already validated the descriptors with fd_getfile().
1.24      cgd      1423:         */
1.94      cbiere   1424:        fdp = (int *)CMSG_DATA(cm) + nfds;
                   1425:        rp = files + nfds;
1.24      cgd      1426:        for (i = 0; i < nfds; i++) {
1.126     ad       1427:                fp = fdescp->fd_dt->dt_ff[*--fdp]->ff_file;
1.106     ad       1428:                KASSERT(fp != NULL);
                   1429:                mutex_enter(&fp->f_lock);
1.94      cbiere   1430:                *--rp = fp;
1.1       cgd      1431:                fp->f_count++;
                   1432:                fp->f_msgcount++;
1.106     ad       1433:                mutex_exit(&fp->f_lock);
                   1434:        }
                   1435:
                   1436:  out:
                   1437:        /* Release descriptor references. */
                   1438:        fdp = (int *)CMSG_DATA(cm);
                   1439:        for (i = 0; i < nfds; i++) {
                   1440:                fd_putfile(*fdp++);
1.121     mrg      1441:                if (error != 0) {
                   1442:                        atomic_dec_uint(&unp_rights);
                   1443:                }
1.1       cgd      1444:        }
1.73      martin   1445:
1.106     ad       1446:        if (error == 0) {
1.108     yamt     1447:                if (control->m_flags & M_EXT) {
                   1448:                        m_freem(control);
                   1449:                        *controlp = control = m_get(M_WAIT, MT_CONTROL);
                   1450:                }
1.106     ad       1451:                MEXTADD(control, newcm, CMSG_SPACE(nfds * sizeof(file_t *)),
1.73      martin   1452:                    M_MBUF, NULL, NULL);
                   1453:                cm = newcm;
1.106     ad       1454:                /*
                   1455:                 * Adjust message & mbuf to note amount of space
                   1456:                 * actually used.
                   1457:                 */
                   1458:                cm->cmsg_len = CMSG_LEN(nfds * sizeof(file_t *));
                   1459:                control->m_len = CMSG_SPACE(nfds * sizeof(file_t *));
1.73      martin   1460:        }
                   1461:
1.106     ad       1462:        return error;
1.30      thorpej  1463: }
                   1464:
                   1465: struct mbuf *
1.92      ad       1466: unp_addsockcred(struct lwp *l, struct mbuf *control)
1.30      thorpej  1467: {
                   1468:        struct cmsghdr *cmp;
                   1469:        struct sockcred *sc;
                   1470:        struct mbuf *m, *n;
1.47      thorpej  1471:        int len, space, i;
1.30      thorpej  1472:
1.92      ad       1473:        len = CMSG_LEN(SOCKCREDSIZE(kauth_cred_ngroups(l->l_cred)));
                   1474:        space = CMSG_SPACE(SOCKCREDSIZE(kauth_cred_ngroups(l->l_cred)));
1.30      thorpej  1475:
                   1476:        m = m_get(M_WAIT, MT_CONTROL);
1.47      thorpej  1477:        if (space > MLEN) {
                   1478:                if (space > MCLBYTES)
                   1479:                        MEXTMALLOC(m, space, M_WAITOK);
1.30      thorpej  1480:                else
1.59      matt     1481:                        m_clget(m, M_WAIT);
1.30      thorpej  1482:                if ((m->m_flags & M_EXT) == 0) {
                   1483:                        m_free(m);
                   1484:                        return (control);
                   1485:                }
                   1486:        }
                   1487:
1.47      thorpej  1488:        m->m_len = space;
1.30      thorpej  1489:        m->m_next = NULL;
                   1490:        cmp = mtod(m, struct cmsghdr *);
                   1491:        sc = (struct sockcred *)CMSG_DATA(cmp);
                   1492:        cmp->cmsg_len = len;
                   1493:        cmp->cmsg_level = SOL_SOCKET;
                   1494:        cmp->cmsg_type = SCM_CREDS;
1.92      ad       1495:        sc->sc_uid = kauth_cred_getuid(l->l_cred);
                   1496:        sc->sc_euid = kauth_cred_geteuid(l->l_cred);
                   1497:        sc->sc_gid = kauth_cred_getgid(l->l_cred);
                   1498:        sc->sc_egid = kauth_cred_getegid(l->l_cred);
                   1499:        sc->sc_ngroups = kauth_cred_ngroups(l->l_cred);
1.30      thorpej  1500:        for (i = 0; i < sc->sc_ngroups; i++)
1.92      ad       1501:                sc->sc_groups[i] = kauth_cred_group(l->l_cred, i);
1.30      thorpej  1502:
                   1503:        /*
                   1504:         * If a control message already exists, append us to the end.
                   1505:         */
                   1506:        if (control != NULL) {
                   1507:                for (n = control; n->m_next != NULL; n = n->m_next)
                   1508:                        ;
                   1509:                n->m_next = m;
                   1510:        } else
                   1511:                control = m;
                   1512:
                   1513:        return (control);
1.1       cgd      1514: }
                   1515:
1.39      sommerfe 1516: /*
1.121     mrg      1517:  * Do a mark-sweep GC of files in the system, to free up any which are
                   1518:  * caught in flight to an about-to-be-closed socket.  Additionally,
                   1519:  * process deferred file closures.
1.39      sommerfe 1520:  */
1.121     mrg      1521: static void
                   1522: unp_gc(file_t *dp)
1.1       cgd      1523: {
1.121     mrg      1524:        extern  struct domain unixdomain;
                   1525:        file_t *fp, *np;
1.46      augustss 1526:        struct socket *so, *so1;
1.121     mrg      1527:        u_int i, old, new;
                   1528:        bool didwork;
1.1       cgd      1529:
1.121     mrg      1530:        KASSERT(curlwp == unp_thread_lwp);
                   1531:        KASSERT(mutex_owned(&filelist_lock));
1.106     ad       1532:
1.121     mrg      1533:        /*
                   1534:         * First, process deferred file closures.
                   1535:         */
                   1536:        while (!SLIST_EMPTY(&unp_thread_discard)) {
                   1537:                fp = SLIST_FIRST(&unp_thread_discard);
                   1538:                KASSERT(fp->f_unpcount > 0);
                   1539:                KASSERT(fp->f_count > 0);
                   1540:                KASSERT(fp->f_msgcount > 0);
                   1541:                KASSERT(fp->f_count >= fp->f_unpcount);
                   1542:                KASSERT(fp->f_count >= fp->f_msgcount);
                   1543:                KASSERT(fp->f_msgcount >= fp->f_unpcount);
                   1544:                SLIST_REMOVE_HEAD(&unp_thread_discard, f_unplist);
                   1545:                i = fp->f_unpcount;
                   1546:                fp->f_unpcount = 0;
                   1547:                mutex_exit(&filelist_lock);
                   1548:                for (; i != 0; i--) {
                   1549:                        unp_discard_now(fp);
                   1550:                }
                   1551:                mutex_enter(&filelist_lock);
                   1552:        }
1.39      sommerfe 1553:
1.121     mrg      1554:        /*
                   1555:         * Clear mark bits.  Ensure that we don't consider new files
                   1556:         * entering the file table during this loop (they will not have
                   1557:         * FSCAN set).
                   1558:         */
1.106     ad       1559:        unp_defer = 0;
                   1560:        LIST_FOREACH(fp, &filehead, f_list) {
1.121     mrg      1561:                for (old = fp->f_flag;; old = new) {
                   1562:                        new = atomic_cas_uint(&fp->f_flag, old,
                   1563:                            (old | FSCAN) & ~(FMARK|FDEFER));
                   1564:                        if (__predict_true(old == new)) {
                   1565:                                break;
                   1566:                        }
                   1567:                }
1.106     ad       1568:        }
1.39      sommerfe 1569:
                   1570:        /*
1.121     mrg      1571:         * Iterate over the set of sockets, marking ones believed (based on
                   1572:         * refcount) to be referenced from a process, and marking for rescan
                   1573:         * sockets which are queued on a socket.  Rescan continues descending
                   1574:         * and searching for sockets referenced by sockets (FDEFER), until
                   1575:         * there are no more socket->socket references to be discovered.
1.39      sommerfe 1576:         */
1.1       cgd      1577:        do {
1.121     mrg      1578:                didwork = false;
                   1579:                for (fp = LIST_FIRST(&filehead); fp != NULL; fp = np) {
                   1580:                        KASSERT(mutex_owned(&filelist_lock));
                   1581:                        np = LIST_NEXT(fp, f_list);
1.106     ad       1582:                        mutex_enter(&fp->f_lock);
1.121     mrg      1583:                        if ((fp->f_flag & FDEFER) != 0) {
1.106     ad       1584:                                atomic_and_uint(&fp->f_flag, ~FDEFER);
1.1       cgd      1585:                                unp_defer--;
1.106     ad       1586:                                KASSERT(fp->f_count != 0);
1.1       cgd      1587:                        } else {
1.101     ad       1588:                                if (fp->f_count == 0 ||
1.121     mrg      1589:                                    (fp->f_flag & FMARK) != 0 ||
                   1590:                                    fp->f_count == fp->f_msgcount ||
                   1591:                                    fp->f_unpcount != 0) {
1.106     ad       1592:                                        mutex_exit(&fp->f_lock);
1.1       cgd      1593:                                        continue;
1.101     ad       1594:                                }
1.1       cgd      1595:                        }
1.106     ad       1596:                        atomic_or_uint(&fp->f_flag, FMARK);
1.39      sommerfe 1597:
1.1       cgd      1598:                        if (fp->f_type != DTYPE_SOCKET ||
1.112     ad       1599:                            (so = fp->f_data) == NULL ||
1.101     ad       1600:                            so->so_proto->pr_domain != &unixdomain ||
1.121     mrg      1601:                            (so->so_proto->pr_flags & PR_RIGHTS) == 0) {
1.106     ad       1602:                                mutex_exit(&fp->f_lock);
1.1       cgd      1603:                                continue;
1.101     ad       1604:                        }
1.121     mrg      1605:
                   1606:                        /* Gain file ref, mark our position, and unlock. */
                   1607:                        didwork = true;
                   1608:                        LIST_INSERT_AFTER(fp, dp, f_list);
                   1609:                        fp->f_count++;
1.106     ad       1610:                        mutex_exit(&fp->f_lock);
1.121     mrg      1611:                        mutex_exit(&filelist_lock);
1.101     ad       1612:
1.112     ad       1613:                        /*
1.121     mrg      1614:                         * Mark files referenced from sockets queued on the
                   1615:                         * accept queue as well.
1.112     ad       1616:                         */
                   1617:                        solock(so);
1.39      sommerfe 1618:                        unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
1.121     mrg      1619:                        if ((so->so_options & SO_ACCEPTCONN) != 0) {
1.54      matt     1620:                                TAILQ_FOREACH(so1, &so->so_q0, so_qe) {
1.39      sommerfe 1621:                                        unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
                   1622:                                }
1.54      matt     1623:                                TAILQ_FOREACH(so1, &so->so_q, so_qe) {
1.39      sommerfe 1624:                                        unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
                   1625:                                }
                   1626:                        }
1.112     ad       1627:                        sounlock(so);
1.121     mrg      1628:
                   1629:                        /* Re-lock and restart from where we left off. */
                   1630:                        closef(fp);
                   1631:                        mutex_enter(&filelist_lock);
                   1632:                        np = LIST_NEXT(dp, f_list);
                   1633:                        LIST_REMOVE(dp, f_list);
1.1       cgd      1634:                }
1.121     mrg      1635:                /*
                   1636:                 * Bail early if we did nothing in the loop above.  Could
                   1637:                 * happen because of concurrent activity causing unp_defer
                   1638:                 * to get out of sync.
                   1639:                 */
                   1640:        } while (unp_defer != 0 && didwork);
1.101     ad       1641:
1.8       mycroft  1642:        /*
1.121     mrg      1643:         * Sweep pass.
1.8       mycroft  1644:         *
1.121     mrg      1645:         * We grab an extra reference to each of the files that are
                   1646:         * not otherwise accessible and then free the rights that are
                   1647:         * stored in messages on them.
1.8       mycroft  1648:         */
1.121     mrg      1649:        for (fp = LIST_FIRST(&filehead); fp != NULL; fp = np) {
                   1650:                KASSERT(mutex_owned(&filelist_lock));
                   1651:                np = LIST_NEXT(fp, f_list);
1.106     ad       1652:                mutex_enter(&fp->f_lock);
1.121     mrg      1653:
                   1654:                /*
                   1655:                 * Ignore non-sockets.
                   1656:                 * Ignore dead sockets, or sockets with pending close.
                   1657:                 * Ignore sockets obviously referenced elsewhere.
                   1658:                 * Ignore sockets marked as referenced by our scan.
                   1659:                 * Ignore new sockets that did not exist during the scan.
                   1660:                 */
                   1661:                if (fp->f_type != DTYPE_SOCKET ||
                   1662:                    fp->f_count == 0 || fp->f_unpcount != 0 ||
                   1663:                    fp->f_count != fp->f_msgcount ||
                   1664:                    (fp->f_flag & (FMARK | FSCAN)) != FSCAN) {
                   1665:                        mutex_exit(&fp->f_lock);
                   1666:                        continue;
1.8       mycroft  1667:                }
1.121     mrg      1668:
                   1669:                /* Gain file ref, mark our position, and unlock. */
                   1670:                LIST_INSERT_AFTER(fp, dp, f_list);
                   1671:                fp->f_count++;
1.106     ad       1672:                mutex_exit(&fp->f_lock);
1.121     mrg      1673:                mutex_exit(&filelist_lock);
                   1674:
                   1675:                /*
                   1676:                 * Flush all data from the socket's receive buffer.
                   1677:                 * This will cause files referenced only by the
                   1678:                 * socket to be queued for close.
                   1679:                 */
                   1680:                so = fp->f_data;
                   1681:                solock(so);
                   1682:                sorflush(so);
                   1683:                sounlock(so);
                   1684:
                   1685:                /* Re-lock and restart from where we left off. */
                   1686:                closef(fp);
                   1687:                mutex_enter(&filelist_lock);
                   1688:                np = LIST_NEXT(dp, f_list);
                   1689:                LIST_REMOVE(dp, f_list);
                   1690:        }
                   1691: }
                   1692:
                   1693: /*
                   1694:  * Garbage collector thread.  While SCM_RIGHTS messages are in transit,
                   1695:  * wake once per second to garbage collect.  Run continually while we
                   1696:  * have deferred closes to process.
                   1697:  */
                   1698: static void
                   1699: unp_thread(void *cookie)
                   1700: {
                   1701:        file_t *dp;
                   1702:
                   1703:        /* Allocate a dummy file for our scans. */
                   1704:        if ((dp = fgetdummy()) == NULL) {
                   1705:                panic("unp_thread");
1.1       cgd      1706:        }
1.101     ad       1707:
1.121     mrg      1708:        mutex_enter(&filelist_lock);
                   1709:        for (;;) {
                   1710:                KASSERT(mutex_owned(&filelist_lock));
                   1711:                if (SLIST_EMPTY(&unp_thread_discard)) {
                   1712:                        if (unp_rights != 0) {
                   1713:                                (void)cv_timedwait(&unp_thread_cv,
                   1714:                                    &filelist_lock, hz);
                   1715:                        } else {
                   1716:                                cv_wait(&unp_thread_cv, &filelist_lock);
                   1717:                        }
1.112     ad       1718:                }
1.121     mrg      1719:                unp_gc(dp);
1.39      sommerfe 1720:        }
1.121     mrg      1721:        /* NOTREACHED */
                   1722: }
                   1723:
                   1724: /*
                   1725:  * Kick the garbage collector into action if there is something for
                   1726:  * it to process.
                   1727:  */
                   1728: static void
                   1729: unp_thread_kick(void)
                   1730: {
                   1731:
                   1732:        if (!SLIST_EMPTY(&unp_thread_discard) || unp_rights != 0) {
                   1733:                mutex_enter(&filelist_lock);
                   1734:                cv_signal(&unp_thread_cv);
                   1735:                mutex_exit(&filelist_lock);
1.44      thorpej  1736:        }
1.1       cgd      1737: }
                   1738:
1.5       andrew   1739: void
1.76      matt     1740: unp_dispose(struct mbuf *m)
1.1       cgd      1741: {
1.8       mycroft  1742:
1.1       cgd      1743:        if (m)
1.121     mrg      1744:                unp_scan(m, unp_discard_later, 1);
1.1       cgd      1745: }
                   1746:
1.5       andrew   1747: void
1.106     ad       1748: unp_scan(struct mbuf *m0, void (*op)(file_t *), int discard)
1.1       cgd      1749: {
1.46      augustss 1750:        struct mbuf *m;
1.121     mrg      1751:        file_t **rp, *fp;
1.46      augustss 1752:        struct cmsghdr *cm;
1.121     mrg      1753:        int i, qfds;
1.1       cgd      1754:
                   1755:        while (m0) {
1.48      thorpej  1756:                for (m = m0; m; m = m->m_next) {
1.121     mrg      1757:                        if (m->m_type != MT_CONTROL ||
                   1758:                            m->m_len < sizeof(*cm)) {
                   1759:                                continue;
                   1760:                        }
                   1761:                        cm = mtod(m, struct cmsghdr *);
                   1762:                        if (cm->cmsg_level != SOL_SOCKET ||
                   1763:                            cm->cmsg_type != SCM_RIGHTS)
                   1764:                                continue;
                   1765:                        qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm)))
                   1766:                            / sizeof(file_t *);
                   1767:                        rp = (file_t **)CMSG_DATA(cm);
                   1768:                        for (i = 0; i < qfds; i++) {
                   1769:                                fp = *rp;
                   1770:                                if (discard) {
                   1771:                                        *rp = 0;
1.39      sommerfe 1772:                                }
1.121     mrg      1773:                                (*op)(fp);
                   1774:                                rp++;
1.1       cgd      1775:                        }
1.48      thorpej  1776:                }
1.52      thorpej  1777:                m0 = m0->m_nextpkt;
1.1       cgd      1778:        }
                   1779: }
                   1780:
1.5       andrew   1781: void
1.106     ad       1782: unp_mark(file_t *fp)
1.1       cgd      1783: {
1.101     ad       1784:
1.39      sommerfe 1785:        if (fp == NULL)
                   1786:                return;
1.80      perry    1787:
1.39      sommerfe 1788:        /* If we're already deferred, don't screw up the defer count */
1.106     ad       1789:        mutex_enter(&fp->f_lock);
1.101     ad       1790:        if (fp->f_flag & (FMARK | FDEFER)) {
1.106     ad       1791:                mutex_exit(&fp->f_lock);
1.1       cgd      1792:                return;
1.101     ad       1793:        }
1.39      sommerfe 1794:
                   1795:        /*
1.121     mrg      1796:         * Minimize the number of deferrals...  Sockets are the only type of
                   1797:         * file which can hold references to another file, so just mark
                   1798:         * other files, and defer unmarked sockets for the next pass.
1.39      sommerfe 1799:         */
                   1800:        if (fp->f_type == DTYPE_SOCKET) {
                   1801:                unp_defer++;
1.106     ad       1802:                KASSERT(fp->f_count != 0);
                   1803:                atomic_or_uint(&fp->f_flag, FDEFER);
1.39      sommerfe 1804:        } else {
1.106     ad       1805:                atomic_or_uint(&fp->f_flag, FMARK);
1.39      sommerfe 1806:        }
1.106     ad       1807:        mutex_exit(&fp->f_lock);
1.1       cgd      1808: }
                   1809:
1.121     mrg      1810: static void
                   1811: unp_discard_now(file_t *fp)
1.1       cgd      1812: {
1.106     ad       1813:
1.39      sommerfe 1814:        if (fp == NULL)
                   1815:                return;
1.106     ad       1816:
1.121     mrg      1817:        KASSERT(fp->f_count > 0);
                   1818:        KASSERT(fp->f_msgcount > 0);
                   1819:
1.106     ad       1820:        mutex_enter(&fp->f_lock);
1.1       cgd      1821:        fp->f_msgcount--;
1.106     ad       1822:        mutex_exit(&fp->f_lock);
                   1823:        atomic_dec_uint(&unp_rights);
                   1824:        (void)closef(fp);
1.1       cgd      1825: }
1.121     mrg      1826:
                   1827: static void
                   1828: unp_discard_later(file_t *fp)
                   1829: {
                   1830:
                   1831:        if (fp == NULL)
                   1832:                return;
                   1833:
                   1834:        KASSERT(fp->f_count > 0);
                   1835:        KASSERT(fp->f_msgcount > 0);
                   1836:
                   1837:        mutex_enter(&filelist_lock);
                   1838:        if (fp->f_unpcount++ == 0) {
                   1839:                SLIST_INSERT_HEAD(&unp_thread_discard, fp, f_unplist);
                   1840:        }
                   1841:        mutex_exit(&filelist_lock);
                   1842: }

CVSweb <webmaster@jp.NetBSD.org>