[BACK]Return to uipc_usrreq.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / kern

Annotation of src/sys/kern/uipc_usrreq.c, Revision 1.140

1.140   ! christos    1: /*     $NetBSD: uipc_usrreq.c,v 1.139 2012/07/30 10:45:03 christos Exp $       */
1.30      thorpej     2:
                      3: /*-
1.121     mrg         4:  * Copyright (c) 1998, 2000, 2004, 2008, 2009 The NetBSD Foundation, Inc.
1.30      thorpej     5:  * All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to The NetBSD Foundation
                      8:  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
1.121     mrg         9:  * NASA Ames Research Center, and by Andrew Doran.
1.30      thorpej    10:  *
                     11:  * Redistribution and use in source and binary forms, with or without
                     12:  * modification, are permitted provided that the following conditions
                     13:  * are met:
                     14:  * 1. Redistributions of source code must retain the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer.
                     16:  * 2. Redistributions in binary form must reproduce the above copyright
                     17:  *    notice, this list of conditions and the following disclaimer in the
                     18:  *    documentation and/or other materials provided with the distribution.
                     19:  *
                     20:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     21:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     22:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     23:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     24:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     25:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     26:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     27:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     28:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     29:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     30:  * POSSIBILITY OF SUCH DAMAGE.
                     31:  */
1.10      cgd        32:
1.1       cgd        33: /*
1.8       mycroft    34:  * Copyright (c) 1982, 1986, 1989, 1991, 1993
                     35:  *     The Regents of the University of California.  All rights reserved.
1.1       cgd        36:  *
                     37:  * Redistribution and use in source and binary forms, with or without
                     38:  * modification, are permitted provided that the following conditions
                     39:  * are met:
                     40:  * 1. Redistributions of source code must retain the above copyright
                     41:  *    notice, this list of conditions and the following disclaimer.
                     42:  * 2. Redistributions in binary form must reproduce the above copyright
                     43:  *    notice, this list of conditions and the following disclaimer in the
                     44:  *    documentation and/or other materials provided with the distribution.
1.67      agc        45:  * 3. Neither the name of the University nor the names of its contributors
                     46:  *    may be used to endorse or promote products derived from this software
                     47:  *    without specific prior written permission.
                     48:  *
                     49:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     50:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     51:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     52:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     53:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     54:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     55:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     56:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     57:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     58:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     59:  * SUCH DAMAGE.
                     60:  *
                     61:  *     @(#)uipc_usrreq.c       8.9 (Berkeley) 5/14/95
                     62:  */
                     63:
                     64: /*
                     65:  * Copyright (c) 1997 Christopher G. Demetriou.  All rights reserved.
                     66:  *
                     67:  * Redistribution and use in source and binary forms, with or without
                     68:  * modification, are permitted provided that the following conditions
                     69:  * are met:
                     70:  * 1. Redistributions of source code must retain the above copyright
                     71:  *    notice, this list of conditions and the following disclaimer.
                     72:  * 2. Redistributions in binary form must reproduce the above copyright
                     73:  *    notice, this list of conditions and the following disclaimer in the
                     74:  *    documentation and/or other materials provided with the distribution.
1.1       cgd        75:  * 3. All advertising materials mentioning features or use of this software
                     76:  *    must display the following acknowledgement:
                     77:  *     This product includes software developed by the University of
                     78:  *     California, Berkeley and its contributors.
                     79:  * 4. Neither the name of the University nor the names of its contributors
                     80:  *    may be used to endorse or promote products derived from this software
                     81:  *    without specific prior written permission.
                     82:  *
                     83:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     84:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     85:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     86:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     87:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     88:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     89:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     90:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     91:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     92:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     93:  * SUCH DAMAGE.
                     94:  *
1.31      fvdl       95:  *     @(#)uipc_usrreq.c       8.9 (Berkeley) 5/14/95
1.1       cgd        96:  */
1.53      lukem      97:
                     98: #include <sys/cdefs.h>
1.140   ! christos   99: __KERNEL_RCSID(0, "$NetBSD: uipc_usrreq.c,v 1.139 2012/07/30 10:45:03 christos Exp $");
1.1       cgd       100:
1.7       mycroft   101: #include <sys/param.h>
1.8       mycroft   102: #include <sys/systm.h>
1.7       mycroft   103: #include <sys/proc.h>
                    104: #include <sys/filedesc.h>
                    105: #include <sys/domain.h>
                    106: #include <sys/protosw.h>
                    107: #include <sys/socket.h>
                    108: #include <sys/socketvar.h>
                    109: #include <sys/unpcb.h>
                    110: #include <sys/un.h>
                    111: #include <sys/namei.h>
                    112: #include <sys/vnode.h>
                    113: #include <sys/file.h>
                    114: #include <sys/stat.h>
                    115: #include <sys/mbuf.h>
1.91      elad      116: #include <sys/kauth.h>
1.101     ad        117: #include <sys/kmem.h>
1.106     ad        118: #include <sys/atomic.h>
1.119     pooka     119: #include <sys/uidinfo.h>
1.121     mrg       120: #include <sys/kernel.h>
                    121: #include <sys/kthread.h>
1.1       cgd       122:
                    123: /*
                    124:  * Unix communications domain.
                    125:  *
                    126:  * TODO:
1.134     manu      127:  *     RDM
1.1       cgd       128:  *     rethink name space problems
                    129:  *     need a proper out-of-band
1.112     ad        130:  *
                    131:  * Notes on locking:
                    132:  *
                    133:  * The generic rules noted in uipc_socket2.c apply.  In addition:
                    134:  *
                    135:  * o We have a global lock, uipc_lock.
                    136:  *
                    137:  * o All datagram sockets are locked by uipc_lock.
                    138:  *
                    139:  * o For stream socketpairs, the two endpoints are created sharing the same
                    140:  *   independent lock.  Sockets presented to PRU_CONNECT2 must already have
                    141:  *   matching locks.
                    142:  *
                    143:  * o Stream sockets created via socket() start life with their own
                    144:  *   independent lock.
                    145:  *
                    146:  * o Stream connections to a named endpoint are slightly more complicated.
                    147:  *   Sockets that have called listen() have their lock pointer mutated to
                    148:  *   the global uipc_lock.  When establishing a connection, the connecting
                    149:  *   socket also has its lock mutated to uipc_lock, which matches the head
                    150:  *   (listening socket).  We create a new socket for accept() to return, and
                    151:  *   that also shares the head's lock.  Until the connection is completely
                    152:  *   done on both ends, all three sockets are locked by uipc_lock.  Once the
                    153:  *   connection is complete, the association with the head's lock is broken.
                    154:  *   The connecting socket and the socket returned from accept() have their
                    155:  *   lock pointers mutated away from uipc_lock, and back to the connecting
                    156:  *   socket's original, independent lock.  The head continues to be locked
                    157:  *   by uipc_lock.
                    158:  *
                    159:  * o If uipc_lock is determined to be a significant source of contention,
                    160:  *   it could easily be hashed out.  It is difficult to simply make it an
                    161:  *   independent lock because of visibility / garbage collection issues:
                    162:  *   if a socket has been associated with a lock at any point, that lock
                    163:  *   must remain valid until the socket is no longer visible in the system.
                    164:  *   The lock must not be freed or otherwise destroyed until any sockets
                    165:  *   that had referenced it have also been destroyed.
1.1       cgd       166:  */
                         /*
                          * Address of an unnamed socket.  unp_output() and unp_setaddr()
                          * fall back to it when the relevant unpcb has no unp_addr.
                          */
1.93      christos  167: const struct sockaddr_un sun_noname = {
                    168:        .sun_len = sizeof(sun_noname),
                    169:        .sun_family = AF_LOCAL,
                    170: };
1.1       cgd       171: ino_t  unp_ino;                        /* prototype for fake inode numbers */
                    172:
1.92      ad        173: struct mbuf *unp_addsockcred(struct lwp *, struct mbuf *);
1.121     mrg       174: static void unp_mark(file_t *);
                    175: static void unp_scan(struct mbuf *, void (*)(file_t *), int);
                    176: static void unp_discard_now(file_t *);
                    177: static void unp_discard_later(file_t *);
                    178: static void unp_thread(void *);
                    179: static void unp_thread_kick(void);
                         /* Domain-wide lock, allocated in uipc_init(). */
1.112     ad        180: static kmutex_t *uipc_lock;
                    181:
                         /*
                          * State of the "unpgc" kernel thread: uipc_init() initializes
                          * unp_thread_cv and stores the created thread in unp_thread_lwp.
                          * NOTE(review): unp_thread_discard and unp_defer are presumably
                          * the deferred-discard list and a deferral counter; their users
                          * are not visible in this part of the file - confirm.
                          */
1.121     mrg       182: static kcondvar_t unp_thread_cv;
                    183: static lwp_t *unp_thread_lwp;
                    184: static SLIST_HEAD(,file) unp_thread_discard;
                    185: static int unp_defer;
                    186:
1.112     ad        187: /*
                    188:  * Initialize Unix protocols.
                          *
                          * Allocates the domain-wide uipc_lock, initializes unp_thread_cv
                          * and creates the "unpgc" kernel thread running unp_thread().
                          * Panics if the thread cannot be created.
                    189:  */
                    190: void
                    191: uipc_init(void)
                    192: {
1.121     mrg       193:        int error;
1.112     ad        194:
                    195:        uipc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
1.121     mrg       196:        cv_init(&unp_thread_cv, "unpgc");
                    197:
                    198:        error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, unp_thread,
                    199:            NULL, &unp_thread_lwp, "unpgc");
                                /* The function returns void, so failure is fatal. */
                    200:        if (error != 0)
                    201:                panic("uipc_init %d", error);
1.112     ad        202: }
                    203:
                    204: /*
                    205:  * A connection succeeded: disassociate both endpoints from the head's
                    206:  * lock, and make them share their own lock.  There is a race here: for
                    207:  * a very brief time one endpoint will be locked by a different lock
                    208:  * than the other end.  However, since the current thread holds the old
                    209:  * lock (the listening socket's lock, the head) access can still only be
                    210:  * made to one side of the connection.
                          *
                          * so is the endpoint whose saved unp_streamlock becomes the shared
                          * lock; so2 is its peer and must have no stream lock of its own.
                          * Both must currently be locked by uipc_lock, and the caller must
                          * already own the unp_streamlock of so (asserted below).  Nothing
                          * is changed if so2 still has a so_head.
                    211:  */
                    212: static void
                    213: unp_setpeerlocks(struct socket *so, struct socket *so2)
                    214: {
                    215:        struct unpcb *unp;
                    216:        kmutex_t *lock;
                    217:
                    218:        KASSERT(solocked2(so, so2));
                    219:
                    220:        /*
                    221:         * Bail out if either end of the socket is not yet fully
                    222:         * connected or accepted.  We only break the lock association
                    223:         * with the head when the pair of sockets stand completely
                    224:         * on their own.
                    225:         */
1.125     yamt      226:        KASSERT(so->so_head == NULL);
                    227:        if (so2->so_head != NULL)
1.112     ad        228:                return;
                    229:
                    230:        /*
                    231:         * Drop references to old lock.  A third reference (from the
                    232:         * queue head) must be held as we still hold its lock.  Bonus:
                    233:         * we don't need to worry about garbage collecting the lock.
                    234:         */
                    235:        lock = so->so_lock;
                    236:        KASSERT(lock == uipc_lock);
                                /* NOTE(review): presumably one reference per endpoint. */
                    237:        mutex_obj_free(lock);
                    238:        mutex_obj_free(lock);
                    239:
                    240:        /*
                    241:         * Grab stream lock from the initiator and share between the two
                    242:         * endpoints.  Issue memory barrier to ensure all modifications
                    243:         * become globally visible before the lock change.  so2 is
                    244:         * assumed not to have a stream lock, because it was created
                    245:         * purely for the server side to accept this connection and
                    246:         * started out life using the domain-wide lock.
                    247:         */
                    248:        unp = sotounpcb(so);
                    249:        KASSERT(unp->unp_streamlock != NULL);
                    250:        KASSERT(sotounpcb(so2)->unp_streamlock == NULL);
                    251:        lock = unp->unp_streamlock;
                    252:        unp->unp_streamlock = NULL;
                                /*
                                 * NOTE(review): presumably the extra reference accounts for
                                 * the second socket that is about to point at this lock.
                                 */
                    253:        mutex_obj_hold(lock);
                    254:        membar_exit();
1.127     bouyer    255:        /*
                    256:         * possible race if lock is not held - see comment in
                    257:         * uipc_usrreq(PRU_ACCEPT).
                    258:         */
                    259:        KASSERT(mutex_owned(lock));
1.115     ad        260:        solockreset(so, lock);
                    261:        solockreset(so2, lock);
1.112     ad        262: }
                    263:
                    264: /*
                    265:  * Reset a socket's lock back to the domain-wide lock.
                          *
                          * The socket must be locked on entry.  If it is already locked by
                          * uipc_lock nothing happens.  Otherwise the current lock is saved
                          * in unp_streamlock, and the socket leaves this function locked
                          * by uipc_lock with the old lock released.
                    266:  */
                    267: static void
                    268: unp_resetlock(struct socket *so)
                    269: {
                    270:        kmutex_t *olock, *nlock;
                    271:        struct unpcb *unp;
                    272:
                    273:        KASSERT(solocked(so));
                    274:
                    275:        olock = so->so_lock;
                    276:        nlock = uipc_lock;
                    277:        if (olock == nlock)
                    278:                return;
                    279:        unp = sotounpcb(so);
                    280:        KASSERT(unp->unp_streamlock == NULL);
                                /* Keep the old lock so unp_setpeerlocks() can restore it. */
                    281:        unp->unp_streamlock = olock;
                    282:        mutex_obj_hold(nlock);
                                /* Take the new lock before switching, release the old one after. */
                    283:        mutex_enter(nlock);
1.115     ad        284:        solockreset(so, nlock);
1.112     ad        285:        mutex_exit(olock);
                    286: }
                    287:
                         /*
                          * Release a unpcb: free its address if one is set, drop the
                          * reference on a saved stream lock if one is present, then free
                          * the unpcb itself.
                          */
                    288: static void
                    289: unp_free(struct unpcb *unp)
                    290: {
                    291:
                    292:        if (unp->unp_addr)
                    293:                free(unp->unp_addr, M_SONAME);
                    294:        if (unp->unp_streamlock != NULL)
                    295:                mutex_obj_free(unp->unp_streamlock);
                    296:        free(unp, M_PCB);
                    297: }
1.30      thorpej   298:
                         /*
                          * Append the message m (with optional control data) to the receive
                          * buffer of the socket that unp is connected to.  The message is
                          * tagged with the address of unp, or sun_noname if it has none.
                          * If the peer has UNP_WANTCRED set, control is first passed
                          * through unp_addsockcred().
                          *
                          * unp->unp_conn is dereferenced unconditionally, and the socket of
                          * the peer must be locked.
                          *
                          * Returns 0 after waking up readers on the peer.  If the append
                          * fails, counts an overflow on the peer receive buffer, disposes
                          * of and frees control, frees m and returns ENOBUFS.
                          */
1.20      mycroft   299: int
1.76      matt      300: unp_output(struct mbuf *m, struct mbuf *control, struct unpcb *unp,
1.92      ad        301:        struct lwp *l)
1.20      mycroft   302: {
                    303:        struct socket *so2;
1.77      matt      304:        const struct sockaddr_un *sun;
1.20      mycroft   305:
                    306:        so2 = unp->unp_conn->unp_socket;
1.112     ad        307:
                    308:        KASSERT(solocked(so2));
                    309:
1.20      mycroft   310:        if (unp->unp_addr)
                    311:                sun = unp->unp_addr;
                    312:        else
                    313:                sun = &sun_noname;
1.30      thorpej   314:        if (unp->unp_conn->unp_flags & UNP_WANTCRED)
1.92      ad        315:                control = unp_addsockcred(l, control);
1.82      christos  316:        if (sbappendaddr(&so2->so_rcv, (const struct sockaddr *)sun, m,
1.20      mycroft   317:            control) == 0) {
1.112     ad        318:                so2->so_rcv.sb_overflowed++;
                                        /* Dispose of the control contents before freeing the mbufs. */
1.98      martin    319:                unp_dispose(control);
1.20      mycroft   320:                m_freem(control);
                    321:                m_freem(m);
1.60      christos  322:                return (ENOBUFS);
1.20      mycroft   323:        } else {
                    324:                sorwakeup(so2);
                    325:                return (0);
                    326:        }
                    327: }
                    328:
                         /*
                          * Copy a socket address into the mbuf nam and set nam->m_len.
                          * With peeraddr true the address of the connected peer is used,
                          * otherwise the address of so itself; sun_noname is used when
                          * no such address exists.
                          *
                          * The socket must be locked on entry.  If the address is longer
                          * than MLEN, external storage is attached to nam with
                          * MEXTMALLOC(M_WAITOK) while the socket lock is dropped, and the
                          * address is then looked up again on the next loop pass.
                          */
                    329: void
1.112     ad        330: unp_setaddr(struct socket *so, struct mbuf *nam, bool peeraddr)
1.20      mycroft   331: {
1.77      matt      332:        const struct sockaddr_un *sun;
1.112     ad        333:        struct unpcb *unp;
                    334:        bool ext;
1.20      mycroft   335:
1.127     bouyer    336:        KASSERT(solocked(so));
1.112     ad        337:        unp = sotounpcb(so);
                    338:        ext = false;
1.20      mycroft   339:
1.112     ad        340:        for (;;) {
                    341:                sun = NULL;
                    342:                if (peeraddr) {
                    343:                        if (unp->unp_conn && unp->unp_conn->unp_addr)
                    344:                                sun = unp->unp_conn->unp_addr;
                    345:                } else {
                    346:                        if (unp->unp_addr)
                    347:                                sun = unp->unp_addr;
                    348:                }
                    349:                if (sun == NULL)
                    350:                        sun = &sun_noname;
                    351:                nam->m_len = sun->sun_len;
                    352:                if (nam->m_len > MLEN && !ext) {
                                                /* Allocation may sleep, so drop the lock around it. */
                    353:                        sounlock(so);
                    354:                        MEXTMALLOC(nam, MAXPATHLEN * 2, M_WAITOK);
                    355:                        solock(so);
                                                /* Allocate at most once; the next pass does the copy. */
                    356:                        ext = true;
                    357:                } else {
                    358:                        KASSERT(nam->m_len <= MAXPATHLEN * 2);
                    359:                        memcpy(mtod(nam, void *), sun, (size_t)nam->m_len);
                    360:                        break;
                    361:                }
                    362:        }
1.20      mycroft   363: }
                    364:
1.1       cgd       365: /*ARGSUSED*/
1.5       andrew    366: int
1.76      matt      367: uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
1.86      christos  368:        struct mbuf *control, struct lwp *l)
1.1       cgd       369: {
                    370:        struct unpcb *unp = sotounpcb(so);
1.46      augustss  371:        struct socket *so2;
1.86      christos  372:        struct proc *p;
1.75      christos  373:        u_int newhiwat;
1.46      augustss  374:        int error = 0;
1.1       cgd       375:
                    376:        if (req == PRU_CONTROL)
                    377:                return (EOPNOTSUPP);
1.20      mycroft   378:
1.22      mycroft   379: #ifdef DIAGNOSTIC
                    380:        if (req != PRU_SEND && req != PRU_SENDOOB && control)
                    381:                panic("uipc_usrreq: unexpected control mbuf");
                    382: #endif
1.86      christos  383:        p = l ? l->l_proc : NULL;
1.112     ad        384:        if (req != PRU_ATTACH) {
1.122     yamt      385:                if (unp == NULL) {
1.112     ad        386:                        error = EINVAL;
                    387:                        goto release;
                    388:                }
                    389:                KASSERT(solocked(so));
1.1       cgd       390:        }
1.20      mycroft   391:
1.1       cgd       392:        switch (req) {
                    393:
                    394:        case PRU_ATTACH:
1.122     yamt      395:                if (unp != NULL) {
1.1       cgd       396:                        error = EISCONN;
                    397:                        break;
                    398:                }
                    399:                error = unp_attach(so);
                    400:                break;
                    401:
                    402:        case PRU_DETACH:
                    403:                unp_detach(unp);
                    404:                break;
                    405:
                    406:        case PRU_BIND:
1.90      christos  407:                KASSERT(l != NULL);
1.112     ad        408:                error = unp_bind(so, nam, l);
1.1       cgd       409:                break;
                    410:
                    411:        case PRU_LISTEN:
1.112     ad        412:                /*
                    413:                 * If the socket can accept a connection, it must be
                    414:                 * locked by uipc_lock.
                    415:                 */
                    416:                unp_resetlock(so);
1.122     yamt      417:                if (unp->unp_vnode == NULL)
1.1       cgd       418:                        error = EINVAL;
                    419:                break;
                    420:
                    421:        case PRU_CONNECT:
1.90      christos  422:                KASSERT(l != NULL);
1.86      christos  423:                error = unp_connect(so, nam, l);
1.1       cgd       424:                break;
                    425:
                    426:        case PRU_CONNECT2:
1.72      matt      427:                error = unp_connect2(so, (struct socket *)nam, PRU_CONNECT2);
1.1       cgd       428:                break;
                    429:
                    430:        case PRU_DISCONNECT:
                    431:                unp_disconnect(unp);
                    432:                break;
                    433:
                    434:        case PRU_ACCEPT:
1.112     ad        435:                KASSERT(so->so_lock == uipc_lock);
1.72      matt      436:                /*
                    437:                 * Mark the initiating STREAM socket as connected *ONLY*
                    438:                 * after it's been accepted.  This prevents a client from
                    439:                 * overrunning a server and receiving ECONNREFUSED.
                    440:                 */
1.112     ad        441:                if (unp->unp_conn == NULL)
                    442:                        break;
                    443:                so2 = unp->unp_conn->unp_socket;
                    444:                if (so2->so_state & SS_ISCONNECTING) {
                    445:                        KASSERT(solocked2(so, so->so_head));
                    446:                        KASSERT(solocked2(so2, so->so_head));
                    447:                        soisconnected(so2);
                    448:                }
                    449:                /*
                    450:                 * If the connection is fully established, break the
                    451:                 * association with uipc_lock and give the connected
                    452:                 * pair a separate lock to share.
1.127     bouyer    453:                 * There is a race here: sotounpcb(so2)->unp_streamlock
                    454:                 * is not locked, so when changing so2->so_lock
                    455:                 * another thread can grab it while so->so_lock is still
                    456:                 * pointing to the (locked) uipc_lock.
1.129     wiz       457:                 * This should be harmless, except that this makes
1.127     bouyer    458:                 * solocked2() and solocked() unreliable.
                    459:                 * Another problem is that unp_setaddr() expects the
                    460:                 * socket locked. Grabbing sotounpcb(so2)->unp_streamlock
                    461:                 * fixes both issues.
1.112     ad        462:                 */
1.127     bouyer    463:                mutex_enter(sotounpcb(so2)->unp_streamlock);
1.112     ad        464:                unp_setpeerlocks(so2, so);
                    465:                /*
                    466:                 * Only now return peer's address, as we may need to
                    467:                 * block in order to allocate memory.
                    468:                 *
                    469:                 * XXX Minor race: connection can be broken while
                    470:                 * lock is dropped in unp_setaddr().  We will return
                    471:                 * error == 0 and sun_noname as the peer address.
                    472:                 */
                    473:                unp_setaddr(so, nam, true);
1.127     bouyer    474:                /* so_lock now points to unp_streamlock */
                    475:                mutex_exit(so2->so_lock);
1.1       cgd       476:                break;
                    477:
                    478:        case PRU_SHUTDOWN:
                    479:                socantsendmore(so);
                    480:                unp_shutdown(unp);
                    481:                break;
                    482:
                    483:        case PRU_RCVD:
                    484:                switch (so->so_type) {
                    485:
                    486:                case SOCK_DGRAM:
                    487:                        panic("uipc 1");
                    488:                        /*NOTREACHED*/
                    489:
1.134     manu      490:                case SOCK_SEQPACKET: /* FALLTHROUGH */
1.1       cgd       491:                case SOCK_STREAM:
                    492: #define        rcv (&so->so_rcv)
                    493: #define snd (&so2->so_snd)
                    494:                        if (unp->unp_conn == 0)
                    495:                                break;
                    496:                        so2 = unp->unp_conn->unp_socket;
1.112     ad        497:                        KASSERT(solocked2(so, so2));
1.1       cgd       498:                        /*
                    499:                         * Adjust backpressure on sender
                    500:                         * and wakeup any waiting to write.
                    501:                         */
                    502:                        snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
                    503:                        unp->unp_mbcnt = rcv->sb_mbcnt;
1.75      christos  504:                        newhiwat = snd->sb_hiwat + unp->unp_cc - rcv->sb_cc;
1.81      christos  505:                        (void)chgsbsize(so2->so_uidinfo,
1.75      christos  506:                            &snd->sb_hiwat, newhiwat, RLIM_INFINITY);
1.1       cgd       507:                        unp->unp_cc = rcv->sb_cc;
                    508:                        sowwakeup(so2);
                    509: #undef snd
                    510: #undef rcv
                    511:                        break;
                    512:
                    513:                default:
                    514:                        panic("uipc 2");
                    515:                }
                    516:                break;
                    517:
                    518:        case PRU_SEND:
1.30      thorpej   519:                /*
                    520:                 * Note: unp_internalize() rejects any control message
                    521:                 * other than SCM_RIGHTS, and only allows one.  This
                    522:                 * has the side-effect of preventing a caller from
                    523:                 * forging SCM_CREDS.
                    524:                 */
1.90      christos  525:                if (control) {
1.112     ad        526:                        sounlock(so);
                    527:                        error = unp_internalize(&control);
                    528:                        solock(so);
                    529:                        if (error != 0) {
1.111     mlelstv   530:                                m_freem(control);
                    531:                                m_freem(m);
                    532:                                break;
                    533:                        }
1.83      yamt      534:                }
1.1       cgd       535:                switch (so->so_type) {
                    536:
                    537:                case SOCK_DGRAM: {
1.112     ad        538:                        KASSERT(so->so_lock == uipc_lock);
1.1       cgd       539:                        if (nam) {
1.111     mlelstv   540:                                if ((so->so_state & SS_ISCONNECTED) != 0)
1.1       cgd       541:                                        error = EISCONN;
1.111     mlelstv   542:                                else {
1.112     ad        543:                                        /*
                    544:                                         * Note: once connected, the
                    545:                                         * socket's lock must not be
                    546:                                         * dropped until we have sent
                    547:                                         * the message and disconnected.
                    548:                                         * This is necessary to prevent
                    549:                                         * intervening control ops, like
                    550:                                         * another connection.
                    551:                                         */
1.111     mlelstv   552:                                        error = unp_connect(so, nam, l);
1.20      mycroft   553:                                }
1.1       cgd       554:                        } else {
1.111     mlelstv   555:                                if ((so->so_state & SS_ISCONNECTED) == 0)
1.1       cgd       556:                                        error = ENOTCONN;
1.111     mlelstv   557:                        }
                    558:                        if (error) {
                    559:                                unp_dispose(control);
                    560:                                m_freem(control);
                    561:                                m_freem(m);
                    562:                                break;
1.1       cgd       563:                        }
1.89      christos  564:                        KASSERT(p != NULL);
1.92      ad        565:                        error = unp_output(m, control, unp, l);
1.1       cgd       566:                        if (nam)
                    567:                                unp_disconnect(unp);
                    568:                        break;
                    569:                }
                    570:
1.134     manu      571:                case SOCK_SEQPACKET: /* FALLTHROUGH */
1.1       cgd       572:                case SOCK_STREAM:
                    573: #define        rcv (&so2->so_rcv)
                    574: #define        snd (&so->so_snd)
1.87      christos  575:                        if (unp->unp_conn == NULL) {
                    576:                                error = ENOTCONN;
                    577:                                break;
                    578:                        }
1.1       cgd       579:                        so2 = unp->unp_conn->unp_socket;
1.112     ad        580:                        KASSERT(solocked2(so, so2));
1.30      thorpej   581:                        if (unp->unp_conn->unp_flags & UNP_WANTCRED) {
                    582:                                /*
                    583:                                 * Credentials are passed only once on
1.134     manu      584:                                 * SOCK_STREAM and SOCK_SEQPACKET.
1.30      thorpej   585:                                 */
                    586:                                unp->unp_conn->unp_flags &= ~UNP_WANTCRED;
1.92      ad        587:                                control = unp_addsockcred(l, control);
1.30      thorpej   588:                        }
1.1       cgd       589:                        /*
                    590:                         * Send to paired receive port, and then reduce
                    591:                         * send buffer hiwater marks to maintain backpressure.
                    592:                         * Wake up readers.
                    593:                         */
                    594:                        if (control) {
1.112     ad        595:                                if (sbappendcontrol(rcv, m, control) != 0)
                    596:                                        control = NULL;
1.134     manu      597:                        } else {
                    598:                                switch(so->so_type) {
                    599:                                case SOCK_SEQPACKET:
                    600:                                        sbappendrecord(rcv, m);
                    601:                                        break;
                    602:                                case SOCK_STREAM:
                    603:                                        sbappend(rcv, m);
                    604:                                        break;
                    605:                                default:
                    606:                                        panic("uipc_usrreq");
                    607:                                        break;
                    608:                                }
                    609:                        }
1.1       cgd       610:                        snd->sb_mbmax -=
                    611:                            rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
                    612:                        unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
1.75      christos  613:                        newhiwat = snd->sb_hiwat -
                    614:                            (rcv->sb_cc - unp->unp_conn->unp_cc);
1.81      christos  615:                        (void)chgsbsize(so->so_uidinfo,
1.75      christos  616:                            &snd->sb_hiwat, newhiwat, RLIM_INFINITY);
1.1       cgd       617:                        unp->unp_conn->unp_cc = rcv->sb_cc;
                    618:                        sorwakeup(so2);
                    619: #undef snd
                    620: #undef rcv
1.112     ad        621:                        if (control != NULL) {
                    622:                                unp_dispose(control);
                    623:                                m_freem(control);
                    624:                        }
1.1       cgd       625:                        break;
                    626:
                    627:                default:
                    628:                        panic("uipc 4");
                    629:                }
                    630:                break;
                    631:
                    632:        case PRU_ABORT:
1.112     ad        633:                (void)unp_drop(unp, ECONNABORTED);
1.39      sommerfe  634:
1.88      matt      635:                KASSERT(so->so_head == NULL);
1.39      sommerfe  636: #ifdef DIAGNOSTIC
1.122     yamt      637:                if (so->so_pcb == NULL)
1.39      sommerfe  638:                        panic("uipc 5: drop killed pcb");
                    639: #endif
                    640:                unp_detach(unp);
1.1       cgd       641:                break;
                    642:
                    643:        case PRU_SENSE:
                    644:                ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
1.134     manu      645:                switch (so->so_type) {
                    646:                case SOCK_SEQPACKET: /* FALLTHROUGH */
                    647:                case SOCK_STREAM:
                    648:                        if (unp->unp_conn == 0)
                    649:                                break;
                    650:
1.1       cgd       651:                        so2 = unp->unp_conn->unp_socket;
1.112     ad        652:                        KASSERT(solocked2(so, so2));
1.1       cgd       653:                        ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
1.134     manu      654:                        break;
                    655:                default:
                    656:                        break;
1.1       cgd       657:                }
                    658:                ((struct stat *) m)->st_dev = NODEV;
                    659:                if (unp->unp_ino == 0)
                    660:                        unp->unp_ino = unp_ino++;
1.25      kleink    661:                ((struct stat *) m)->st_atimespec =
                    662:                    ((struct stat *) m)->st_mtimespec =
                    663:                    ((struct stat *) m)->st_ctimespec = unp->unp_ctime;
1.1       cgd       664:                ((struct stat *) m)->st_ino = unp->unp_ino;
                    665:                return (0);
                    666:
                    667:        case PRU_RCVOOB:
1.20      mycroft   668:                error = EOPNOTSUPP;
                    669:                break;
1.1       cgd       670:
                    671:        case PRU_SENDOOB:
1.22      mycroft   672:                m_freem(control);
1.20      mycroft   673:                m_freem(m);
1.1       cgd       674:                error = EOPNOTSUPP;
                    675:                break;
                    676:
                    677:        case PRU_SOCKADDR:
1.112     ad        678:                unp_setaddr(so, nam, false);
1.1       cgd       679:                break;
                    680:
                    681:        case PRU_PEERADDR:
1.112     ad        682:                unp_setaddr(so, nam, true);
1.1       cgd       683:                break;
                    684:
                    685:        default:
                    686:                panic("piusrreq");
                    687:        }
1.20      mycroft   688:
1.1       cgd       689: release:
                    690:        return (error);
                    691: }
                    692:
                    693: /*
1.30      thorpej   694:  * Unix domain socket option processing.
                    695:  */
                    696: int
1.118     plunky    697: uipc_ctloutput(int op, struct socket *so, struct sockopt *sopt)
1.30      thorpej   698: {
                    699:        struct unpcb *unp = sotounpcb(so);
                    700:        int optval = 0, error = 0;
                    701:
1.112     ad        702:        KASSERT(solocked(so));
                    703:
1.118     plunky    704:        if (sopt->sopt_level != 0) {
1.100     dyoung    705:                error = ENOPROTOOPT;
1.30      thorpej   706:        } else switch (op) {
                    707:
                    708:        case PRCO_SETOPT:
1.118     plunky    709:                switch (sopt->sopt_name) {
1.30      thorpej   710:                case LOCAL_CREDS:
1.72      matt      711:                case LOCAL_CONNWAIT:
1.118     plunky    712:                        error = sockopt_getint(sopt, &optval);
                    713:                        if (error)
                    714:                                break;
                    715:                        switch (sopt->sopt_name) {
1.30      thorpej   716: #define        OPTSET(bit) \
                    717:        if (optval) \
                    718:                unp->unp_flags |= (bit); \
                    719:        else \
                    720:                unp->unp_flags &= ~(bit);
                    721:
1.118     plunky    722:                        case LOCAL_CREDS:
                    723:                                OPTSET(UNP_WANTCRED);
                    724:                                break;
                    725:                        case LOCAL_CONNWAIT:
                    726:                                OPTSET(UNP_CONNWAIT);
                    727:                                break;
1.30      thorpej   728:                        }
                    729:                        break;
                    730: #undef OPTSET
                    731:
                    732:                default:
                    733:                        error = ENOPROTOOPT;
                    734:                        break;
                    735:                }
                    736:                break;
                    737:
                    738:        case PRCO_GETOPT:
1.112     ad        739:                sounlock(so);
1.118     plunky    740:                switch (sopt->sopt_name) {
1.99      he        741:                case LOCAL_PEEREID:
                    742:                        if (unp->unp_flags & UNP_EIDSVALID) {
1.118     plunky    743:                                error = sockopt_set(sopt,
                    744:                                    &unp->unp_connid, sizeof(unp->unp_connid));
1.99      he        745:                        } else {
                    746:                                error = EINVAL;
                    747:                        }
                    748:                        break;
1.30      thorpej   749:                case LOCAL_CREDS:
                    750: #define        OPTBIT(bit)     (unp->unp_flags & (bit) ? 1 : 0)
                    751:
1.99      he        752:                        optval = OPTBIT(UNP_WANTCRED);
1.118     plunky    753:                        error = sockopt_setint(sopt, optval);
1.30      thorpej   754:                        break;
                    755: #undef OPTBIT
                    756:
                    757:                default:
                    758:                        error = ENOPROTOOPT;
                    759:                        break;
                    760:                }
1.112     ad        761:                solock(so);
1.30      thorpej   762:                break;
                    763:        }
                    764:        return (error);
                    765: }
                    766:
                    767: /*
1.1       cgd       768:  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
                    769:  * for stream sockets, although the total for sender and receiver is
                    770:  * actually only PIPSIZ.
                    771:  * Datagram sockets really use the sendspace as the maximum datagram size,
                    772:  * and don't really want to reserve the sendspace.  Their recvspace should
                    773:  * be large enough for at least one max-size datagram plus address.
                    774:  */
                    775: #define        PIPSIZ  4096
                    776: u_long unpst_sendspace = PIPSIZ;
                    777: u_long unpst_recvspace = PIPSIZ;
                    778: u_long unpdg_sendspace = 2*1024;       /* really max datagram size */
                    779: u_long unpdg_recvspace = 4*1024;
                    780:
1.121     mrg       781: u_int  unp_rights;                     /* files in flight */
                    782: u_int  unp_rights_ratio = 2;           /* limit, fraction of maxfiles */
1.1       cgd       783:
1.5       andrew    784: int
1.76      matt      785: unp_attach(struct socket *so)
1.1       cgd       786: {
1.46      augustss  787:        struct unpcb *unp;
1.1       cgd       788:        int error;
1.80      perry     789:
1.112     ad        790:        switch (so->so_type) {
1.134     manu      791:        case SOCK_SEQPACKET: /* FALLTHROUGH */
1.112     ad        792:        case SOCK_STREAM:
                    793:                if (so->so_lock == NULL) {
                    794:                        /*
                    795:                         * XXX Assuming that no socket locks are held,
                    796:                         * as this call may sleep.
                    797:                         */
                    798:                        so->so_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
                    799:                        solock(so);
                    800:                }
                    801:                if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
1.1       cgd       802:                        error = soreserve(so, unpst_sendspace, unpst_recvspace);
1.112     ad        803:                        if (error != 0)
                    804:                                return (error);
                    805:                }
                    806:                break;
1.1       cgd       807:
1.112     ad        808:        case SOCK_DGRAM:
                    809:                if (so->so_lock == NULL) {
                    810:                        mutex_obj_hold(uipc_lock);
                    811:                        so->so_lock = uipc_lock;
                    812:                        solock(so);
                    813:                }
                    814:                if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
1.1       cgd       815:                        error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
1.112     ad        816:                        if (error != 0)
                    817:                                return (error);
                    818:                }
                    819:                break;
1.8       mycroft   820:
1.112     ad        821:        default:
                    822:                panic("unp_attach");
1.1       cgd       823:        }
1.112     ad        824:        KASSERT(solocked(so));
1.14      mycroft   825:        unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT);
                    826:        if (unp == NULL)
1.1       cgd       827:                return (ENOBUFS);
1.123     yamt      828:        memset(unp, 0, sizeof(*unp));
1.14      mycroft   829:        unp->unp_socket = so;
1.15      mycroft   830:        so->so_pcb = unp;
1.85      simonb    831:        nanotime(&unp->unp_ctime);
1.1       cgd       832:        return (0);
                    833: }
                    834:
1.17      pk        835: void
1.76      matt      836: unp_detach(struct unpcb *unp)
1.1       cgd       837: {
1.112     ad        838:        struct socket *so;
                    839:        vnode_t *vp;
                    840:
                    841:        so = unp->unp_socket;
1.80      perry     842:
1.112     ad        843:  retry:
                    844:        if ((vp = unp->unp_vnode) != NULL) {
                    845:                sounlock(so);
                    846:                /* Acquire v_interlock to protect against unp_connect(). */
1.113     ad        847:                /* XXXAD racy */
1.135     rmind     848:                mutex_enter(vp->v_interlock);
1.112     ad        849:                vp->v_socket = NULL;
                    850:                vrelel(vp, 0);
                    851:                solock(so);
                    852:                unp->unp_vnode = NULL;
1.1       cgd       853:        }
                    854:        if (unp->unp_conn)
                    855:                unp_disconnect(unp);
1.112     ad        856:        while (unp->unp_refs) {
                    857:                KASSERT(solocked2(so, unp->unp_refs->unp_socket));
                    858:                if (unp_drop(unp->unp_refs, ECONNRESET)) {
                    859:                        solock(so);
                    860:                        goto retry;
                    861:                }
                    862:        }
                    863:        soisdisconnected(so);
                    864:        so->so_pcb = NULL;
1.8       mycroft   865:        if (unp_rights) {
                    866:                /*
1.121     mrg       867:                 * Normally the receive buffer is flushed later, in sofree,
                    868:                 * but if our receive buffer holds references to files that
                    869:                 * are now garbage, we will enqueue those file references to
                    870:                 * the garbage collector and kick it into action.
1.8       mycroft   871:                 */
1.112     ad        872:                sorflush(so);
                    873:                unp_free(unp);
1.121     mrg       874:                unp_thread_kick();
1.14      mycroft   875:        } else
1.112     ad        876:                unp_free(unp);
1.1       cgd       877: }
                    878:
1.5       andrew    879: int
1.112     ad        880: unp_bind(struct socket *so, struct mbuf *nam, struct lwp *l)
1.1       cgd       881: {
1.27      thorpej   882:        struct sockaddr_un *sun;
1.112     ad        883:        struct unpcb *unp;
1.106     ad        884:        vnode_t *vp;
1.1       cgd       885:        struct vattr vattr;
1.27      thorpej   886:        size_t addrlen;
1.1       cgd       887:        int error;
1.133     dholland  888:        struct pathbuf *pb;
1.1       cgd       889:        struct nameidata nd;
1.112     ad        890:        proc_t *p;
1.1       cgd       891:
1.112     ad        892:        unp = sotounpcb(so);
                    893:        if (unp->unp_vnode != NULL)
1.20      mycroft   894:                return (EINVAL);
1.109     ad        895:        if ((unp->unp_flags & UNP_BUSY) != 0) {
                    896:                /*
                    897:                 * EALREADY may not be strictly accurate, but since this
                    898:                 * is a major application error it's hardly a big deal.
                    899:                 */
                    900:                return (EALREADY);
                    901:        }
                    902:        unp->unp_flags |= UNP_BUSY;
1.112     ad        903:        sounlock(so);
1.109     ad        904:
1.27      thorpej   905:        /*
                    906:         * Allocate the new sockaddr.  We have to allocate one
                    907:         * extra byte so that we can ensure that the pathname
                    908:         * is nul-terminated.
                    909:         */
1.112     ad        910:        p = l->l_proc;
1.27      thorpej   911:        addrlen = nam->m_len + 1;
                    912:        sun = malloc(addrlen, M_SONAME, M_WAITOK);
1.95      christos  913:        m_copydata(nam, 0, nam->m_len, (void *)sun);
1.27      thorpej   914:        *(((char *)sun) + nam->m_len) = '\0';
                    915:
1.133     dholland  916:        pb = pathbuf_create(sun->sun_path);
                    917:        if (pb == NULL) {
                    918:                error = ENOMEM;
                    919:                goto bad;
                    920:        }
                    921:        NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT | TRYEMULROOT, pb);
1.27      thorpej   922:
1.1       cgd       923: /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
1.133     dholland  924:        if ((error = namei(&nd)) != 0) {
                    925:                pathbuf_destroy(pb);
1.27      thorpej   926:                goto bad;
1.133     dholland  927:        }
1.9       mycroft   928:        vp = nd.ni_vp;
1.96      hannken   929:        if (vp != NULL) {
1.9       mycroft   930:                VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
                    931:                if (nd.ni_dvp == vp)
                    932:                        vrele(nd.ni_dvp);
1.1       cgd       933:                else
1.9       mycroft   934:                        vput(nd.ni_dvp);
1.1       cgd       935:                vrele(vp);
1.133     dholland  936:                pathbuf_destroy(pb);
1.96      hannken   937:                error = EADDRINUSE;
                    938:                goto bad;
1.1       cgd       939:        }
1.128     pooka     940:        vattr_null(&vattr);
1.1       cgd       941:        vattr.va_type = VSOCK;
1.84      jmmv      942:        vattr.va_mode = ACCESSPERMS & ~(p->p_cwdi->cwdi_cmask);
1.16      christos  943:        error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1.133     dholland  944:        if (error) {
                    945:                pathbuf_destroy(pb);
1.27      thorpej   946:                goto bad;
1.133     dholland  947:        }
1.9       mycroft   948:        vp = nd.ni_vp;
1.112     ad        949:        solock(so);
1.1       cgd       950:        vp->v_socket = unp->unp_socket;
                    951:        unp->unp_vnode = vp;
1.27      thorpej   952:        unp->unp_addrlen = addrlen;
                    953:        unp->unp_addr = sun;
1.99      he        954:        unp->unp_connid.unp_pid = p->p_pid;
1.112     ad        955:        unp->unp_connid.unp_euid = kauth_cred_geteuid(l->l_cred);
                    956:        unp->unp_connid.unp_egid = kauth_cred_getegid(l->l_cred);
1.99      he        957:        unp->unp_flags |= UNP_EIDSBIND;
1.130     hannken   958:        VOP_UNLOCK(vp);
1.109     ad        959:        unp->unp_flags &= ~UNP_BUSY;
1.133     dholland  960:        pathbuf_destroy(pb);
1.1       cgd       961:        return (0);
1.27      thorpej   962:
                    963:  bad:
                    964:        free(sun, M_SONAME);
1.112     ad        965:        solock(so);
1.109     ad        966:        unp->unp_flags &= ~UNP_BUSY;
1.27      thorpej   967:        return (error);
1.1       cgd       968: }
                    969:
1.5       andrew    970: int
1.86      christos  971: unp_connect(struct socket *so, struct mbuf *nam, struct lwp *l)
1.1       cgd       972: {
1.46      augustss  973:        struct sockaddr_un *sun;
1.106     ad        974:        vnode_t *vp;
1.46      augustss  975:        struct socket *so2, *so3;
1.99      he        976:        struct unpcb *unp, *unp2, *unp3;
1.27      thorpej   977:        size_t addrlen;
1.1       cgd       978:        int error;
1.133     dholland  979:        struct pathbuf *pb;
1.1       cgd       980:        struct nameidata nd;
                    981:
1.109     ad        982:        unp = sotounpcb(so);
                    983:        if ((unp->unp_flags & UNP_BUSY) != 0) {
                    984:                /*
                    985:                 * EALREADY may not be strictly accurate, but since this
                    986:                 * is a major application error it's hardly a big deal.
                    987:                 */
                    988:                return (EALREADY);
                    989:        }
                    990:        unp->unp_flags |= UNP_BUSY;
1.112     ad        991:        sounlock(so);
1.109     ad        992:
1.27      thorpej   993:        /*
                    994:         * Allocate a temporary sockaddr.  We have to allocate one extra
                    995:         * byte so that we can ensure that the pathname is nul-terminated.
                    996:         * When we establish the connection, we copy the other PCB's
                    997:         * sockaddr to our own.
                    998:         */
                    999:        addrlen = nam->m_len + 1;
                   1000:        sun = malloc(addrlen, M_SONAME, M_WAITOK);
1.95      christos 1001:        m_copydata(nam, 0, nam->m_len, (void *)sun);
1.27      thorpej  1002:        *(((char *)sun) + nam->m_len) = '\0';
                   1003:
1.133     dholland 1004:        pb = pathbuf_create(sun->sun_path);
                   1005:        if (pb == NULL) {
                   1006:                error = ENOMEM;
                   1007:                goto bad2;
                   1008:        }
1.27      thorpej  1009:
1.133     dholland 1010:        NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
                   1011:
                   1012:        if ((error = namei(&nd)) != 0) {
                   1013:                pathbuf_destroy(pb);
1.27      thorpej  1014:                goto bad2;
1.133     dholland 1015:        }
1.9       mycroft  1016:        vp = nd.ni_vp;
1.1       cgd      1017:        if (vp->v_type != VSOCK) {
                   1018:                error = ENOTSOCK;
                   1019:                goto bad;
                   1020:        }
1.133     dholland 1021:        pathbuf_destroy(pb);
1.102     pooka    1022:        if ((error = VOP_ACCESS(vp, VWRITE, l->l_cred)) != 0)
1.1       cgd      1023:                goto bad;
1.112     ad       1024:        /* Acquire v_interlock to protect against unp_detach(). */
1.135     rmind    1025:        mutex_enter(vp->v_interlock);
1.1       cgd      1026:        so2 = vp->v_socket;
1.112     ad       1027:        if (so2 == NULL) {
1.135     rmind    1028:                mutex_exit(vp->v_interlock);
1.1       cgd      1029:                error = ECONNREFUSED;
                   1030:                goto bad;
                   1031:        }
                   1032:        if (so->so_type != so2->so_type) {
1.135     rmind    1033:                mutex_exit(vp->v_interlock);
1.1       cgd      1034:                error = EPROTOTYPE;
                   1035:                goto bad;
                   1036:        }
1.112     ad       1037:        solock(so);
                   1038:        unp_resetlock(so);
1.135     rmind    1039:        mutex_exit(vp->v_interlock);
1.112     ad       1040:        if ((so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) {
                   1041:                /*
                   1042:                 * This may seem somewhat fragile but is OK: if we can
                   1043:                 * see SO_ACCEPTCONN set on the endpoint, then it must
                   1044:                 * be locked by the domain-wide uipc_lock.
                   1045:                 */
1.132     yamt     1046:                KASSERT((so2->so_options & SO_ACCEPTCONN) == 0 ||
1.112     ad       1047:                    so2->so_lock == uipc_lock);
1.1       cgd      1048:                if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
1.124     yamt     1049:                    (so3 = sonewconn(so2, 0)) == NULL) {
1.1       cgd      1050:                        error = ECONNREFUSED;
1.112     ad       1051:                        sounlock(so);
1.1       cgd      1052:                        goto bad;
                   1053:                }
                   1054:                unp2 = sotounpcb(so2);
                   1055:                unp3 = sotounpcb(so3);
1.26      thorpej  1056:                if (unp2->unp_addr) {
                   1057:                        unp3->unp_addr = malloc(unp2->unp_addrlen,
                   1058:                            M_SONAME, M_WAITOK);
1.36      perry    1059:                        memcpy(unp3->unp_addr, unp2->unp_addr,
1.26      thorpej  1060:                            unp2->unp_addrlen);
                   1061:                        unp3->unp_addrlen = unp2->unp_addrlen;
                   1062:                }
1.30      thorpej  1063:                unp3->unp_flags = unp2->unp_flags;
1.112     ad       1064:                unp3->unp_connid.unp_pid = l->l_proc->p_pid;
                   1065:                unp3->unp_connid.unp_euid = kauth_cred_geteuid(l->l_cred);
                   1066:                unp3->unp_connid.unp_egid = kauth_cred_getegid(l->l_cred);
1.99      he       1067:                unp3->unp_flags |= UNP_EIDSVALID;
                   1068:                if (unp2->unp_flags & UNP_EIDSBIND) {
                   1069:                        unp->unp_connid = unp2->unp_connid;
                   1070:                        unp->unp_flags |= UNP_EIDSVALID;
                   1071:                }
1.112     ad       1072:                so2 = so3;
1.33      thorpej  1073:        }
1.72      matt     1074:        error = unp_connect2(so, so2, PRU_CONNECT);
1.112     ad       1075:        sounlock(so);
1.27      thorpej  1076:  bad:
1.1       cgd      1077:        vput(vp);
1.27      thorpej  1078:  bad2:
                   1079:        free(sun, M_SONAME);
1.112     ad       1080:        solock(so);
1.109     ad       1081:        unp->unp_flags &= ~UNP_BUSY;
1.1       cgd      1082:        return (error);
                   1083: }
                   1084:
1.5       andrew   1085: int
1.76      matt     1086: unp_connect2(struct socket *so, struct socket *so2, int req)
1.1       cgd      1087: {
                                /*
                                 * Connect the local endpoint so to the peer so2.
                                 *
                                 * Returns EPROTOTYPE when the two so_type values differ
                                 * and 0 otherwise; an unhandled so_type panics.  req is
                                 * only ever compared against PRU_CONNECT in this function.
                                 */
1.46      augustss 1088:        struct unpcb *unp = sotounpcb(so);
                   1089:        struct unpcb *unp2;
1.1       cgd      1090:
                   1091:        if (so2->so_type != so->so_type)
                   1092:                return (EPROTOTYPE);
1.112     ad       1093:
                   1094:        /*
                   1095:         * All three sockets involved must be locked by the same lock:
                   1096:         *
                   1097:         * local endpoint (so)
                   1098:         * remote endpoint (so2)
1.131     yamt     1099:         * queue head (so2->so_head, only if PR_CONNREQUIRED)
1.112     ad       1100:         */
                   1101:        KASSERT(solocked2(so, so2));
1.125     yamt     1102:        KASSERT(so->so_head == NULL);
                   1103:        if (so2->so_head != NULL) {
                   1104:                KASSERT(so2->so_lock == uipc_lock);
                   1105:                KASSERT(solocked2(so2, so2->so_head));
1.112     ad       1106:        }
                   1107:
1.1       cgd      1108:        unp2 = sotounpcb(so2);
                   1109:        unp->unp_conn = unp2;
                   1110:        switch (so->so_type) {
                   1111:
                   1112:        case SOCK_DGRAM:
                                        /*
                                         * One-way link: push unp onto the head of the
                                         * peer's unp_refs list; unp2->unp_conn is left
                                         * untouched.
                                         */
                   1113:                unp->unp_nextref = unp2->unp_refs;
                   1114:                unp2->unp_refs = unp;
                   1115:                soisconnected(so);
                   1116:                break;
                   1117:
1.134     manu     1118:        case SOCK_SEQPACKET: /* FALLTHROUGH */
1.1       cgd      1119:        case SOCK_STREAM:
                                        /* Two-way link: the peer points back at us. */
                   1120:                unp2->unp_conn = unp;
                                        /*
                                         * For PRU_CONNECT, so is only marked "connecting"
                                         * when either pcb has UNP_CONNWAIT set; so2 is
                                         * marked connected unconditionally.
                                         */
1.72      matt     1121:                if (req == PRU_CONNECT &&
                   1122:                    ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT))
                   1123:                        soisconnecting(so);
                   1124:                else
                   1125:                        soisconnected(so);
1.1       cgd      1126:                soisconnected(so2);
1.112     ad       1127:                /*
                   1128:                 * If the connection is fully established, break the
                   1129:                 * association with uipc_lock and give the connected
                   1130:                 * pair a separate lock to share.  For CONNECT2, we
                   1131:                 * require that the locks already match (the sockets
                   1132:                 * are created that way).
                   1133:                 */
1.125     yamt     1134:                if (req == PRU_CONNECT) {
                   1135:                        KASSERT(so2->so_head != NULL);
1.112     ad       1136:                        unp_setpeerlocks(so, so2);
1.125     yamt     1137:                }
1.1       cgd      1138:                break;
                   1139:
                   1140:        default:
                   1141:                panic("unp_connect2");
                   1142:        }
                   1143:        return (0);
                   1144: }
                   1145:
1.5       andrew   1146: void
1.76      matt     1147: unp_disconnect(struct unpcb *unp)
1.1       cgd      1148: {
                                /*
                                 * Break the link between unp and the pcb it is connected
                                 * to.  Does nothing when unp->unp_conn is already clear.
                                 * Socket types other than the three handled in the switch
                                 * only get unp_conn cleared.
                                 */
1.46      augustss 1149:        struct unpcb *unp2 = unp->unp_conn;
1.112     ad       1150:        struct socket *so;
1.1       cgd      1151:
                   1152:        if (unp2 == 0)
                   1153:                return;
                   1154:        unp->unp_conn = 0;
1.112     ad       1155:        so = unp->unp_socket;
                   1156:        switch (so->so_type) {
1.1       cgd      1157:        case SOCK_DGRAM:
                                        /*
                                         * Unlink unp from the peer's singly linked
                                         * unp_refs list (chained through unp_nextref).
                                         */
                   1158:                if (unp2->unp_refs == unp)
                   1159:                        unp2->unp_refs = unp->unp_nextref;
                   1160:                else {
                                                /* Walk the list to find unp's predecessor. */
                   1161:                        unp2 = unp2->unp_refs;
                   1162:                        for (;;) {
                                                        /*
                                                         * NOTE(review): this assertion reads
                                                         * unp2->unp_socket before the
                                                         * unp2 == 0 test below, so when
                                                         * KASSERT is compiled in, a list that
                                                         * ends without containing unp would
                                                         * dereference NULL here rather than
                                                         * reach the panic.  Consider moving
                                                         * the check above the assertion.
                                                         */
1.112     ad       1163:                                KASSERT(solocked2(so, unp2->unp_socket));
1.1       cgd      1164:                                if (unp2 == 0)
                   1165:                                        panic("unp_disconnect");
                   1166:                                if (unp2->unp_nextref == unp)
                   1167:                                        break;
                   1168:                                unp2 = unp2->unp_nextref;
                   1169:                        }
                   1170:                        unp2->unp_nextref = unp->unp_nextref;
                   1171:                }
                   1172:                unp->unp_nextref = 0;
1.112     ad       1173:                so->so_state &= ~SS_ISCONNECTED;
1.1       cgd      1174:                break;
                   1175:
1.134     manu     1176:        case SOCK_SEQPACKET: /* FALLTHROUGH */
1.1       cgd      1177:        case SOCK_STREAM:
                                        /* Tear down both directions and notify both sockets. */
1.112     ad       1178:                KASSERT(solocked2(so, unp2->unp_socket));
                   1179:                soisdisconnected(so);
1.1       cgd      1180:                unp2->unp_conn = 0;
                   1181:                soisdisconnected(unp2->unp_socket);
                   1182:                break;
                   1183:        }
                   1184: }
                   1185:
                   1186: #ifdef notdef
                         /*
                          * Only built when "notdef" is defined.  The body just forwards
                          * to unp_detach(); note that no return type is declared.
                          */
1.76      matt     1187: unp_abort(struct unpcb *unp)
1.1       cgd      1188: {
                   1189:        unp_detach(unp);
                   1190: }
                   1191: #endif
                   1192:
1.5       andrew   1193: void
1.76      matt     1194: unp_shutdown(struct unpcb *unp)
1.1       cgd      1195: {
                                /*
                                 * For SOCK_SEQPACKET and SOCK_STREAM sockets, call
                                 * socantrcvmore() on the connected peer's socket, provided
                                 * there is a peer and it has a socket.  Every other socket
                                 * type is left untouched.
                                 */
                   1196:        struct socket *so;
                   1197:
1.134     manu     1198:        switch(unp->unp_socket->so_type) {
                   1199:        case SOCK_SEQPACKET: /* FALLTHROUGH */
                   1200:        case SOCK_STREAM:
                                        /* so is assigned inside the condition on purpose. */
                   1201:                if (unp->unp_conn && (so = unp->unp_conn->unp_socket))
                   1202:                        socantrcvmore(so);
                   1203:                break;
                   1204:        default:
                   1205:                break;
                   1206:        }
1.1       cgd      1207: }
                   1208:
1.112     ad       1209: bool
1.76      matt     1210: unp_drop(struct unpcb *unp, int errno)
1.1       cgd      1211: {
                                /*
                                 * Record errno in the socket's so_error and disconnect
                                 * unp from its peer.  The socket must be locked on entry.
                                 *
                                 * Returns true when so->so_head was set, in which case the
                                 * socket has been passed to sofree() and the pcb to
                                 * unp_free(); returns false otherwise, leaving both intact.
                                 *
                                 * NOTE(review): the parameter is named "errno"; this would
                                 * clash if a header defining errno as a macro were ever in
                                 * scope here -- confirm that cannot happen in this file.
                                 */
                   1212:        struct socket *so = unp->unp_socket;
                   1213:
1.112     ad       1214:        KASSERT(solocked(so));
                   1215:
1.1       cgd      1216:        so->so_error = errno;
                   1217:        unp_disconnect(unp);
                   1218:        if (so->so_head) {
                                        /* Detach the pcb from the socket before freeing both. */
1.112     ad       1219:                so->so_pcb = NULL;
                   1220:                /* sofree() drops the socket lock */
1.14      mycroft  1221:                sofree(so);
1.112     ad       1222:                unp_free(unp);
                   1223:                return true;
1.1       cgd      1224:        }
1.112     ad       1225:        return false;
1.1       cgd      1226: }
                   1227:
                   1228: #ifdef notdef
                         /* Only built when "notdef" is defined; the body is empty. */
1.76      matt     1229: unp_drain(void)
1.1       cgd      1230: {
                   1231:
                   1232: }
                   1233: #endif
                   1234:
1.5       andrew   1235: int
1.136     christos 1236: unp_externalize(struct mbuf *rights, struct lwp *l, int flags)
1.1       cgd      1237: {
                                /*
                                 * Turn the file_t pointers carried in the control message
                                 * at the start of "rights" into file descriptors in l's
                                 * process, rewriting the message data in place as an
                                 * array of ints.
                                 *
                                 * flags: only O_CLOEXEC is examined; it sets close-on-exec
                                 * on each new descriptor.
                                 *
                                 * Returns 0 on success.  On failure returns EINVAL (a NULL
                                 * file pointer in the message), EPERM (a directory outside
                                 * the recipient's root directory) or EMSGSIZE (descriptor
                                 * allocation failed); in that case every file pointer in
                                 * the message is zeroed and passed to unp_discard_now().
                                 */
1.138     christos 1238:        struct cmsghdr * const cm = mtod(rights, struct cmsghdr *);
                   1239:        struct proc * const p = l->l_proc;
1.106     ad       1240:        file_t **rp;
1.138     christos 1241:        int error = 0;
1.47      thorpej  1242:
                                /*
                                 * NOTE(review): cmsg_len is not checked against
                                 * CMSG_ALIGN(sizeof(*cm)) here, so a short header would
                                 * wrap this unsigned subtraction.  Presumably the length
                                 * was validated when the message was built -- confirm.
                                 */
1.138     christos 1243:        const size_t nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
1.106     ad       1244:            sizeof(file_t *);
1.1       cgd      1245:
                                /*
                                 * Scratch array for the new descriptor numbers.
                                 * NOTE(review): when nfds is 0 this asks for a zero-byte
                                 * allocation -- confirm kmem_alloc()/kmem_free() accept
                                 * that, or bail out early.
                                 */
1.138     christos 1246:        int * const fdp = kmem_alloc(nfds * sizeof(int), KM_SLEEP);
1.101     ad       1247:        rw_enter(&p->p_cwdi->cwdi_lock, RW_READER);
1.50      thorpej  1248:
1.121     mrg      1249:        /* Make sure the recipient should be able to see the files.. */
1.140   ! christos 1250:        rp = (file_t **)CMSG_DATA(cm);
        !          1251:        for (size_t i = 0; i < nfds; i++) {
        !          1252:                file_t * const fp = *rp++;
        !          1253:                if (fp == NULL) {
        !          1254:                        error = EINVAL;
        !          1255:                        goto out;
        !          1256:                }
        !          1257:                /*
        !          1258:                 * If we are in a chroot'ed directory, and
        !          1259:                 * someone wants to pass us a directory, make
        !          1260:                 * sure it's inside the subtree we're allowed
        !          1261:                 * to access.
        !          1262:                 */
        !          1263:                if (p->p_cwdi->cwdi_rdir != NULL && fp->f_type == DTYPE_VNODE) {
        !          1264:                        vnode_t *vp = (vnode_t *)fp->f_data;
        !          1265:                        if ((vp->v_type == VDIR) &&
        !          1266:                            !vn_isunder(vp, p->p_cwdi->cwdi_rdir, l)) {
        !          1267:                                error = EPERM;
        !          1268:                                goto out;
1.39      sommerfe 1269:                        }
                   1270:                }
                   1271:        }
1.50      thorpej  1272:
                   1273:  restart:
1.24      cgd      1274:        /*
1.50      thorpej  1275:         * First loop -- allocate file descriptor table slots for the
1.121     mrg      1276:         * new files.
1.24      cgd      1277:         */
1.138     christos 1278:        for (size_t i = 0; i < nfds; i++) {
1.106     ad       1279:                if ((error = fd_alloc(p, 0, &fdp[i])) != 0) {
1.49      thorpej  1280:                        /*
1.50      thorpej  1281:                         * Back out what we've done so far.
1.49      thorpej  1282:                         */
1.138     christos 1283:                        while (i-- > 0) {
1.106     ad       1284:                                fd_abort(p, NULL, fdp[i]);
                   1285:                        }
                                                /*
                                                 * ENOSPC: try to expand the descriptor
                                                 * table and redo the whole allocation loop.
                                                 */
1.50      thorpej  1286:                        if (error == ENOSPC) {
1.106     ad       1287:                                fd_tryexpand(p);
1.50      thorpej  1288:                                error = 0;
1.138     christos 1289:                                goto restart;
1.50      thorpej  1290:                        }
1.138     christos 1291:                        /*
                   1292:                         * This is the error that has historically
                   1293:                         * been returned, and some callers may
                   1294:                         * expect it.
                   1295:                         */
                   1296:                        error = EMSGSIZE;
                   1297:                        goto out;
1.49      thorpej  1298:                }
1.1       cgd      1299:        }
1.24      cgd      1300:
                   1301:        /*
1.50      thorpej  1302:         * Now that adding them has succeeded, update all of the
1.121     mrg      1303:         * file passing state and affix the descriptors.
1.112     ad       1304:         */
                                /*
                                 * rp (reads) and ofdp (writes) walk the same buffer.  Each
                                 * slot is read before anything is stored, so the int
                                 * stores cannot clobber an unread pointer, assuming
                                 * sizeof(int) <= sizeof(file_t *).
                                 */
1.106     ad       1305:        rp = (file_t **)CMSG_DATA(cm);
1.138     christos 1306:        int *ofdp = (int *)CMSG_DATA(cm);
                   1307:        for (size_t i = 0; i < nfds; i++) {
                   1308:                file_t * const fp = *rp++;
                   1309:                const int fd = fdp[i];
1.106     ad       1310:                atomic_dec_uint(&unp_rights);
1.136     christos 1311:                fd_set_exclose(l, fd, (flags & O_CLOEXEC) != 0);
                   1312:                fd_affix(p, fp, fd);
1.138     christos 1313:                /*
                   1314:                 * Done with this file pointer, replace it with a fd.
                   1315:                 */
                   1316:                *ofdp++ = fd;
1.106     ad       1317:                mutex_enter(&fp->f_lock);
1.50      thorpej  1318:                fp->f_msgcount--;
1.106     ad       1319:                mutex_exit(&fp->f_lock);
                   1320:                /*
                   1321:                 * Note that fd_affix() adds a reference to the file.
                   1322:                 * The file may already have been closed by another
                   1323:                 * LWP in the process, so we must drop the reference
                   1324:                 * added by unp_internalize() with closef().
                   1325:                 */
                   1326:                closef(fp);
1.50      thorpej  1327:        }
                   1328:
                   1329:        /*
1.138     christos 1330:         * Adjust length, in case of transition from large file_t
                   1331:         * pointers to ints.
1.50      thorpej  1332:         */
1.138     christos 1333:        if (sizeof(file_t *) != sizeof(int)) {
                   1334:                cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
                   1335:                rights->m_len = CMSG_SPACE(nfds * sizeof(int));
                   1336:        }
1.50      thorpej  1337:  out:
                                /*
                                 * Failure only: zero every file pointer slot in the
                                 * message and hand each file to unp_discard_now().
                                 */
1.138     christos 1338:        if (__predict_false(error != 0)) {
                   1339:                rp = (file_t **)CMSG_DATA(cm);
                   1340:                for (size_t i = 0; i < nfds; i++) {
                   1341:                        file_t * const fp = *rp;
                   1342:                        *rp++ = 0;
                   1343:                        unp_discard_now(fp);
                   1344:                }
                   1345:        }
                   1346:
1.101     ad       1347:        rw_exit(&p->p_cwdi->cwdi_lock);
1.138     christos 1348:        kmem_free(fdp, nfds * sizeof(int));
1.139     christos 1349:        return error;
1.1       cgd      1350: }
                   1351:
1.5       andrew   1352: int
1.112     ad       1353: unp_internalize(struct mbuf **controlp)
1.1       cgd      1354: {
                                /*
                                 * Convert the descriptor numbers in the SCM_RIGHTS control
                                 * message at *controlp into file_t pointers.  The pointers
                                 * are written to a freshly allocated buffer that is then
                                 * attached to the control mbuf as external storage.
                                 * *controlp is replaced when the incoming mbuf already had
                                 * external storage (M_EXT).
                                 *
                                 * Returns 0 on success, or EINVAL (bad header), EAGAIN
                                 * (unp_rights would exceed maxfiles / unp_rights_ratio),
                                 * EBADF (invalid descriptor, or a kqueue) or E2BIG.
                                 */
1.121     mrg      1355:        filedesc_t *fdescp = curlwp->l_fd;
1.108     yamt     1356:        struct mbuf *control = *controlp;
1.73      martin   1357:        struct cmsghdr *newcm, *cm = mtod(control, struct cmsghdr *);
1.106     ad       1358:        file_t **rp, **files;
                   1359:        file_t *fp;
1.46      augustss 1360:        int i, fd, *fdp;
1.106     ad       1361:        int nfds, error;
1.121     mrg      1362:        u_int maxmsg;
1.106     ad       1363:
                   1364:        error = 0;
                   1365:        newcm = NULL;
1.38      thorpej  1366:
1.106     ad       1367:        /* Sanity check the control message header. */
1.66      jdolecek 1368:        if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
1.117     christos 1369:            cm->cmsg_len > control->m_len ||
                   1370:            cm->cmsg_len < CMSG_ALIGN(sizeof(*cm)))
1.1       cgd      1371:                return (EINVAL);
1.24      cgd      1372:
1.106     ad       1373:        /*
                   1374:         * Verify that the file descriptors are valid, and acquire
                   1375:         * a reference to each.
                   1376:         */
1.47      thorpej  1377:        nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int);
                   1378:        fdp = (int *)CMSG_DATA(cm);
1.121     mrg      1379:        maxmsg = maxfiles / unp_rights_ratio;
1.24      cgd      1380:        for (i = 0; i < nfds; i++) {
                   1381:                fd = *fdp++;
                                        /*
                                         * Charge one unit against the unp_rights budget
                                         * before looking the descriptor up.  On either
                                         * failure below the charge for this entry is undone
                                         * here and nfds is cut to i, so the cleanup at
                                         * "out" only touches the entries already processed.
                                         */
1.121     mrg      1382:                if (atomic_inc_uint_nv(&unp_rights) > maxmsg) {
                   1383:                        atomic_dec_uint(&unp_rights);
                   1384:                        nfds = i;
                   1385:                        error = EAGAIN;
                   1386:                        goto out;
                   1387:                }
1.137     martin   1388:                if ((fp = fd_getfile(fd)) == NULL
                   1389:                    || fp->f_type == DTYPE_KQUEUE) {
                   1390:                        if (fp)
                   1391:                                fd_putfile(fd);
1.121     mrg      1392:                        atomic_dec_uint(&unp_rights);
1.120     pooka    1393:                        nfds = i;
1.106     ad       1394:                        error = EBADF;
                   1395:                        goto out;
1.101     ad       1396:                }
1.24      cgd      1397:        }
                   1398:
1.106     ad       1399:        /* Allocate new space and copy header into it. */
                                /*
                                 * NOTE(review): the allocation uses M_WAITOK; presumably
                                 * it cannot return NULL in that mode, which would make
                                 * the E2BIG branch unreachable -- confirm.
                                 */
                   1400:        newcm = malloc(CMSG_SPACE(nfds * sizeof(file_t *)), M_MBUF, M_WAITOK);
                   1401:        if (newcm == NULL) {
                   1402:                error = E2BIG;
                   1403:                goto out;
                   1404:        }
                   1405:        memcpy(newcm, cm, sizeof(struct cmsghdr));
                   1406:        files = (file_t **)CMSG_DATA(newcm);
                   1407:
1.24      cgd      1408:        /*
1.106     ad       1409:         * Transform the file descriptors into file_t pointers, in
1.24      cgd      1410:         * reverse order so that if pointers are bigger than ints, the
1.106     ad       1411:         * ints won't get overwritten until we're done.  No need to lock, as we have
                   1412:         * already validated the descriptors with fd_getfile().
1.24      cgd      1413:         */
                                /*
                                 * NOTE(review): the pointers are stored in the separate
                                 * newcm buffer, not over the ints, so the reverse order
                                 * no longer looks necessary -- confirm before changing.
                                 */
1.94      cbiere   1414:        fdp = (int *)CMSG_DATA(cm) + nfds;
                   1415:        rp = files + nfds;
1.24      cgd      1416:        for (i = 0; i < nfds; i++) {
1.126     ad       1417:                fp = fdescp->fd_dt->dt_ff[*--fdp]->ff_file;
1.106     ad       1418:                KASSERT(fp != NULL);
                   1419:                mutex_enter(&fp->f_lock);
1.94      cbiere   1420:                *--rp = fp;
                                        /* Count both the new reference and the in-flight message. */
1.1       cgd      1421:                fp->f_count++;
                   1422:                fp->f_msgcount++;
1.106     ad       1423:                mutex_exit(&fp->f_lock);
                   1424:        }
                   1425:
                   1426:  out:
                   1427:        /* Release descriptor references. */
                                /*
                                 * On failure the unp_rights charge taken for each of
                                 * these entries is also given back.
                                 */
                   1428:        fdp = (int *)CMSG_DATA(cm);
                   1429:        for (i = 0; i < nfds; i++) {
                   1430:                fd_putfile(*fdp++);
1.121     mrg      1431:                if (error != 0) {
                   1432:                        atomic_dec_uint(&unp_rights);
                   1433:                }
1.1       cgd      1434:        }
1.73      martin   1435:
1.106     ad       1436:        if (error == 0) {
                                        /*
                                         * An mbuf that already has external storage is
                                         * freed and replaced by a fresh one before the new
                                         * buffer is attached.
                                         */
1.108     yamt     1437:                if (control->m_flags & M_EXT) {
                   1438:                        m_freem(control);
                   1439:                        *controlp = control = m_get(M_WAIT, MT_CONTROL);
                   1440:                }
1.106     ad       1441:                MEXTADD(control, newcm, CMSG_SPACE(nfds * sizeof(file_t *)),
1.73      martin   1442:                    M_MBUF, NULL, NULL);
                   1443:                cm = newcm;
1.106     ad       1444:                /*
                   1445:                 * Adjust message & mbuf to note amount of space
                   1446:                 * actually used.
                   1447:                 */
                   1448:                cm->cmsg_len = CMSG_LEN(nfds * sizeof(file_t *));
                   1449:                control->m_len = CMSG_SPACE(nfds * sizeof(file_t *));
1.73      martin   1450:        }
                   1451:
1.106     ad       1452:        return error;
1.30      thorpej  1453: }
                   1454:
                   1455: struct mbuf *
1.92      ad       1456: unp_addsockcred(struct lwp *l, struct mbuf *control)
1.30      thorpej  1457: {
                                /*
                                 * Build an SCM_CREDS control message from l->l_cred (uid,
                                 * euid, gid, egid and the group list) and append it to the
                                 * mbuf chain "control".
                                 *
                                 * Returns the head of the resulting chain: the new mbuf
                                 * when control was NULL, otherwise control itself.  If
                                 * external storage is needed but cannot be attached, the
                                 * new mbuf is freed and control is returned unchanged.
                                 */
                   1458:        struct cmsghdr *cmp;
                   1459:        struct sockcred *sc;
                   1460:        struct mbuf *m, *n;
1.47      thorpej  1461:        int len, space, i;
1.30      thorpej  1462:
                                /* len is the cmsg_len value; space becomes the mbuf's m_len. */
1.92      ad       1463:        len = CMSG_LEN(SOCKCREDSIZE(kauth_cred_ngroups(l->l_cred)));
                   1464:        space = CMSG_SPACE(SOCKCREDSIZE(kauth_cred_ngroups(l->l_cred)));
1.30      thorpej  1465:
                   1466:        m = m_get(M_WAIT, MT_CONTROL);
                                /*
                                 * Messages larger than MLEN need external storage: a
                                 * malloc'ed buffer above MCLBYTES, a cluster otherwise.
                                 */
1.47      thorpej  1467:        if (space > MLEN) {
                   1468:                if (space > MCLBYTES)
                   1469:                        MEXTMALLOC(m, space, M_WAITOK);
1.30      thorpej  1470:                else
1.59      matt     1471:                        m_clget(m, M_WAIT);
                                        /* M_EXT still clear means no storage was attached. */
1.30      thorpej  1472:                if ((m->m_flags & M_EXT) == 0) {
                   1473:                        m_free(m);
                   1474:                        return (control);
                   1475:                }
                   1476:        }
                   1477:
1.47      thorpej  1478:        m->m_len = space;
1.30      thorpej  1479:        m->m_next = NULL;
                   1480:        cmp = mtod(m, struct cmsghdr *);
                   1481:        sc = (struct sockcred *)CMSG_DATA(cmp);
                   1482:        cmp->cmsg_len = len;
                   1483:        cmp->cmsg_level = SOL_SOCKET;
                   1484:        cmp->cmsg_type = SCM_CREDS;
1.92      ad       1485:        sc->sc_uid = kauth_cred_getuid(l->l_cred);
                   1486:        sc->sc_euid = kauth_cred_geteuid(l->l_cred);
                   1487:        sc->sc_gid = kauth_cred_getgid(l->l_cred);
                   1488:        sc->sc_egid = kauth_cred_getegid(l->l_cred);
                   1489:        sc->sc_ngroups = kauth_cred_ngroups(l->l_cred);
1.30      thorpej  1490:        for (i = 0; i < sc->sc_ngroups; i++)
1.92      ad       1491:                sc->sc_groups[i] = kauth_cred_group(l->l_cred, i);
1.30      thorpej  1492:
                   1493:        /*
                   1494:         * If a control message already exists, append us to the end.
                   1495:         */
                   1496:        if (control != NULL) {
                   1497:                for (n = control; n->m_next != NULL; n = n->m_next)
                   1498:                        ;
                   1499:                n->m_next = m;
                   1500:        } else
                   1501:                control = m;
                   1502:
                   1503:        return (control);
1.1       cgd      1504: }
                   1505:
1.39      sommerfe 1506: /*
1.121     mrg      1507:  * Do a mark-sweep GC of files in the system, to free up any which are
                   1508:  * caught in flight to an about-to-be-closed socket.  Additionally,
                   1509:  * process deferred file closures.
1.39      sommerfe 1510:  */
1.121     mrg      1511: static void
                   1512: unp_gc(file_t *dp)
1.1       cgd      1513: {
1.121     mrg      1514:        extern  struct domain unixdomain;
                   1515:        file_t *fp, *np;
1.46      augustss 1516:        struct socket *so, *so1;
1.121     mrg      1517:        u_int i, old, new;
                   1518:        bool didwork;
1.1       cgd      1519:
1.121     mrg      1520:        KASSERT(curlwp == unp_thread_lwp);
                   1521:        KASSERT(mutex_owned(&filelist_lock));
1.106     ad       1522:
1.121     mrg      1523:        /*
                   1524:         * First, process deferred file closures.
                   1525:         */
                   1526:        while (!SLIST_EMPTY(&unp_thread_discard)) {
                   1527:                fp = SLIST_FIRST(&unp_thread_discard);
                   1528:                KASSERT(fp->f_unpcount > 0);
                   1529:                KASSERT(fp->f_count > 0);
                   1530:                KASSERT(fp->f_msgcount > 0);
                   1531:                KASSERT(fp->f_count >= fp->f_unpcount);
                   1532:                KASSERT(fp->f_count >= fp->f_msgcount);
                   1533:                KASSERT(fp->f_msgcount >= fp->f_unpcount);
                   1534:                SLIST_REMOVE_HEAD(&unp_thread_discard, f_unplist);
                   1535:                i = fp->f_unpcount;
                   1536:                fp->f_unpcount = 0;
                   1537:                mutex_exit(&filelist_lock);
                   1538:                for (; i != 0; i--) {
                   1539:                        unp_discard_now(fp);
                   1540:                }
                   1541:                mutex_enter(&filelist_lock);
                   1542:        }
1.39      sommerfe 1543:
1.121     mrg      1544:        /*
                   1545:         * Clear mark bits.  Ensure that we don't consider new files
                   1546:         * entering the file table during this loop (they will not have
                   1547:         * FSCAN set).
                   1548:         */
1.106     ad       1549:        unp_defer = 0;
                   1550:        LIST_FOREACH(fp, &filehead, f_list) {
1.121     mrg      1551:                for (old = fp->f_flag;; old = new) {
                   1552:                        new = atomic_cas_uint(&fp->f_flag, old,
                   1553:                            (old | FSCAN) & ~(FMARK|FDEFER));
                   1554:                        if (__predict_true(old == new)) {
                   1555:                                break;
                   1556:                        }
                   1557:                }
1.106     ad       1558:        }
1.39      sommerfe 1559:
                   1560:        /*
1.121     mrg      1561:         * Iterate over the set of sockets, marking ones believed (based on
                   1562:         * refcount) to be referenced from a process, and marking for rescan
                   1564:         * sockets which are queued on a socket.  Rescan continues descending
                   1564:         * and searching for sockets referenced by sockets (FDEFER), until
                   1565:         * there are no more socket->socket references to be discovered.
1.39      sommerfe 1566:         */
1.1       cgd      1567:        do {
1.121     mrg      1568:                didwork = false;
                   1569:                for (fp = LIST_FIRST(&filehead); fp != NULL; fp = np) {
                   1570:                        KASSERT(mutex_owned(&filelist_lock));
                   1571:                        np = LIST_NEXT(fp, f_list);
1.106     ad       1572:                        mutex_enter(&fp->f_lock);
1.121     mrg      1573:                        if ((fp->f_flag & FDEFER) != 0) {
1.106     ad       1574:                                atomic_and_uint(&fp->f_flag, ~FDEFER);
1.1       cgd      1575:                                unp_defer--;
1.106     ad       1576:                                KASSERT(fp->f_count != 0);
1.1       cgd      1577:                        } else {
1.101     ad       1578:                                if (fp->f_count == 0 ||
1.121     mrg      1579:                                    (fp->f_flag & FMARK) != 0 ||
                   1580:                                    fp->f_count == fp->f_msgcount ||
                   1581:                                    fp->f_unpcount != 0) {
1.106     ad       1582:                                        mutex_exit(&fp->f_lock);
1.1       cgd      1583:                                        continue;
1.101     ad       1584:                                }
1.1       cgd      1585:                        }
1.106     ad       1586:                        atomic_or_uint(&fp->f_flag, FMARK);
1.39      sommerfe 1587:
1.1       cgd      1588:                        if (fp->f_type != DTYPE_SOCKET ||
1.112     ad       1589:                            (so = fp->f_data) == NULL ||
1.101     ad       1590:                            so->so_proto->pr_domain != &unixdomain ||
1.121     mrg      1591:                            (so->so_proto->pr_flags & PR_RIGHTS) == 0) {
1.106     ad       1592:                                mutex_exit(&fp->f_lock);
1.1       cgd      1593:                                continue;
1.101     ad       1594:                        }
1.121     mrg      1595:
                   1596:                        /* Gain file ref, mark our position, and unlock. */
                   1597:                        didwork = true;
                   1598:                        LIST_INSERT_AFTER(fp, dp, f_list);
                   1599:                        fp->f_count++;
1.106     ad       1600:                        mutex_exit(&fp->f_lock);
1.121     mrg      1601:                        mutex_exit(&filelist_lock);
1.101     ad       1602:
1.112     ad       1603:                        /*
1.121     mrg      1604:                         * Mark files referenced from sockets queued on the
                   1605:                         * accept queue as well.
1.112     ad       1606:                         */
                   1607:                        solock(so);
1.39      sommerfe 1608:                        unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
1.121     mrg      1609:                        if ((so->so_options & SO_ACCEPTCONN) != 0) {
1.54      matt     1610:                                TAILQ_FOREACH(so1, &so->so_q0, so_qe) {
1.39      sommerfe 1611:                                        unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
                   1612:                                }
1.54      matt     1613:                                TAILQ_FOREACH(so1, &so->so_q, so_qe) {
1.39      sommerfe 1614:                                        unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
                   1615:                                }
                   1616:                        }
1.112     ad       1617:                        sounlock(so);
1.121     mrg      1618:
                   1619:                        /* Re-lock and restart from where we left off. */
                   1620:                        closef(fp);
                   1621:                        mutex_enter(&filelist_lock);
                   1622:                        np = LIST_NEXT(dp, f_list);
                   1623:                        LIST_REMOVE(dp, f_list);
1.1       cgd      1624:                }
1.121     mrg      1625:                /*
                   1626:                 * Bail early if we did nothing in the loop above.  Could
                   1627:                 * happen because of concurrent activity causing unp_defer
                   1628:                 * to get out of sync.
                   1629:                 */
                   1630:        } while (unp_defer != 0 && didwork);
1.101     ad       1631:
1.8       mycroft  1632:        /*
1.121     mrg      1633:         * Sweep pass.
1.8       mycroft  1634:         *
1.121     mrg      1635:         * We grab an extra reference to each of the files that are
                   1636:         * not otherwise accessible and then free the rights that are
                   1637:         * stored in messages on them.
1.8       mycroft  1638:         */
1.121     mrg      1639:        for (fp = LIST_FIRST(&filehead); fp != NULL; fp = np) {
                   1640:                KASSERT(mutex_owned(&filelist_lock));
                   1641:                np = LIST_NEXT(fp, f_list);
1.106     ad       1642:                mutex_enter(&fp->f_lock);
1.121     mrg      1643:
                   1644:                /*
                   1645:                 * Ignore non-sockets.
                   1646:                 * Ignore dead sockets, or sockets with pending close.
                   1647:                 * Ignore sockets obviously referenced elsewhere.
                   1648:                 * Ignore sockets marked as referenced by our scan.
                   1649:                 * Ignore new sockets that did not exist during the scan.
                   1650:                 */
                   1651:                if (fp->f_type != DTYPE_SOCKET ||
                   1652:                    fp->f_count == 0 || fp->f_unpcount != 0 ||
                   1653:                    fp->f_count != fp->f_msgcount ||
                   1654:                    (fp->f_flag & (FMARK | FSCAN)) != FSCAN) {
                   1655:                        mutex_exit(&fp->f_lock);
                   1656:                        continue;
1.8       mycroft  1657:                }
1.121     mrg      1658:
                   1659:                /* Gain file ref, mark our position, and unlock. */
                   1660:                LIST_INSERT_AFTER(fp, dp, f_list);
                   1661:                fp->f_count++;
1.106     ad       1662:                mutex_exit(&fp->f_lock);
1.121     mrg      1663:                mutex_exit(&filelist_lock);
                   1664:
                   1665:                /*
                   1666:                 * Flush all data from the socket's receive buffer.
                   1667:                 * This will cause files referenced only by the
                   1668:                 * socket to be queued for close.
                   1669:                 */
                   1670:                so = fp->f_data;
                   1671:                solock(so);
                   1672:                sorflush(so);
                   1673:                sounlock(so);
                   1674:
                   1675:                /* Re-lock and restart from where we left off. */
                   1676:                closef(fp);
                   1677:                mutex_enter(&filelist_lock);
                   1678:                np = LIST_NEXT(dp, f_list);
                   1679:                LIST_REMOVE(dp, f_list);
                   1680:        }
                   1681: }
                   1682:
                   1683: /*
                   1684:  * Garbage collector thread.  While SCM_RIGHTS messages are in transit,
                   1685:  * wake once per second to garbage collect.  Run continually while we
                   1686:  * have deferred closes to process.
                          *
                          * The cookie argument is not used.  filelist_lock is taken once
                          * before the loop, is asserted held at the top of every iteration,
                          * and is never released by an explicit mutex_exit() in this
                          * function.  The loop has no exit, so the function does not return.
                   1687:  */
                   1688: static void
                   1689: unp_thread(void *cookie)
                   1690: {
                   1691:        file_t *dp;
                   1692:
                   1693:        /* Allocate a dummy file for our scans. */
                   1694:        if ((dp = fgetdummy()) == NULL) {
                   1695:                panic("unp_thread");
1.1       cgd      1696:        }
1.101     ad       1697:
1.121     mrg      1698:        mutex_enter(&filelist_lock);
                   1699:        for (;;) {
                   1700:                KASSERT(mutex_owned(&filelist_lock));
                                        /*
                                         * Sleep only when no deferred discards are queued;
                                         * otherwise go straight on to unp_gc().
                                         */
                   1701:                if (SLIST_EMPTY(&unp_thread_discard)) {
                   1702:                        if (unp_rights != 0) {
                                                        /*
                                                         * unp_rights is non-zero: wait with a
                                                         * timeout of hz ticks so a pass runs
                                                         * even without a wakeup.  The result
                                                         * of the wait is deliberately ignored.
                                                         */
                   1703:                                (void)cv_timedwait(&unp_thread_cv,
                   1704:                                    &filelist_lock, hz);
                   1705:                        } else {
                                                        /*
                                                         * unp_rights is zero: wait on
                                                         * unp_thread_cv with no timeout.
                                                         */
                   1706:                                cv_wait(&unp_thread_cv, &filelist_lock);
                   1707:                        }
1.112     ad       1708:                }
                                        /* Run one pass, handing over the dummy file. */
1.121     mrg      1709:                unp_gc(dp);
1.39      sommerfe 1710:        }
1.121     mrg      1711:        /* NOTREACHED */
                   1712: }
                   1713:
                   1714: /*
                   1715:  * Kick the garbage collector into action if there is something for
                   1716:  * it to process.
                          *
                          * "Something to process" means unp_thread_discard is non-empty or
                          * unp_rights is non-zero.  That test is made before filelist_lock
                          * is taken; the lock is held only around the cv_signal() call.
                          * NOTE(review): the unlocked test is presumably tolerable because
                          * unp_thread() uses a timed wait while unp_rights != 0 -- confirm.
                   1717:  */
                   1718: static void
                   1719: unp_thread_kick(void)
                   1720: {
                   1721:
                   1722:        if (!SLIST_EMPTY(&unp_thread_discard) || unp_rights != 0) {
                   1723:                mutex_enter(&filelist_lock);
                                        /* unp_thread() waits on this condition variable. */
                   1724:                cv_signal(&unp_thread_cv);
                   1725:                mutex_exit(&filelist_lock);
1.44      thorpej  1726:        }
1.1       cgd      1727: }
                   1728:
1.5       andrew   1729: void
1.76      matt     1730: unp_dispose(struct mbuf *m)
1.1       cgd      1731: {
                                /*
                                 * Hand every file pointer found in the SCM_RIGHTS control
                                 * messages of chain m to unp_discard_later().  The last
                                 * argument (discard = 1) makes unp_scan() zero each pointer
                                 * slot in the message as it is handed over.  Does nothing
                                 * when m is NULL.
                                 */
1.8       mycroft  1732:
1.1       cgd      1733:        if (m)
1.121     mrg      1734:                unp_scan(m, unp_discard_later, 1);
1.1       cgd      1735: }
                   1736:
1.5       andrew   1737: void
1.106     ad       1738: unp_scan(struct mbuf *m0, void (*op)(file_t *), int discard)
1.1       cgd      1739: {
                                /*
                                 * Walk the packet chain that starts at m0 (packets are linked
                                 * through m_nextpkt) and, inside each packet, every mbuf
                                 * (linked through m_next).  For each mbuf holding a
                                 * SOL_SOCKET / SCM_RIGHTS control message, call op once per
                                 * file pointer slot in that message.  When discard is
                                 * non-zero the slot is zeroed before op is called.  op is
                                 * given whatever value the slot held; no NULL check is made
                                 * here.
                                 */
1.46      augustss 1740:        struct mbuf *m;
1.121     mrg      1741:        file_t **rp, *fp;
1.46      augustss 1742:        struct cmsghdr *cm;
1.121     mrg      1743:        int i, qfds;
1.1       cgd      1744:
                   1745:        while (m0) {
1.48      thorpej  1746:                for (m = m0; m; m = m->m_next) {
                                                /*
                                                 * Skip mbufs that are not control data or
                                                 * are too short to hold a cmsghdr.
                                                 */
1.121     mrg      1747:                        if (m->m_type != MT_CONTROL ||
                   1748:                            m->m_len < sizeof(*cm)) {
                   1749:                                continue;
                   1750:                        }
                                                /* Only one header per mbuf is examined. */
                   1751:                        cm = mtod(m, struct cmsghdr *);
                   1752:                        if (cm->cmsg_level != SOL_SOCKET ||
                   1753:                            cm->cmsg_type != SCM_RIGHTS)
                   1754:                                continue;
                                                /*
                                                 * Slot count: message length minus the
                                                 * aligned header, in units of file_t *.
                                                 * NOTE(review): cmsg_len is not checked
                                                 * here against the header size or m_len;
                                                 * presumably whoever built the message
                                                 * guarantees it -- confirm.
                                                 */
                   1755:                        qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm)))
                   1756:                            / sizeof(file_t *);
                   1757:                        rp = (file_t **)CMSG_DATA(cm);
                   1758:                        for (i = 0; i < qfds; i++) {
                                                        /*
                                                         * Read the slot first so op still
                                                         * gets the pointer after the slot
                                                         * has been cleared.
                                                         */
                   1759:                                fp = *rp;
                   1760:                                if (discard) {
                   1761:                                        *rp = 0;
1.39      sommerfe 1762:                                }
1.121     mrg      1763:                                (*op)(fp);
                   1764:                                rp++;
1.1       cgd      1765:                        }
1.48      thorpej  1766:                }
1.52      thorpej  1767:                m0 = m0->m_nextpkt;
1.1       cgd      1768:        }
                   1769: }
                   1770:
1.5       andrew   1771: void
1.106     ad       1772: unp_mark(file_t *fp)
1.1       cgd      1773: {
                                /*
                                 * Marking callback with the signature unp_scan() expects.
                                 * A NULL fp is ignored.  With fp->f_lock held: a file that
                                 * already has FMARK or FDEFER set is left untouched; a
                                 * socket gets FDEFER and bumps unp_defer; any other file
                                 * type gets FMARK.
                                 */
1.101     ad       1774:
1.39      sommerfe 1775:        if (fp == NULL)
                   1776:                return;
1.80      perry    1777:
1.39      sommerfe 1778:        /* If we're already deferred, don't screw up the defer count */
1.106     ad       1779:        mutex_enter(&fp->f_lock);
1.101     ad       1780:        if (fp->f_flag & (FMARK | FDEFER)) {
1.106     ad       1781:                mutex_exit(&fp->f_lock);
1.1       cgd      1782:                return;
1.101     ad       1783:        }
1.39      sommerfe 1784:
                   1785:        /*
1.121     mrg      1786:         * Minimize the number of deferrals...  Sockets are the only type of
                   1787:         * file which can hold references to another file, so just mark
                   1788:         * other files, and defer unmarked sockets for the next pass.
1.39      sommerfe 1789:         */
                   1790:        if (fp->f_type == DTYPE_SOCKET) {
                                        /*
                                         * NOTE(review): unp_defer is a plain increment made
                                         * under the per-file lock only; presumably marking
                                         * runs in a single thread -- confirm.
                                         */
                   1791:                unp_defer++;
1.106     ad       1792:                KASSERT(fp->f_count != 0);
                   1793:                atomic_or_uint(&fp->f_flag, FDEFER);
1.39      sommerfe 1794:        } else {
1.106     ad       1795:                atomic_or_uint(&fp->f_flag, FMARK);
1.39      sommerfe 1796:        }
1.106     ad       1797:        mutex_exit(&fp->f_lock);
1.1       cgd      1798: }
                   1799:
1.121     mrg      1800: static void
                   1801: unp_discard_now(file_t *fp)
1.1       cgd      1802: {
                                /*
                                 * Immediately undo one counted right on fp.  A NULL fp is
                                 * ignored.  Order of operations: f_msgcount is decremented
                                 * with fp->f_lock held, the global unp_rights counter is
                                 * atomically decremented, and finally closef(fp) is called
                                 * with its return value ignored.
                                 */
1.106     ad       1803:
1.39      sommerfe 1804:        if (fp == NULL)
                   1805:                return;
1.106     ad       1806:
                                /* These assertions read the counts before f_lock is taken. */
1.121     mrg      1807:        KASSERT(fp->f_count > 0);
                   1808:        KASSERT(fp->f_msgcount > 0);
                   1809:
1.106     ad       1810:        mutex_enter(&fp->f_lock);
1.1       cgd      1811:        fp->f_msgcount--;
1.106     ad       1812:        mutex_exit(&fp->f_lock);
                   1813:        atomic_dec_uint(&unp_rights);
                   1814:        (void)closef(fp);
1.1       cgd      1815: }
1.121     mrg      1816:
                   1817: static void
                   1818: unp_discard_later(file_t *fp)
                   1819: {
                                /*
                                 * Queue fp for a deferred discard; nothing is closed here.
                                 * A NULL fp is ignored.  With filelist_lock held, f_unpcount
                                 * is incremented, and only when it was zero beforehand is fp
                                 * pushed onto the head of unp_thread_discard.  This function
                                 * does not signal unp_thread_cv itself.
                                 */
                   1820:
                   1821:        if (fp == NULL)
                   1822:                return;
                   1823:
                   1824:        KASSERT(fp->f_count > 0);
                   1825:        KASSERT(fp->f_msgcount > 0);
                   1826:
                   1827:        mutex_enter(&filelist_lock);
                                /*
                                 * Post-increment: the test sees the old count.  Presumably a
                                 * non-zero old count means fp is already on the list --
                                 * confirm against the code that drains it.
                                 */
                   1828:        if (fp->f_unpcount++ == 0) {
                   1829:                SLIST_INSERT_HEAD(&unp_thread_discard, fp, f_unplist);
                   1830:        }
                   1831:        mutex_exit(&filelist_lock);
                   1832: }

CVSweb <webmaster@jp.NetBSD.org>