[BACK]Return to uipc_usrreq.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / kern

Annotation of src/sys/kern/uipc_usrreq.c, Revision 1.157

1.157   ! rtr         1: /*     $NetBSD: uipc_usrreq.c,v 1.156 2014/07/06 03:33:33 rtr Exp $    */
1.30      thorpej     2:
                      3: /*-
1.121     mrg         4:  * Copyright (c) 1998, 2000, 2004, 2008, 2009 The NetBSD Foundation, Inc.
1.30      thorpej     5:  * All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to The NetBSD Foundation
                      8:  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
1.121     mrg         9:  * NASA Ames Research Center, and by Andrew Doran.
1.30      thorpej    10:  *
                     11:  * Redistribution and use in source and binary forms, with or without
                     12:  * modification, are permitted provided that the following conditions
                     13:  * are met:
                     14:  * 1. Redistributions of source code must retain the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer.
                     16:  * 2. Redistributions in binary form must reproduce the above copyright
                     17:  *    notice, this list of conditions and the following disclaimer in the
                     18:  *    documentation and/or other materials provided with the distribution.
                     19:  *
                     20:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     21:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     22:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     23:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     24:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     25:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     26:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     27:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     28:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     29:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     30:  * POSSIBILITY OF SUCH DAMAGE.
                     31:  */
1.10      cgd        32:
1.1       cgd        33: /*
1.8       mycroft    34:  * Copyright (c) 1982, 1986, 1989, 1991, 1993
                     35:  *     The Regents of the University of California.  All rights reserved.
1.1       cgd        36:  *
                     37:  * Redistribution and use in source and binary forms, with or without
                     38:  * modification, are permitted provided that the following conditions
                     39:  * are met:
                     40:  * 1. Redistributions of source code must retain the above copyright
                     41:  *    notice, this list of conditions and the following disclaimer.
                     42:  * 2. Redistributions in binary form must reproduce the above copyright
                     43:  *    notice, this list of conditions and the following disclaimer in the
                     44:  *    documentation and/or other materials provided with the distribution.
1.67      agc        45:  * 3. Neither the name of the University nor the names of its contributors
                     46:  *    may be used to endorse or promote products derived from this software
                     47:  *    without specific prior written permission.
                     48:  *
                     49:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     50:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     51:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     52:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     53:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     54:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     55:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     56:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     57:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     58:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     59:  * SUCH DAMAGE.
                     60:  *
                     61:  *     @(#)uipc_usrreq.c       8.9 (Berkeley) 5/14/95
                     62:  */
                     63:
                     64: /*
                     65:  * Copyright (c) 1997 Christopher G. Demetriou.  All rights reserved.
                     66:  *
                     67:  * Redistribution and use in source and binary forms, with or without
                     68:  * modification, are permitted provided that the following conditions
                     69:  * are met:
                     70:  * 1. Redistributions of source code must retain the above copyright
                     71:  *    notice, this list of conditions and the following disclaimer.
                     72:  * 2. Redistributions in binary form must reproduce the above copyright
                     73:  *    notice, this list of conditions and the following disclaimer in the
                     74:  *    documentation and/or other materials provided with the distribution.
1.1       cgd        75:  * 3. All advertising materials mentioning features or use of this software
                     76:  *    must display the following acknowledgement:
                     77:  *     This product includes software developed by the University of
                     78:  *     California, Berkeley and its contributors.
                     79:  * 4. Neither the name of the University nor the names of its contributors
                     80:  *    may be used to endorse or promote products derived from this software
                     81:  *    without specific prior written permission.
                     82:  *
                     83:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     84:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     85:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     86:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     87:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     88:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     89:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     90:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     91:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     92:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     93:  * SUCH DAMAGE.
                     94:  *
1.31      fvdl       95:  *     @(#)uipc_usrreq.c       8.9 (Berkeley) 5/14/95
1.1       cgd        96:  */
1.53      lukem      97:
                     98: #include <sys/cdefs.h>
1.157   ! rtr        99: __KERNEL_RCSID(0, "$NetBSD: uipc_usrreq.c,v 1.156 2014/07/06 03:33:33 rtr Exp $");
1.1       cgd       100:
1.7       mycroft   101: #include <sys/param.h>
1.8       mycroft   102: #include <sys/systm.h>
1.7       mycroft   103: #include <sys/proc.h>
                    104: #include <sys/filedesc.h>
                    105: #include <sys/domain.h>
                    106: #include <sys/protosw.h>
                    107: #include <sys/socket.h>
                    108: #include <sys/socketvar.h>
                    109: #include <sys/unpcb.h>
                    110: #include <sys/un.h>
                    111: #include <sys/namei.h>
                    112: #include <sys/vnode.h>
                    113: #include <sys/file.h>
                    114: #include <sys/stat.h>
                    115: #include <sys/mbuf.h>
1.91      elad      116: #include <sys/kauth.h>
1.101     ad        117: #include <sys/kmem.h>
1.106     ad        118: #include <sys/atomic.h>
1.119     pooka     119: #include <sys/uidinfo.h>
1.121     mrg       120: #include <sys/kernel.h>
                    121: #include <sys/kthread.h>
1.1       cgd       122:
                    123: /*
                    124:  * Unix communications domain.
                    125:  *
                    126:  * TODO:
1.134     manu      127:  *     RDM
1.1       cgd       128:  *     rethink name space problems
                    129:  *     need a proper out-of-band
1.112     ad        130:  *
                    131:  * Notes on locking:
                    132:  *
                    133:  * The generic rules noted in uipc_socket2.c apply.  In addition:
                    134:  *
                    135:  * o We have a global lock, uipc_lock.
                    136:  *
                    137:  * o All datagram sockets are locked by uipc_lock.
                    138:  *
                    139:  * o For stream socketpairs, the two endpoints are created sharing the same
                    140:  *   independent lock.  Sockets presented to PRU_CONNECT2 must already have
                    141:  *   matching locks.
                    142:  *
                    143:  * o Stream sockets created via socket() start life with their own
                    144:  *   independent lock.
                    145:  *
                    146:  * o Stream connections to a named endpoint are slightly more complicated.
                    147:  *   Sockets that have called listen() have their lock pointer mutated to
                    148:  *   the global uipc_lock.  When establishing a connection, the connecting
                    149:  *   socket also has its lock mutated to uipc_lock, which matches the head
                    150:  *   (listening socket).  We create a new socket for accept() to return, and
                    151:  *   that also shares the head's lock.  Until the connection is completely
                    152:  *   done on both ends, all three sockets are locked by uipc_lock.  Once the
                    153:  *   connection is complete, the association with the head's lock is broken.
                    154:  *   The connecting socket and the socket returned from accept() have their
                    155:  *   lock pointers mutated away from uipc_lock, and back to the connecting
                    156:  *   socket's original, independent lock.  The head continues to be locked
                    157:  *   by uipc_lock.
                    158:  *
                    159:  * o If uipc_lock is determined to be a significant source of contention,
                    160:  *   it could easily be hashed out.  It is difficult to simply make it an
                    161:  *   independent lock because of visibility / garbage collection issues:
                    162:  *   if a socket has been associated with a lock at any point, that lock
                    163:  *   must remain valid until the socket is no longer visible in the system.
                    164:  *   The lock must not be freed or otherwise destroyed until any sockets
                    165:  *   that had referenced it have also been destroyed.
1.1       cgd       166:  */
1.93      christos  167: const struct sockaddr_un sun_noname = {
                                /*
                                 * Placeholder AF_LOCAL address used when a socket has no
                                 * address of its own (see the fallbacks in unp_output() and
                                 * unp_setaddr()).  sun_len is the offset of sun_path, so the
                                 * address consists of the fixed header only, with no path bytes.
                                 */
1.145     christos  168:        .sun_len = offsetof(struct sockaddr_un, sun_path),
1.93      christos  169:        .sun_family = AF_LOCAL,
                    170: };
1.1       cgd       171: ino_t  unp_ino;                        /* prototype for fake inode numbers */
                    172:
                         /* Forward declarations for helpers used in this file. */
1.152     rmind     173: static void unp_detach(struct socket *);
1.92      ad        174: struct mbuf *unp_addsockcred(struct lwp *, struct mbuf *);
1.121     mrg       175: static void unp_mark(file_t *);
                    176: static void unp_scan(struct mbuf *, void (*)(file_t *), int);
                    177: static void unp_discard_now(file_t *);
                    178: static void unp_discard_later(file_t *);
                    179: static void unp_thread(void *);
                    180: static void unp_thread_kick(void);
                         /*
                          * Domain-wide lock described in "Notes on locking" above;
                          * allocated in uipc_init().
                          */
1.112     ad        181: static kmutex_t *uipc_lock;
                    182:
                         /*
                          * Set up in uipc_init(): the condition variable initialised with
                          * the name "unpgc", and the LWP of the kernel thread that runs
                          * unp_thread().
                          */
1.121     mrg       183: static kcondvar_t unp_thread_cv;
                    184: static lwp_t *unp_thread_lwp;
                         /*
                          * NOTE(review): the users of the next two variables are outside
                          * this excerpt.  Presumably the list of files queued by
                          * unp_discard_later() for unp_thread(), and a deferral counter;
                          * confirm against those functions.
                          */
                    185: static SLIST_HEAD(,file) unp_thread_discard;
                    186: static int unp_defer;
                    187:
1.112     ad        188: /*
                    189:  * Initialize Unix protocols.
                          *
                          * Allocates the domain-wide mutex uipc_lock, initialises the
                          * condition variable unp_thread_cv, and creates the MP-safe
                          * kernel thread "unpgc" running unp_thread(); its LWP is stored
                          * in unp_thread_lwp.  Panics if the thread cannot be created.
                    190:  */
                    191: void
                    192: uipc_init(void)
                    193: {
1.121     mrg       194:        int error;
1.112     ad        195:
                    196:        uipc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
1.121     mrg       197:        cv_init(&unp_thread_cv, "unpgc");
                    198:
                    199:        error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, unp_thread,
                    200:            NULL, &unp_thread_lwp, "unpgc");
                                /* There is no recovery path: a failed thread creation is fatal. */
                    201:        if (error != 0)
                    202:                panic("uipc_init %d", error);
1.112     ad        203: }
                    204:
                    205: /*
                    206:  * A connection succeeded: disassociate both endpoints from the head's
                    207:  * lock, and make them share their own lock.  There is a race here: for
                    208:  * a very brief time one endpoint will be locked by a different lock
                    209:  * than the other end.  However, since the current thread holds the old
                    210:  * lock (the listening socket's lock, the head) access can still only be
                    211:  * made to one side of the connection.
                          *
                          * so:  the endpoint that owns the stream lock.  It must have no
                          *      so_head, must currently be locked by uipc_lock, and must have
                          *      a non-NULL unp_streamlock; that lock becomes the shared lock
                          *      and unp_streamlock is cleared.
                          * so2: the other endpoint.  It must not have a unp_streamlock.  If
                          *      it still has a so_head the function returns without changing
                          *      any lock.
                          *
                          * The caller must satisfy solocked2(so, so2) and must already hold
                          * the stream lock being installed (asserted with mutex_owned()).
                    212:  */
                    213: static void
                    214: unp_setpeerlocks(struct socket *so, struct socket *so2)
                    215: {
                    216:        struct unpcb *unp;
                    217:        kmutex_t *lock;
                    218:
                    219:        KASSERT(solocked2(so, so2));
                    220:
                    221:        /*
                    222:         * Bail out if either end of the socket is not yet fully
                    223:         * connected or accepted.  We only break the lock association
                    224:         * with the head when the pair of sockets stand completely
                    225:         * on their own.
                    226:         */
1.125     yamt      227:        KASSERT(so->so_head == NULL);
                    228:        if (so2->so_head != NULL)
1.112     ad        229:                return;
                    230:
                    231:        /*
                    232:         * Drop references to old lock.  A third reference (from the
                    233:         * queue head) must be held as we still hold its lock.  Bonus:
                    234:         * we don't need to worry about garbage collecting the lock.
                    235:         */
                    236:        lock = so->so_lock;
                    237:        KASSERT(lock == uipc_lock);
                                /* One reference is dropped for each of the two endpoints. */
                    238:        mutex_obj_free(lock);
                    239:        mutex_obj_free(lock);
                    240:
                    241:        /*
                    242:         * Grab stream lock from the initiator and share between the two
                    243:         * endpoints.  Issue memory barrier to ensure all modifications
                    244:         * become globally visible before the lock change.  so2 is
                    245:         * assumed not to have a stream lock, because it was created
                    246:         * purely for the server side to accept this connection and
                    247:         * started out life using the domain-wide lock.
                    248:         */
                    249:        unp = sotounpcb(so);
                    250:        KASSERT(unp->unp_streamlock != NULL);
                    251:        KASSERT(sotounpcb(so2)->unp_streamlock == NULL);
                    252:        lock = unp->unp_streamlock;
                    253:        unp->unp_streamlock = NULL;
                                /*
                                 * Take one extra reference: the lock is about to be installed
                                 * on two sockets.
                                 */
                    254:        mutex_obj_hold(lock);
                    255:        membar_exit();
1.127     bouyer    256:        /*
                    257:         * possible race if lock is not held - see comment in
                    258:         * uipc_usrreq(PRU_ACCEPT).
                    259:         */
                    260:        KASSERT(mutex_owned(lock));
1.115     ad        261:        solockreset(so, lock);
                    262:        solockreset(so2, lock);
1.112     ad        263: }
                    264:
                    265: /*
                    266:  * Reset a socket's lock back to the domain-wide lock.
                          *
                          * The caller must hold the socket's current lock.  If that lock is
                          * already uipc_lock nothing happens.  Otherwise the current lock is
                          * saved in the pcb's unp_streamlock (which must be empty), a
                          * reference to uipc_lock is taken, uipc_lock is acquired and
                          * installed with solockreset(), and only then is the old lock
                          * released.  The calling thread therefore holds uipc_lock on return.
                    267:  */
                    268: static void
                    269: unp_resetlock(struct socket *so)
                    270: {
                    271:        kmutex_t *olock, *nlock;
                    272:        struct unpcb *unp;
                    273:
                    274:        KASSERT(solocked(so));
                    275:
                    276:        olock = so->so_lock;
                    277:        nlock = uipc_lock;
                    278:        if (olock == nlock)
                    279:                return;
                    280:        unp = sotounpcb(so);
                    281:        KASSERT(unp->unp_streamlock == NULL);
                                /*
                                 * Keep the private lock in the pcb instead of freeing it;
                                 * unp_setpeerlocks() and unp_free() read it from there.
                                 */
                    282:        unp->unp_streamlock = olock;
                    283:        mutex_obj_hold(nlock);
                                /* Acquire the new lock before releasing the old one. */
                    284:        mutex_enter(nlock);
1.115     ad        285:        solockreset(so, nlock);
1.112     ad        286:        mutex_exit(olock);
                    287: }
                    288:
                    289: static void
                    290: unp_free(struct unpcb *unp)
                    291: {
                                /*
                                 * Release a unpcb together with what it still owns: the
                                 * address in unp_addr (if set), one reference on the saved
                                 * stream lock in unp_streamlock (if set), and finally the
                                 * unpcb structure itself.
                                 */
                    292:        if (unp->unp_addr)
                    293:                free(unp->unp_addr, M_SONAME);
                    294:        if (unp->unp_streamlock != NULL)
                    295:                mutex_obj_free(unp->unp_streamlock);
1.152     rmind     296:        kmem_free(unp, sizeof(*unp));
1.112     ad        297: }
1.30      thorpej   298:
1.20      mycroft   299: int
1.76      matt      300: unp_output(struct mbuf *m, struct mbuf *control, struct unpcb *unp,
1.92      ad        301:        struct lwp *l)
1.20      mycroft   302: {
                                /*
                                 * Append the data chain m, with optional control chain control,
                                 * to the receive buffer of the socket connected to unp.  The
                                 * message is tagged with unp's own address, or with sun_noname
                                 * if it has none.
                                 *
                                 * l is passed to unp_addsockcred() when the peer has
                                 * UNP_WANTCRED set.
                                 *
                                 * Returns 0 after waking the receiver, ECONNREFUSED if unp has
                                 * no peer, or ENOBUFS if sbappendaddr() fails.  On ENOBUFS the
                                 * overflow counter is bumped and both chains are released here.
                                 *
                                 * NOTE(review): the ECONNREFUSED path returns without freeing
                                 * m or control, unlike the ENOBUFS path.  Confirm that every
                                 * caller releases them in that case; otherwise this is a leak.
                                 */
                    303:        struct socket *so2;
1.77      matt      304:        const struct sockaddr_un *sun;
1.20      mycroft   305:
1.153     christos  306:        /* XXX: server side closed the socket */
                    307:        if (unp->unp_conn == NULL)
                    308:                return ECONNREFUSED;
1.20      mycroft   309:        so2 = unp->unp_conn->unp_socket;
1.112     ad        310:
                    311:        KASSERT(solocked(so2));
                    312:
                                /* Source address reported to the receiver. */
1.20      mycroft   313:        if (unp->unp_addr)
                    314:                sun = unp->unp_addr;
                    315:        else
                    316:                sun = &sun_noname;
                                /*
                                 * The control chain is replaced by whatever
                                 * unp_addsockcred() returns.
                                 */
1.30      thorpej   317:        if (unp->unp_conn->unp_flags & UNP_WANTCRED)
1.92      ad        318:                control = unp_addsockcred(l, control);
1.82      christos  319:        if (sbappendaddr(&so2->so_rcv, (const struct sockaddr *)sun, m,
1.20      mycroft   320:            control) == 0) {
                                        /*
                                         * The append failed: count the overflow, dispose of
                                         * the control contents, then free both chains.
                                         */
1.112     ad        321:                so2->so_rcv.sb_overflowed++;
1.98      martin    322:                unp_dispose(control);
1.20      mycroft   323:                m_freem(control);
                    324:                m_freem(m);
1.60      christos  325:                return (ENOBUFS);
1.20      mycroft   326:        } else {
                    327:                sorwakeup(so2);
                    328:                return (0);
                    329:        }
                    330: }
                    331:
                    332: void
1.112     ad        333: unp_setaddr(struct socket *so, struct mbuf *nam, bool peeraddr)
1.20      mycroft   334: {
                                /*
                                 * Copy an address of so into the mbuf nam and set nam->m_len
                                 * to its length.  With peeraddr true the connected peer's
                                 * address is used, otherwise the socket's own; if the chosen
                                 * address is not set, sun_noname is copied instead.
                                 *
                                 * The caller must hold the socket lock.  The lock is dropped
                                 * and retaken here if nam needs external storage, so socket
                                 * state can change across this call.
                                 */
1.77      matt      335:        const struct sockaddr_un *sun;
1.112     ad        336:        struct unpcb *unp;
                    337:        bool ext;
1.20      mycroft   338:
1.127     bouyer    339:        KASSERT(solocked(so));
1.112     ad        340:        unp = sotounpcb(so);
                    341:        ext = false;
1.20      mycroft   342:
                                /*
                                 * Runs at most twice: the second pass happens only after
                                 * external storage was attached with the lock dropped, and
                                 * it looks the address up again from scratch.
                                 */
1.112     ad        343:        for (;;) {
                    344:                sun = NULL;
                    345:                if (peeraddr) {
                    346:                        if (unp->unp_conn && unp->unp_conn->unp_addr)
                    347:                                sun = unp->unp_conn->unp_addr;
                    348:                } else {
                    349:                        if (unp->unp_addr)
                    350:                                sun = unp->unp_addr;
                    351:                }
                    352:                if (sun == NULL)
                    353:                        sun = &sun_noname;
                    354:                nam->m_len = sun->sun_len;
                    355:                if (nam->m_len > MLEN && !ext) {
                                                /*
                                                 * The address is longer than MLEN: attach
                                                 * external storage.  The allocation is
                                                 * M_WAITOK, so the socket lock is released
                                                 * around it.
                                                 */
                    356:                        sounlock(so);
                    357:                        MEXTMALLOC(nam, MAXPATHLEN * 2, M_WAITOK);
                    358:                        solock(so);
                    359:                        ext = true;
                    360:                } else {
                                                /*
                                                 * The bound checked here matches the size of
                                                 * the external storage requested above.
                                                 */
                    361:                        KASSERT(nam->m_len <= MAXPATHLEN * 2);
                    362:                        memcpy(mtod(nam, void *), sun, (size_t)nam->m_len);
                    363:                        break;
                    364:                }
                    365:        }
1.20      mycroft   366: }
                    367:
1.151     rmind     368: static int
                    369: unp_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
                    370:     struct mbuf *control, struct lwp *l)
1.1       cgd       371: {
1.152     rmind     372:        struct unpcb *unp;
1.46      augustss  373:        struct socket *so2;
1.75      christos  374:        u_int newhiwat;
1.46      augustss  375:        int error = 0;
1.1       cgd       376:
1.152     rmind     377:        KASSERT(req != PRU_ATTACH);
                    378:        KASSERT(req != PRU_DETACH);
1.154     rtr       379:        KASSERT(req != PRU_CONTROL);
1.156     rtr       380:        KASSERT(req != PRU_SENSE);
1.152     rmind     381:
                    382:        KASSERT(solocked(so));
                    383:        unp = sotounpcb(so);
1.20      mycroft   384:
1.152     rmind     385:        KASSERT(!control || (req == PRU_SEND || req == PRU_SENDOOB));
                    386:        if (unp == NULL) {
                    387:                error = EINVAL;
                    388:                goto release;
1.1       cgd       389:        }
1.20      mycroft   390:
1.1       cgd       391:        switch (req) {
                    392:        case PRU_BIND:
1.90      christos  393:                KASSERT(l != NULL);
1.112     ad        394:                error = unp_bind(so, nam, l);
1.1       cgd       395:                break;
                    396:
                    397:        case PRU_LISTEN:
1.112     ad        398:                /*
                    399:                 * If the socket can accept a connection, it must be
                    400:                 * locked by uipc_lock.
                    401:                 */
                    402:                unp_resetlock(so);
1.122     yamt      403:                if (unp->unp_vnode == NULL)
1.1       cgd       404:                        error = EINVAL;
                    405:                break;
                    406:
                    407:        case PRU_CONNECT:
1.90      christos  408:                KASSERT(l != NULL);
1.86      christos  409:                error = unp_connect(so, nam, l);
1.1       cgd       410:                break;
                    411:
                    412:        case PRU_CONNECT2:
1.72      matt      413:                error = unp_connect2(so, (struct socket *)nam, PRU_CONNECT2);
1.1       cgd       414:                break;
                    415:
                    416:        case PRU_DISCONNECT:
                    417:                unp_disconnect(unp);
                    418:                break;
                    419:
                    420:        case PRU_ACCEPT:
1.112     ad        421:                KASSERT(so->so_lock == uipc_lock);
1.72      matt      422:                /*
                    423:                 * Mark the initiating STREAM socket as connected *ONLY*
                    424:                 * after it's been accepted.  This prevents a client from
                    425:                 * overrunning a server and receiving ECONNREFUSED.
                    426:                 */
1.145     christos  427:                if (unp->unp_conn == NULL) {
                    428:                        /*
                    429:                         * This will use the empty socket and will not
                    430:                         * allocate.
                    431:                         */
                    432:                        unp_setaddr(so, nam, true);
1.112     ad        433:                        break;
1.145     christos  434:                }
1.112     ad        435:                so2 = unp->unp_conn->unp_socket;
                    436:                if (so2->so_state & SS_ISCONNECTING) {
                    437:                        KASSERT(solocked2(so, so->so_head));
                    438:                        KASSERT(solocked2(so2, so->so_head));
                    439:                        soisconnected(so2);
                    440:                }
                    441:                /*
                    442:                 * If the connection is fully established, break the
                    443:                 * association with uipc_lock and give the connected
1.152     rmind     444:                 * pair a separate lock to share.
1.127     bouyer    445:                 * There is a race here: sotounpcb(so2)->unp_streamlock
                    446:                 * is not locked, so when changing so2->so_lock
                    447:                 * another thread can grab it while so->so_lock is still
                    448:                 * pointing to the (locked) uipc_lock.
1.129     wiz       449:                 * this should be harmless, except that this makes
1.127     bouyer    450:                 * solocked2() and solocked() unreliable.
                    451:                 * Another problem is that unp_setaddr() expects the
                    452:                 * socket locked. Grabbing sotounpcb(so2)->unp_streamlock
                    453:                 * fixes both issues.
1.112     ad        454:                 */
1.127     bouyer    455:                mutex_enter(sotounpcb(so2)->unp_streamlock);
1.112     ad        456:                unp_setpeerlocks(so2, so);
                    457:                /*
                    458:                 * Only now return peer's address, as we may need to
                    459:                 * block in order to allocate memory.
                    460:                 *
                    461:                 * XXX Minor race: connection can be broken while
                    462:                 * lock is dropped in unp_setaddr().  We will return
                    463:                 * error == 0 and sun_noname as the peer address.
                    464:                 */
                    465:                unp_setaddr(so, nam, true);
1.127     bouyer    466:                /* so_lock now points to unp_streamlock */
                    467:                mutex_exit(so2->so_lock);
1.1       cgd       468:                break;
                    469:
                    470:        case PRU_SHUTDOWN:
                    471:                socantsendmore(so);
                    472:                unp_shutdown(unp);
                    473:                break;
                    474:
                    475:        case PRU_RCVD:
                    476:                switch (so->so_type) {
                    477:
                    478:                case SOCK_DGRAM:
                    479:                        panic("uipc 1");
                    480:                        /*NOTREACHED*/
                    481:
1.134     manu      482:                case SOCK_SEQPACKET: /* FALLTHROUGH */
1.1       cgd       483:                case SOCK_STREAM:
                    484: #define        rcv (&so->so_rcv)
                    485: #define snd (&so2->so_snd)
                    486:                        if (unp->unp_conn == 0)
                    487:                                break;
                    488:                        so2 = unp->unp_conn->unp_socket;
1.112     ad        489:                        KASSERT(solocked2(so, so2));
1.1       cgd       490:                        /*
                    491:                         * Adjust backpressure on sender
                    492:                         * and wakeup any waiting to write.
                    493:                         */
                    494:                        snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
                    495:                        unp->unp_mbcnt = rcv->sb_mbcnt;
1.75      christos  496:                        newhiwat = snd->sb_hiwat + unp->unp_cc - rcv->sb_cc;
1.81      christos  497:                        (void)chgsbsize(so2->so_uidinfo,
1.75      christos  498:                            &snd->sb_hiwat, newhiwat, RLIM_INFINITY);
1.1       cgd       499:                        unp->unp_cc = rcv->sb_cc;
                    500:                        sowwakeup(so2);
                    501: #undef snd
                    502: #undef rcv
                    503:                        break;
                    504:
                    505:                default:
                    506:                        panic("uipc 2");
                    507:                }
                    508:                break;
                    509:
                    510:        case PRU_SEND:
1.30      thorpej   511:                /*
                    512:                 * Note: unp_internalize() rejects any control message
                    513:                 * other than SCM_RIGHTS, and only allows one.  This
                    514:                 * has the side-effect of preventing a caller from
                    515:                 * forging SCM_CREDS.
                    516:                 */
1.90      christos  517:                if (control) {
1.112     ad        518:                        sounlock(so);
                    519:                        error = unp_internalize(&control);
                    520:                        solock(so);
                    521:                        if (error != 0) {
1.111     mlelstv   522:                                m_freem(control);
                    523:                                m_freem(m);
                    524:                                break;
                    525:                        }
1.83      yamt      526:                }
1.1       cgd       527:                switch (so->so_type) {
                    528:
                    529:                case SOCK_DGRAM: {
1.112     ad        530:                        KASSERT(so->so_lock == uipc_lock);
1.1       cgd       531:                        if (nam) {
1.111     mlelstv   532:                                if ((so->so_state & SS_ISCONNECTED) != 0)
1.1       cgd       533:                                        error = EISCONN;
1.111     mlelstv   534:                                else {
1.112     ad        535:                                        /*
                    536:                                         * Note: once connected, the
                    537:                                         * socket's lock must not be
                    538:                                         * dropped until we have sent
                    539:                                         * the message and disconnected.
                    540:                                         * This is necessary to prevent
                    541:                                         * intervening control ops, like
                    542:                                         * another connection.
                    543:                                         */
1.111     mlelstv   544:                                        error = unp_connect(so, nam, l);
1.20      mycroft   545:                                }
1.1       cgd       546:                        } else {
1.111     mlelstv   547:                                if ((so->so_state & SS_ISCONNECTED) == 0)
1.1       cgd       548:                                        error = ENOTCONN;
1.111     mlelstv   549:                        }
                    550:                        if (error) {
                    551:                                unp_dispose(control);
                    552:                                m_freem(control);
                    553:                                m_freem(m);
                    554:                                break;
1.1       cgd       555:                        }
1.147     martin    556:                        KASSERT(l != NULL);
1.92      ad        557:                        error = unp_output(m, control, unp, l);
1.1       cgd       558:                        if (nam)
                    559:                                unp_disconnect(unp);
                    560:                        break;
                    561:                }
                    562:
1.134     manu      563:                case SOCK_SEQPACKET: /* FALLTHROUGH */
1.1       cgd       564:                case SOCK_STREAM:
                    565: #define        rcv (&so2->so_rcv)
                    566: #define        snd (&so->so_snd)
1.87      christos  567:                        if (unp->unp_conn == NULL) {
                    568:                                error = ENOTCONN;
                    569:                                break;
                    570:                        }
1.1       cgd       571:                        so2 = unp->unp_conn->unp_socket;
1.112     ad        572:                        KASSERT(solocked2(so, so2));
1.30      thorpej   573:                        if (unp->unp_conn->unp_flags & UNP_WANTCRED) {
                    574:                                /*
                    575:                                 * Credentials are passed only once on
1.134     manu      576:                                 * SOCK_STREAM and SOCK_SEQPACKET.
1.30      thorpej   577:                                 */
                    578:                                unp->unp_conn->unp_flags &= ~UNP_WANTCRED;
1.92      ad        579:                                control = unp_addsockcred(l, control);
1.30      thorpej   580:                        }
1.1       cgd       581:                        /*
                    582:                         * Send to paired receive port, and then reduce
                    583:                         * send buffer hiwater marks to maintain backpressure.
                    584:                         * Wake up readers.
                    585:                         */
                    586:                        if (control) {
1.112     ad        587:                                if (sbappendcontrol(rcv, m, control) != 0)
                    588:                                        control = NULL;
1.134     manu      589:                        } else {
                    590:                                switch(so->so_type) {
                    591:                                case SOCK_SEQPACKET:
                    592:                                        sbappendrecord(rcv, m);
                    593:                                        break;
                    594:                                case SOCK_STREAM:
                    595:                                        sbappend(rcv, m);
                    596:                                        break;
                    597:                                default:
                    598:                                        panic("uipc_usrreq");
                    599:                                        break;
                    600:                                }
                    601:                        }
1.1       cgd       602:                        snd->sb_mbmax -=
                    603:                            rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
                    604:                        unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
1.75      christos  605:                        newhiwat = snd->sb_hiwat -
                    606:                            (rcv->sb_cc - unp->unp_conn->unp_cc);
1.81      christos  607:                        (void)chgsbsize(so->so_uidinfo,
1.75      christos  608:                            &snd->sb_hiwat, newhiwat, RLIM_INFINITY);
1.1       cgd       609:                        unp->unp_conn->unp_cc = rcv->sb_cc;
                    610:                        sorwakeup(so2);
                    611: #undef snd
                    612: #undef rcv
1.112     ad        613:                        if (control != NULL) {
                    614:                                unp_dispose(control);
                    615:                                m_freem(control);
                    616:                        }
1.1       cgd       617:                        break;
                    618:
                    619:                default:
                    620:                        panic("uipc 4");
                    621:                }
                    622:                break;
                    623:
                    624:        case PRU_ABORT:
1.112     ad        625:                (void)unp_drop(unp, ECONNABORTED);
1.88      matt      626:                KASSERT(so->so_head == NULL);
1.152     rmind     627:                KASSERT(so->so_pcb != NULL);
                    628:                unp_detach(so);
1.1       cgd       629:                break;
                    630:
                    631:        case PRU_RCVOOB:
1.20      mycroft   632:                error = EOPNOTSUPP;
                    633:                break;
1.1       cgd       634:
                    635:        case PRU_SENDOOB:
1.22      mycroft   636:                m_freem(control);
1.20      mycroft   637:                m_freem(m);
1.1       cgd       638:                error = EOPNOTSUPP;
                    639:                break;
                    640:
                    641:        case PRU_SOCKADDR:
1.112     ad        642:                unp_setaddr(so, nam, false);
1.1       cgd       643:                break;
                    644:
                    645:        case PRU_PEERADDR:
1.112     ad        646:                unp_setaddr(so, nam, true);
1.1       cgd       647:                break;
                    648:
                    649:        default:
                    650:                panic("piusrreq");
                    651:        }
1.20      mycroft   652:
1.1       cgd       653: release:
                    654:        return (error);
                    655: }
                    656:
                    657: /*
1.30      thorpej   658:  * Unix domain socket option processing.
                    659:  */
                    660: int
1.118     plunky    661: uipc_ctloutput(int op, struct socket *so, struct sockopt *sopt)
1.30      thorpej   662: {
                    663:        struct unpcb *unp = sotounpcb(so);
                    664:        int optval = 0, error = 0;
                    665:
1.112     ad        666:        KASSERT(solocked(so));
                    667:
1.118     plunky    668:        if (sopt->sopt_level != 0) {
1.100     dyoung    669:                error = ENOPROTOOPT;
1.30      thorpej   670:        } else switch (op) {
                    671:
                    672:        case PRCO_SETOPT:
1.118     plunky    673:                switch (sopt->sopt_name) {
1.30      thorpej   674:                case LOCAL_CREDS:
1.72      matt      675:                case LOCAL_CONNWAIT:
1.118     plunky    676:                        error = sockopt_getint(sopt, &optval);
                    677:                        if (error)
                    678:                                break;
                    679:                        switch (sopt->sopt_name) {
1.30      thorpej   680: #define        OPTSET(bit) \
                    681:        if (optval) \
                    682:                unp->unp_flags |= (bit); \
                    683:        else \
                    684:                unp->unp_flags &= ~(bit);
                    685:
1.118     plunky    686:                        case LOCAL_CREDS:
                    687:                                OPTSET(UNP_WANTCRED);
                    688:                                break;
                    689:                        case LOCAL_CONNWAIT:
                    690:                                OPTSET(UNP_CONNWAIT);
                    691:                                break;
1.30      thorpej   692:                        }
                    693:                        break;
                    694: #undef OPTSET
                    695:
                    696:                default:
                    697:                        error = ENOPROTOOPT;
                    698:                        break;
                    699:                }
                    700:                break;
                    701:
                    702:        case PRCO_GETOPT:
1.112     ad        703:                sounlock(so);
1.118     plunky    704:                switch (sopt->sopt_name) {
1.99      he        705:                case LOCAL_PEEREID:
                    706:                        if (unp->unp_flags & UNP_EIDSVALID) {
1.118     plunky    707:                                error = sockopt_set(sopt,
                    708:                                    &unp->unp_connid, sizeof(unp->unp_connid));
1.99      he        709:                        } else {
                    710:                                error = EINVAL;
                    711:                        }
                    712:                        break;
1.30      thorpej   713:                case LOCAL_CREDS:
                    714: #define        OPTBIT(bit)     (unp->unp_flags & (bit) ? 1 : 0)
                    715:
1.99      he        716:                        optval = OPTBIT(UNP_WANTCRED);
1.118     plunky    717:                        error = sockopt_setint(sopt, optval);
1.30      thorpej   718:                        break;
                    719: #undef OPTBIT
                    720:
                    721:                default:
                    722:                        error = ENOPROTOOPT;
                    723:                        break;
                    724:                }
1.112     ad        725:                solock(so);
1.30      thorpej   726:                break;
                    727:        }
                    728:        return (error);
                    729: }
                    730:
                    731: /*
1.1       cgd       732:  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
                    733:  * for stream sockets, although the total for sender and receiver is
                    734:  * actually only PIPSIZ.
                    735:  * Datagram sockets really use the sendspace as the maximum datagram size,
                    736:  * and don't really want to reserve the sendspace.  Their recvspace should
                    737:  * be large enough for at least one max-size datagram plus address.
                    738:  */
                    739: #define        PIPSIZ  4096
                    740: u_long unpst_sendspace = PIPSIZ;
                    741: u_long unpst_recvspace = PIPSIZ;
                    742: u_long unpdg_sendspace = 2*1024;       /* really max datagram size */
                    743: u_long unpdg_recvspace = 4*1024;
                    744:
1.121     mrg       745: u_int  unp_rights;                     /* files in flight */
                    746: u_int  unp_rights_ratio = 2;           /* limit, fraction of maxfiles */
1.1       cgd       747:
1.152     rmind     748: static int
                    749: unp_attach(struct socket *so, int proto)
1.1       cgd       750: {
1.152     rmind     751:        struct unpcb *unp = sotounpcb(so);
                    752:        u_long sndspc, rcvspc;
1.1       cgd       753:        int error;
1.80      perry     754:
1.152     rmind     755:        KASSERT(unp == NULL);
                    756:
1.112     ad        757:        switch (so->so_type) {
1.152     rmind     758:        case SOCK_SEQPACKET:
                    759:                /* FALLTHROUGH */
1.112     ad        760:        case SOCK_STREAM:
                    761:                if (so->so_lock == NULL) {
                    762:                        so->so_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
                    763:                        solock(so);
                    764:                }
1.152     rmind     765:                sndspc = unpst_sendspace;
                    766:                rcvspc = unpst_recvspace;
1.112     ad        767:                break;
1.1       cgd       768:
1.112     ad        769:        case SOCK_DGRAM:
                    770:                if (so->so_lock == NULL) {
                    771:                        mutex_obj_hold(uipc_lock);
                    772:                        so->so_lock = uipc_lock;
                    773:                        solock(so);
                    774:                }
1.152     rmind     775:                sndspc = unpdg_sendspace;
                    776:                rcvspc = unpdg_recvspace;
1.112     ad        777:                break;
1.8       mycroft   778:
1.112     ad        779:        default:
                    780:                panic("unp_attach");
1.1       cgd       781:        }
1.152     rmind     782:
                    783:        if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
                    784:                error = soreserve(so, sndspc, rcvspc);
                    785:                if (error) {
                    786:                        return error;
                    787:                }
                    788:        }
                    789:
                    790:        unp = kmem_zalloc(sizeof(*unp), KM_SLEEP);
                    791:        nanotime(&unp->unp_ctime);
1.14      mycroft   792:        unp->unp_socket = so;
1.15      mycroft   793:        so->so_pcb = unp;
1.152     rmind     794:
                    795:        KASSERT(solocked(so));
                    796:        return 0;
1.1       cgd       797: }
                    798:
1.152     rmind     799: static void
                    800: unp_detach(struct socket *so)
1.1       cgd       801: {
1.152     rmind     802:        struct unpcb *unp;
1.112     ad        803:        vnode_t *vp;
                    804:
1.152     rmind     805:        unp = sotounpcb(so);
                    806:        KASSERT(unp != NULL);
                    807:        KASSERT(solocked(so));
1.112     ad        808:  retry:
                    809:        if ((vp = unp->unp_vnode) != NULL) {
                    810:                sounlock(so);
                    811:                /* Acquire v_interlock to protect against unp_connect(). */
1.113     ad        812:                /* XXXAD racy */
1.135     rmind     813:                mutex_enter(vp->v_interlock);
1.112     ad        814:                vp->v_socket = NULL;
1.148     hannken   815:                mutex_exit(vp->v_interlock);
                    816:                vrele(vp);
1.112     ad        817:                solock(so);
                    818:                unp->unp_vnode = NULL;
1.1       cgd       819:        }
                    820:        if (unp->unp_conn)
                    821:                unp_disconnect(unp);
1.112     ad        822:        while (unp->unp_refs) {
                    823:                KASSERT(solocked2(so, unp->unp_refs->unp_socket));
                    824:                if (unp_drop(unp->unp_refs, ECONNRESET)) {
                    825:                        solock(so);
                    826:                        goto retry;
                    827:                }
                    828:        }
                    829:        soisdisconnected(so);
                    830:        so->so_pcb = NULL;
1.8       mycroft   831:        if (unp_rights) {
                    832:                /*
1.121     mrg       833:                 * Normally the receive buffer is flushed later, in sofree,
                    834:                 * but if our receive buffer holds references to files that
                    835:                 * are now garbage, we will enqueue those file references to
                    836:                 * the garbage collector and kick it into action.
1.8       mycroft   837:                 */
1.112     ad        838:                sorflush(so);
                    839:                unp_free(unp);
1.121     mrg       840:                unp_thread_kick();
1.14      mycroft   841:        } else
1.112     ad        842:                unp_free(unp);
1.1       cgd       843: }
                    844:
1.154     rtr       845: static int
1.155     rtr       846: unp_ioctl(struct socket *so, u_long cmd, void *nam, struct ifnet *ifp)
1.154     rtr       847: {
                    848:        return EOPNOTSUPP;
                    849: }
                    850:
1.156     rtr       851: static int
                    852: unp_stat(struct socket *so, struct stat *ub)
                    853: {
                    854:        struct unpcb *unp;
                    855:        struct socket *so2;
                    856:
1.157   ! rtr       857:        KASSERT(solocked(so));
        !           858:
1.156     rtr       859:        unp = sotounpcb(so);
                    860:        if (unp == NULL)
                    861:                return EINVAL;
                    862:
                    863:        ub->st_blksize = so->so_snd.sb_hiwat;
                    864:        switch (so->so_type) {
                    865:        case SOCK_SEQPACKET: /* FALLTHROUGH */
                    866:        case SOCK_STREAM:
                    867:                if (unp->unp_conn == 0)
                    868:                        break;
                    869:
                    870:                so2 = unp->unp_conn->unp_socket;
                    871:                KASSERT(solocked2(so, so2));
                    872:                ub->st_blksize += so2->so_rcv.sb_cc;
                    873:                break;
                    874:        default:
                    875:                break;
                    876:        }
                    877:        ub->st_dev = NODEV;
                    878:        if (unp->unp_ino == 0)
                    879:                unp->unp_ino = unp_ino++;
                    880:        ub->st_atimespec = ub->st_mtimespec = ub->st_ctimespec = unp->unp_ctime;
                    881:        ub->st_ino = unp->unp_ino;
                    882:        return (0);
                    883: }
                    884:
1.146     christos  885: /*
                    886:  * Allocate the new sockaddr.  We have to allocate one
                    887:  * extra byte so that we can ensure that the pathname
                    888:  * is nul-terminated. Note that unlike linux, we don't
                    889:  * include in the address length the NUL in the path
                    890:  * component, because doing so, would exceed sizeof(sockaddr_un)
                    891:  * for fully occupied pathnames. Linux is also inconsistent,
                    892:  * because it does not include the NUL in the length of
                    893:  * what it calls "abstract" unix sockets.
                    894:  */
                    895: static struct sockaddr_un *
                    896: makeun(struct mbuf *nam, size_t *addrlen) {
                    897:        struct sockaddr_un *sun;
                    898:
                    899:        *addrlen = nam->m_len + 1;
                    900:        sun = malloc(*addrlen, M_SONAME, M_WAITOK);
                    901:        m_copydata(nam, 0, nam->m_len, (void *)sun);
                    902:        *(((char *)sun) + nam->m_len) = '\0';
                    903:        sun->sun_len = strlen(sun->sun_path) +
                    904:            offsetof(struct sockaddr_un, sun_path);
                    905:        return sun;
                    906: }
                    907:
1.5       andrew    908: int
1.112     ad        909: unp_bind(struct socket *so, struct mbuf *nam, struct lwp *l)
1.1       cgd       910: {
1.27      thorpej   911:        struct sockaddr_un *sun;
1.112     ad        912:        struct unpcb *unp;
1.106     ad        913:        vnode_t *vp;
1.1       cgd       914:        struct vattr vattr;
1.27      thorpej   915:        size_t addrlen;
1.1       cgd       916:        int error;
1.133     dholland  917:        struct pathbuf *pb;
1.1       cgd       918:        struct nameidata nd;
1.112     ad        919:        proc_t *p;
1.1       cgd       920:
1.112     ad        921:        unp = sotounpcb(so);
                    922:        if (unp->unp_vnode != NULL)
1.20      mycroft   923:                return (EINVAL);
1.109     ad        924:        if ((unp->unp_flags & UNP_BUSY) != 0) {
                    925:                /*
                    926:                 * EALREADY may not be strictly accurate, but since this
                    927:                 * is a major application error it's hardly a big deal.
                    928:                 */
                    929:                return (EALREADY);
                    930:        }
                    931:        unp->unp_flags |= UNP_BUSY;
1.112     ad        932:        sounlock(so);
1.109     ad        933:
1.112     ad        934:        p = l->l_proc;
1.146     christos  935:        sun = makeun(nam, &addrlen);
1.27      thorpej   936:
1.133     dholland  937:        pb = pathbuf_create(sun->sun_path);
                    938:        if (pb == NULL) {
                    939:                error = ENOMEM;
                    940:                goto bad;
                    941:        }
                    942:        NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT | TRYEMULROOT, pb);
1.27      thorpej   943:
1.1       cgd       944: /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
1.133     dholland  945:        if ((error = namei(&nd)) != 0) {
                    946:                pathbuf_destroy(pb);
1.27      thorpej   947:                goto bad;
1.133     dholland  948:        }
1.9       mycroft   949:        vp = nd.ni_vp;
1.96      hannken   950:        if (vp != NULL) {
1.9       mycroft   951:                VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
                    952:                if (nd.ni_dvp == vp)
                    953:                        vrele(nd.ni_dvp);
1.1       cgd       954:                else
1.9       mycroft   955:                        vput(nd.ni_dvp);
1.1       cgd       956:                vrele(vp);
1.133     dholland  957:                pathbuf_destroy(pb);
1.96      hannken   958:                error = EADDRINUSE;
                    959:                goto bad;
1.1       cgd       960:        }
1.128     pooka     961:        vattr_null(&vattr);
1.1       cgd       962:        vattr.va_type = VSOCK;
1.84      jmmv      963:        vattr.va_mode = ACCESSPERMS & ~(p->p_cwdi->cwdi_cmask);
1.16      christos  964:        error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
1.133     dholland  965:        if (error) {
1.149     hannken   966:                vput(nd.ni_dvp);
1.133     dholland  967:                pathbuf_destroy(pb);
1.27      thorpej   968:                goto bad;
1.133     dholland  969:        }
1.9       mycroft   970:        vp = nd.ni_vp;
1.150     hannken   971:        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.112     ad        972:        solock(so);
1.1       cgd       973:        vp->v_socket = unp->unp_socket;
                    974:        unp->unp_vnode = vp;
1.27      thorpej   975:        unp->unp_addrlen = addrlen;
                    976:        unp->unp_addr = sun;
1.99      he        977:        unp->unp_connid.unp_pid = p->p_pid;
1.112     ad        978:        unp->unp_connid.unp_euid = kauth_cred_geteuid(l->l_cred);
                    979:        unp->unp_connid.unp_egid = kauth_cred_getegid(l->l_cred);
1.99      he        980:        unp->unp_flags |= UNP_EIDSBIND;
1.130     hannken   981:        VOP_UNLOCK(vp);
1.149     hannken   982:        vput(nd.ni_dvp);
1.109     ad        983:        unp->unp_flags &= ~UNP_BUSY;
1.133     dholland  984:        pathbuf_destroy(pb);
1.1       cgd       985:        return (0);
1.27      thorpej   986:
                    987:  bad:
                    988:        free(sun, M_SONAME);
1.112     ad        989:        solock(so);
1.109     ad        990:        unp->unp_flags &= ~UNP_BUSY;
1.27      thorpej   991:        return (error);
1.1       cgd       992: }
                    993:
1.5       andrew    994: int
1.86      christos  995: unp_connect(struct socket *so, struct mbuf *nam, struct lwp *l)
1.1       cgd       996: {
1.46      augustss  997:        struct sockaddr_un *sun;
1.106     ad        998:        vnode_t *vp;
1.46      augustss  999:        struct socket *so2, *so3;
1.99      he       1000:        struct unpcb *unp, *unp2, *unp3;
1.27      thorpej  1001:        size_t addrlen;
1.1       cgd      1002:        int error;
1.133     dholland 1003:        struct pathbuf *pb;
1.1       cgd      1004:        struct nameidata nd;
                   1005:
1.109     ad       1006:        unp = sotounpcb(so);
                   1007:        if ((unp->unp_flags & UNP_BUSY) != 0) {
                   1008:                /*
                   1009:                 * EALREADY may not be strictly accurate, but since this
                   1010:                 * is a major application error it's hardly a big deal.
                   1011:                 */
                   1012:                return (EALREADY);
                   1013:        }
                   1014:        unp->unp_flags |= UNP_BUSY;
1.112     ad       1015:        sounlock(so);
1.109     ad       1016:
1.146     christos 1017:        sun = makeun(nam, &addrlen);
1.133     dholland 1018:        pb = pathbuf_create(sun->sun_path);
                   1019:        if (pb == NULL) {
                   1020:                error = ENOMEM;
                   1021:                goto bad2;
                   1022:        }
1.27      thorpej  1023:
1.133     dholland 1024:        NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
                   1025:
                   1026:        if ((error = namei(&nd)) != 0) {
                   1027:                pathbuf_destroy(pb);
1.27      thorpej  1028:                goto bad2;
1.133     dholland 1029:        }
1.9       mycroft  1030:        vp = nd.ni_vp;
1.1       cgd      1031:        if (vp->v_type != VSOCK) {
                   1032:                error = ENOTSOCK;
                   1033:                goto bad;
                   1034:        }
1.133     dholland 1035:        pathbuf_destroy(pb);
1.102     pooka    1036:        if ((error = VOP_ACCESS(vp, VWRITE, l->l_cred)) != 0)
1.1       cgd      1037:                goto bad;
1.112     ad       1038:        /* Acquire v_interlock to protect against unp_detach(). */
1.135     rmind    1039:        mutex_enter(vp->v_interlock);
1.1       cgd      1040:        so2 = vp->v_socket;
1.112     ad       1041:        if (so2 == NULL) {
1.135     rmind    1042:                mutex_exit(vp->v_interlock);
1.1       cgd      1043:                error = ECONNREFUSED;
                   1044:                goto bad;
                   1045:        }
                   1046:        if (so->so_type != so2->so_type) {
1.135     rmind    1047:                mutex_exit(vp->v_interlock);
1.1       cgd      1048:                error = EPROTOTYPE;
                   1049:                goto bad;
                   1050:        }
1.112     ad       1051:        solock(so);
                   1052:        unp_resetlock(so);
1.135     rmind    1053:        mutex_exit(vp->v_interlock);
1.112     ad       1054:        if ((so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) {
                   1055:                /*
                   1056:                 * This may seem somewhat fragile but is OK: if we can
                   1057:                 * see SO_ACCEPTCONN set on the endpoint, then it must
                   1058:                 * be locked by the domain-wide uipc_lock.
                   1059:                 */
1.132     yamt     1060:                KASSERT((so2->so_options & SO_ACCEPTCONN) == 0 ||
1.112     ad       1061:                    so2->so_lock == uipc_lock);
1.1       cgd      1062:                if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
1.144     rmind    1063:                    (so3 = sonewconn(so2, false)) == NULL) {
1.1       cgd      1064:                        error = ECONNREFUSED;
1.112     ad       1065:                        sounlock(so);
1.1       cgd      1066:                        goto bad;
                   1067:                }
                   1068:                unp2 = sotounpcb(so2);
                   1069:                unp3 = sotounpcb(so3);
1.26      thorpej  1070:                if (unp2->unp_addr) {
                   1071:                        unp3->unp_addr = malloc(unp2->unp_addrlen,
                   1072:                            M_SONAME, M_WAITOK);
1.36      perry    1073:                        memcpy(unp3->unp_addr, unp2->unp_addr,
1.26      thorpej  1074:                            unp2->unp_addrlen);
                   1075:                        unp3->unp_addrlen = unp2->unp_addrlen;
                   1076:                }
1.30      thorpej  1077:                unp3->unp_flags = unp2->unp_flags;
1.112     ad       1078:                unp3->unp_connid.unp_pid = l->l_proc->p_pid;
                   1079:                unp3->unp_connid.unp_euid = kauth_cred_geteuid(l->l_cred);
                   1080:                unp3->unp_connid.unp_egid = kauth_cred_getegid(l->l_cred);
1.99      he       1081:                unp3->unp_flags |= UNP_EIDSVALID;
                   1082:                if (unp2->unp_flags & UNP_EIDSBIND) {
                   1083:                        unp->unp_connid = unp2->unp_connid;
                   1084:                        unp->unp_flags |= UNP_EIDSVALID;
                   1085:                }
1.112     ad       1086:                so2 = so3;
1.33      thorpej  1087:        }
1.72      matt     1088:        error = unp_connect2(so, so2, PRU_CONNECT);
1.112     ad       1089:        sounlock(so);
1.27      thorpej  1090:  bad:
1.1       cgd      1091:        vput(vp);
1.27      thorpej  1092:  bad2:
                   1093:        free(sun, M_SONAME);
1.112     ad       1094:        solock(so);
1.109     ad       1095:        unp->unp_flags &= ~UNP_BUSY;
1.1       cgd      1096:        return (error);
                   1097: }
                   1098:
1.5       andrew   1099: int
1.76      matt     1100: unp_connect2(struct socket *so, struct socket *so2, int req)
1.1       cgd      1101: {
1.46      augustss 1102:        struct unpcb *unp = sotounpcb(so);
                   1103:        struct unpcb *unp2;
1.1       cgd      1104:
                   1105:        if (so2->so_type != so->so_type)
                   1106:                return (EPROTOTYPE);
1.112     ad       1107:
                   1108:        /*
                   1109:         * All three sockets involved must be locked by same lock:
                   1110:         *
                   1111:         * local endpoint (so)
                   1112:         * remote endpoint (so2)
1.131     yamt     1113:         * queue head (so2->so_head, only if PR_CONNREQUIRED)
1.112     ad       1114:         */
                   1115:        KASSERT(solocked2(so, so2));
1.125     yamt     1116:        KASSERT(so->so_head == NULL);
                   1117:        if (so2->so_head != NULL) {
                   1118:                KASSERT(so2->so_lock == uipc_lock);
                   1119:                KASSERT(solocked2(so2, so2->so_head));
1.112     ad       1120:        }
                   1121:
1.1       cgd      1122:        unp2 = sotounpcb(so2);
                   1123:        unp->unp_conn = unp2;
                   1124:        switch (so->so_type) {
                   1125:
                   1126:        case SOCK_DGRAM:
                   1127:                unp->unp_nextref = unp2->unp_refs;
                   1128:                unp2->unp_refs = unp;
                   1129:                soisconnected(so);
                   1130:                break;
                   1131:
1.134     manu     1132:        case SOCK_SEQPACKET: /* FALLTHROUGH */
1.1       cgd      1133:        case SOCK_STREAM:
                   1134:                unp2->unp_conn = unp;
1.72      matt     1135:                if (req == PRU_CONNECT &&
                   1136:                    ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT))
                   1137:                        soisconnecting(so);
                   1138:                else
                   1139:                        soisconnected(so);
1.1       cgd      1140:                soisconnected(so2);
1.112     ad       1141:                /*
                   1142:                 * If the connection is fully established, break the
                   1143:                 * association with uipc_lock and give the connected
                   1144:                 * pair a seperate lock to share.  For CONNECT2, we
                   1145:                 * require that the locks already match (the sockets
                   1146:                 * are created that way).
                   1147:                 */
1.125     yamt     1148:                if (req == PRU_CONNECT) {
                   1149:                        KASSERT(so2->so_head != NULL);
1.112     ad       1150:                        unp_setpeerlocks(so, so2);
1.125     yamt     1151:                }
1.1       cgd      1152:                break;
                   1153:
                   1154:        default:
                   1155:                panic("unp_connect2");
                   1156:        }
                   1157:        return (0);
                   1158: }
                   1159:
1.5       andrew   1160: void
1.76      matt     1161: unp_disconnect(struct unpcb *unp)
1.1       cgd      1162: {
1.46      augustss 1163:        struct unpcb *unp2 = unp->unp_conn;
1.112     ad       1164:        struct socket *so;
1.1       cgd      1165:
                   1166:        if (unp2 == 0)
                   1167:                return;
                   1168:        unp->unp_conn = 0;
1.112     ad       1169:        so = unp->unp_socket;
                   1170:        switch (so->so_type) {
1.1       cgd      1171:        case SOCK_DGRAM:
                   1172:                if (unp2->unp_refs == unp)
                   1173:                        unp2->unp_refs = unp->unp_nextref;
                   1174:                else {
                   1175:                        unp2 = unp2->unp_refs;
                   1176:                        for (;;) {
1.112     ad       1177:                                KASSERT(solocked2(so, unp2->unp_socket));
1.1       cgd      1178:                                if (unp2 == 0)
                   1179:                                        panic("unp_disconnect");
                   1180:                                if (unp2->unp_nextref == unp)
                   1181:                                        break;
                   1182:                                unp2 = unp2->unp_nextref;
                   1183:                        }
                   1184:                        unp2->unp_nextref = unp->unp_nextref;
                   1185:                }
                   1186:                unp->unp_nextref = 0;
1.112     ad       1187:                so->so_state &= ~SS_ISCONNECTED;
1.1       cgd      1188:                break;
                   1189:
1.134     manu     1190:        case SOCK_SEQPACKET: /* FALLTHROUGH */
1.1       cgd      1191:        case SOCK_STREAM:
1.112     ad       1192:                KASSERT(solocked2(so, unp2->unp_socket));
                   1193:                soisdisconnected(so);
1.1       cgd      1194:                unp2->unp_conn = 0;
                   1195:                soisdisconnected(unp2->unp_socket);
                   1196:                break;
                   1197:        }
                   1198: }
                   1199:
1.5       andrew   1200: void
1.76      matt     1201: unp_shutdown(struct unpcb *unp)
1.1       cgd      1202: {
                   1203:        struct socket *so;
                   1204:
1.134     manu     1205:        switch(unp->unp_socket->so_type) {
                   1206:        case SOCK_SEQPACKET: /* FALLTHROUGH */
                   1207:        case SOCK_STREAM:
                   1208:                if (unp->unp_conn && (so = unp->unp_conn->unp_socket))
                   1209:                        socantrcvmore(so);
                   1210:                break;
                   1211:        default:
                   1212:                break;
                   1213:        }
1.1       cgd      1214: }
                   1215:
1.112     ad       1216: bool
1.76      matt     1217: unp_drop(struct unpcb *unp, int errno)
1.1       cgd      1218: {
                   1219:        struct socket *so = unp->unp_socket;
                   1220:
1.112     ad       1221:        KASSERT(solocked(so));
                   1222:
1.1       cgd      1223:        so->so_error = errno;
                   1224:        unp_disconnect(unp);
                   1225:        if (so->so_head) {
1.112     ad       1226:                so->so_pcb = NULL;
                   1227:                /* sofree() drops the socket lock */
1.14      mycroft  1228:                sofree(so);
1.112     ad       1229:                unp_free(unp);
                   1230:                return true;
1.1       cgd      1231:        }
1.112     ad       1232:        return false;
1.1       cgd      1233: }
                   1234:
                   1235: #ifdef notdef
                         /* Empty stub; only compiled if "notdef" is defined. */
1.76      matt     1236: unp_drain(void)
1.1       cgd      1237: {
                   1238:
                   1239: }
                   1240: #endif
                   1241:
/*
 * unp_externalize: convert the file_t pointers carried in the control
 * message `rights' into file descriptors of the process owning lwp `l'.
 *
 * The number of entries is derived from cm->cmsg_len.  Every file is
 * checked first (it must be non-NULL, and a directory must lie under
 * the process root directory when one is set), then descriptor slots
 * are allocated, and finally each pointer in the message is overwritten
 * in place by the new descriptor number.  New descriptors are marked
 * close-on-exec when `flags' contains O_CLOEXEC.
 *
 * Returns 0 on success, or EINVAL, EPERM or EMSGSIZE.  On error every
 * entry of the message is passed to unp_discard_now() and the message
 * is truncated to zero entries.
 */
1.5       andrew   1242: int
1.136     christos 1243: unp_externalize(struct mbuf *rights, struct lwp *l, int flags)
1.1       cgd      1244: {
1.138     christos 1245:        struct cmsghdr * const cm = mtod(rights, struct cmsghdr *);
                   1246:        struct proc * const p = l->l_proc;
1.106     ad       1247:        file_t **rp;
1.138     christos 1248:        int error = 0;
1.47      thorpej  1249:
1.138     christos 1250:        const size_t nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
1.106     ad       1251:            sizeof(file_t *);
1.143     drochner 1252:        if (nfds == 0)
                   1253:                goto noop;
1.1       cgd      1254:
                                /* Scratch array holding the descriptor numbers being allocated. */
1.138     christos 1255:        int * const fdp = kmem_alloc(nfds * sizeof(int), KM_SLEEP);
1.101     ad       1256:        rw_enter(&p->p_cwdi->cwdi_lock, RW_READER);
1.50      thorpej  1257:
1.121     mrg      1258:        /* Make sure the recipient should be able to see the files.. */
1.140     christos 1259:        rp = (file_t **)CMSG_DATA(cm);
                   1260:        for (size_t i = 0; i < nfds; i++) {
                   1261:                file_t * const fp = *rp++;
                   1262:                if (fp == NULL) {
                   1263:                        error = EINVAL;
                   1264:                        goto out;
                   1265:                }
                   1266:                /*
                   1267:                 * If we are in a chroot'ed directory, and
                   1268:                 * someone wants to pass us a directory, make
                   1269:                 * sure it's inside the subtree we're allowed
                   1270:                 * to access.
                   1271:                 */
                   1272:                if (p->p_cwdi->cwdi_rdir != NULL && fp->f_type == DTYPE_VNODE) {
                   1273:                        vnode_t *vp = (vnode_t *)fp->f_data;
                   1274:                        if ((vp->v_type == VDIR) &&
                   1275:                            !vn_isunder(vp, p->p_cwdi->cwdi_rdir, l)) {
                   1276:                                error = EPERM;
                   1277:                                goto out;
1.39      sommerfe 1278:                        }
                   1279:                }
                   1280:        }
1.50      thorpej  1281:
                   1282:  restart:
1.24      cgd      1283:        /*
1.50      thorpej  1284:         * First loop -- allocate file descriptor table slots for the
1.121     mrg      1285:         * new files.
1.24      cgd      1286:         */
1.138     christos 1287:        for (size_t i = 0; i < nfds; i++) {
1.106     ad       1288:                if ((error = fd_alloc(p, 0, &fdp[i])) != 0) {
1.49      thorpej  1289:                        /*
1.50      thorpej  1290:                         * Back out what we've done so far.
1.49      thorpej  1291:                         */
1.138     christos 1292:                        while (i-- > 0) {
1.106     ad       1293:                                fd_abort(p, NULL, fdp[i]);
                   1294:                        }
                                                /* Out of slots: try to grow the table and start over. */
1.50      thorpej  1295:                        if (error == ENOSPC) {
1.106     ad       1296:                                fd_tryexpand(p);
1.50      thorpej  1297:                                error = 0;
1.138     christos 1298:                                goto restart;
1.50      thorpej  1299:                        }
1.138     christos 1300:                        /*
                   1301:                         * This is the error that has historically
                   1302:                         * been returned, and some callers may
                   1303:                         * expect it.
                   1304:                         */
                   1305:                        error = EMSGSIZE;
                   1306:                        goto out;
1.49      thorpej  1307:                }
1.1       cgd      1308:        }
1.24      cgd      1309:
                   1310:        /*
1.50      thorpej  1311:         * Now that adding them has succeeded, update all of the
1.121     mrg      1312:         * file passing state and affix the descriptors.
1.112     ad       1313:         */
1.106     ad       1314:        rp = (file_t **)CMSG_DATA(cm);
                                /* ofdp writes into the same buffer that rp reads from. */
1.138     christos 1315:        int *ofdp = (int *)CMSG_DATA(cm);
                   1316:        for (size_t i = 0; i < nfds; i++) {
                   1317:                file_t * const fp = *rp++;
                   1318:                const int fd = fdp[i];
1.106     ad       1319:                atomic_dec_uint(&unp_rights);
1.136     christos 1320:                fd_set_exclose(l, fd, (flags & O_CLOEXEC) != 0);
                   1321:                fd_affix(p, fp, fd);
1.138     christos 1322:                /*
                   1323:                 * Done with this file pointer, replace it with a fd;
                   1324:                 */
                   1325:                *ofdp++ = fd;
1.106     ad       1326:                mutex_enter(&fp->f_lock);
1.50      thorpej  1327:                fp->f_msgcount--;
1.106     ad       1328:                mutex_exit(&fp->f_lock);
                   1329:                /*
                   1330:                 * Note that fd_affix() adds a reference to the file.
                   1331:                 * The file may already have been closed by another
                   1332:                 * LWP in the process, so we must drop the reference
                   1333:                 * added by unp_internalize() with closef().
                   1334:                 */
                   1335:                closef(fp);
1.50      thorpej  1336:        }
                   1337:
                   1338:        /*
1.138     christos 1339:         * Adjust length, in case of transition from large file_t
                   1340:         * pointers to ints.
1.50      thorpej  1341:         */
1.138     christos 1342:        if (sizeof(file_t *) != sizeof(int)) {
                   1343:                cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
                   1344:                rights->m_len = CMSG_SPACE(nfds * sizeof(int));
                   1345:        }
1.50      thorpej  1346:  out:
1.138     christos 1347:        if (__predict_false(error != 0)) {
                                        /*
                                         * Both error exits above happen before any file has
                                         * been affixed, so the message still holds file
                                         * pointers.  NOTE(review): on the EINVAL path one
                                         * entry is NULL; presumably unp_discard_now()
                                         * tolerates that -- TODO confirm.
                                         */
1.141     riastrad 1348:                file_t **const fpp = (file_t **)CMSG_DATA(cm);
                   1349:                for (size_t i = 0; i < nfds; i++)
                   1350:                        unp_discard_now(fpp[i]);
                   1351:                /*
                   1352:                 * Truncate the array so that nobody will try to interpret
                   1353:                 * what is now garbage in it.
                   1354:                 */
                   1355:                cm->cmsg_len = CMSG_LEN(0);
                   1356:                rights->m_len = CMSG_SPACE(0);
1.138     christos 1357:        }
1.143     drochner 1358:        rw_exit(&p->p_cwdi->cwdi_lock);
                   1359:        kmem_free(fdp, nfds * sizeof(int));
1.138     christos 1360:
1.143     drochner 1361:  noop:
1.141     riastrad 1362:        /*
                   1363:         * Don't disclose kernel memory in the alignment space.
                   1364:         */
                   1365:        KASSERT(cm->cmsg_len <= rights->m_len);
                   1366:        memset(&mtod(rights, char *)[cm->cmsg_len], 0, rights->m_len -
                   1367:            cm->cmsg_len);
1.139     christos 1368:        return error;
1.1       cgd      1369: }
                   1370:
/*
 * unp_internalize: convert the file descriptors in the SCM_RIGHTS
 * control message *controlp into file_t pointers.
 *
 * The message header is validated (type, level, length), each
 * descriptor is looked up with fd_getfile(), and a new cmsghdr holding
 * the file_t pointers is allocated and attached to the control mbuf as
 * external storage.  Each file gains a reference (f_count) and a
 * message count (f_msgcount).  unp_rights is incremented once per
 * descriptor and may not exceed maxfiles / unp_rights_ratio.
 *
 * Returns 0 on success; EINVAL for a malformed header, EAGAIN when the
 * unp_rights limit would be exceeded, EBADF for an invalid descriptor
 * or one of type DTYPE_KQUEUE, and E2BIG if the allocation fails.
 */
1.5       andrew   1371: int
1.112     ad       1372: unp_internalize(struct mbuf **controlp)
1.1       cgd      1373: {
1.121     mrg      1374:        filedesc_t *fdescp = curlwp->l_fd;
1.108     yamt     1375:        struct mbuf *control = *controlp;
1.73      martin   1376:        struct cmsghdr *newcm, *cm = mtod(control, struct cmsghdr *);
1.106     ad       1377:        file_t **rp, **files;
                   1378:        file_t *fp;
1.46      augustss 1379:        int i, fd, *fdp;
1.106     ad       1380:        int nfds, error;
1.121     mrg      1381:        u_int maxmsg;
1.106     ad       1382:
                   1383:        error = 0;
                   1384:        newcm = NULL;
1.38      thorpej  1385:
1.106     ad       1386:        /* Sanity check the control message header. */
1.66      jdolecek 1387:        if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
1.117     christos 1388:            cm->cmsg_len > control->m_len ||
                   1389:            cm->cmsg_len < CMSG_ALIGN(sizeof(*cm)))
1.1       cgd      1390:                return (EINVAL);
1.24      cgd      1391:
1.106     ad       1392:        /*
                   1393:         * Verify that the file descriptors are valid, and acquire
                   1394:         * a reference to each.
                   1395:         */
1.47      thorpej  1396:        nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int);
                   1397:        fdp = (int *)CMSG_DATA(cm);
1.121     mrg      1398:        maxmsg = maxfiles / unp_rights_ratio;
1.24      cgd      1399:        for (i = 0; i < nfds; i++) {
                   1400:                fd = *fdp++;
                                        /*
                                         * On failure below, nfds is cut back to i so that the
                                         * cleanup at "out" only covers descriptors already
                                         * processed; the failing one is undone right here.
                                         */
1.121     mrg      1401:                if (atomic_inc_uint_nv(&unp_rights) > maxmsg) {
                   1402:                        atomic_dec_uint(&unp_rights);
                   1403:                        nfds = i;
                   1404:                        error = EAGAIN;
                   1405:                        goto out;
                   1406:                }
1.137     martin   1407:                if ((fp = fd_getfile(fd)) == NULL
                   1408:                    || fp->f_type == DTYPE_KQUEUE) {
                   1409:                        if (fp)
                   1410:                                fd_putfile(fd);
1.121     mrg      1411:                        atomic_dec_uint(&unp_rights);
1.120     pooka    1412:                        nfds = i;
1.106     ad       1413:                        error = EBADF;
                   1414:                        goto out;
1.101     ad       1415:                }
1.24      cgd      1416:        }
                   1417:
1.106     ad       1418:        /* Allocate new space and copy header into it. */
                                /*
                                 * NOTE(review): the allocation uses M_WAITOK, so the NULL
                                 * check below is presumably unreachable -- TODO confirm.
                                 */
                   1419:        newcm = malloc(CMSG_SPACE(nfds * sizeof(file_t *)), M_MBUF, M_WAITOK);
                   1420:        if (newcm == NULL) {
                   1421:                error = E2BIG;
                   1422:                goto out;
                   1423:        }
                   1424:        memcpy(newcm, cm, sizeof(struct cmsghdr));
                   1425:        files = (file_t **)CMSG_DATA(newcm);
                   1426:
1.24      cgd      1427:        /*
1.106     ad       1428:         * Transform the file descriptors into file_t pointers, in
1.24      cgd      1429:         * reverse order so that if pointers are bigger than ints, the
1.106     ad       1430:         * int won't get clobbered until we're done.  No need to lock, as we have
                   1431:         * already validated the descriptors with fd_getfile().
1.24      cgd      1432:         */
                                /* Descriptors are read from cm; pointers are written into newcm. */
1.94      cbiere   1433:        fdp = (int *)CMSG_DATA(cm) + nfds;
                   1434:        rp = files + nfds;
1.24      cgd      1435:        for (i = 0; i < nfds; i++) {
1.126     ad       1436:                fp = fdescp->fd_dt->dt_ff[*--fdp]->ff_file;
1.106     ad       1437:                KASSERT(fp != NULL);
                   1438:                mutex_enter(&fp->f_lock);
1.94      cbiere   1439:                *--rp = fp;
1.1       cgd      1440:                fp->f_count++;
                   1441:                fp->f_msgcount++;
1.106     ad       1442:                mutex_exit(&fp->f_lock);
                   1443:        }
                   1444:
                   1445:  out:
                   1446:        /* Release descriptor references. */
                   1447:        fdp = (int *)CMSG_DATA(cm);
                   1448:        for (i = 0; i < nfds; i++) {
                   1449:                fd_putfile(*fdp++);
1.121     mrg      1450:                if (error != 0) {
                   1451:                        atomic_dec_uint(&unp_rights);
                   1452:                }
1.1       cgd      1453:        }
1.73      martin   1454:
1.106     ad       1455:        if (error == 0) {
                                        /*
                                         * If the control mbuf already has external storage,
                                         * replace it with a fresh mbuf before attaching newcm.
                                         */
1.108     yamt     1456:                if (control->m_flags & M_EXT) {
                   1457:                        m_freem(control);
                   1458:                        *controlp = control = m_get(M_WAIT, MT_CONTROL);
                   1459:                }
1.106     ad       1460:                MEXTADD(control, newcm, CMSG_SPACE(nfds * sizeof(file_t *)),
1.73      martin   1461:                    M_MBUF, NULL, NULL);
                   1462:                cm = newcm;
1.106     ad       1463:                /*
                   1464:                 * Adjust message & mbuf to note amount of space
                   1465:                 * actually used.
                   1466:                 */
                   1467:                cm->cmsg_len = CMSG_LEN(nfds * sizeof(file_t *));
                   1468:                control->m_len = CMSG_SPACE(nfds * sizeof(file_t *));
1.73      martin   1469:        }
                   1470:
1.106     ad       1471:        return error;
1.30      thorpej  1472: }
                   1473:
                   1474: struct mbuf *
1.92      ad       1475: unp_addsockcred(struct lwp *l, struct mbuf *control)
1.30      thorpej  1476: {
                   1477:        struct sockcred *sc;
1.142     christos 1478:        struct mbuf *m;
                   1479:        void *p;
1.30      thorpej  1480:
1.142     christos 1481:        m = sbcreatecontrol1(&p, SOCKCREDSIZE(kauth_cred_ngroups(l->l_cred)),
                   1482:                SCM_CREDS, SOL_SOCKET, M_WAITOK);
                   1483:        if (m == NULL)
                   1484:                return control;
                   1485:
                   1486:        sc = p;
1.92      ad       1487:        sc->sc_uid = kauth_cred_getuid(l->l_cred);
                   1488:        sc->sc_euid = kauth_cred_geteuid(l->l_cred);
                   1489:        sc->sc_gid = kauth_cred_getgid(l->l_cred);
                   1490:        sc->sc_egid = kauth_cred_getegid(l->l_cred);
                   1491:        sc->sc_ngroups = kauth_cred_ngroups(l->l_cred);
1.142     christos 1492:
                   1493:        for (int i = 0; i < sc->sc_ngroups; i++)
1.92      ad       1494:                sc->sc_groups[i] = kauth_cred_group(l->l_cred, i);
1.30      thorpej  1495:
1.142     christos 1496:        return m_add(control, m);
1.1       cgd      1497: }
                   1498:
1.39      sommerfe 1499: /*
1.121     mrg      1500:  * Do a mark-sweep GC of files in the system, to free up any which are
                   1501:  * caught in flight to an about-to-be-closed socket.  Additionally,
                   1502:  * process deferred file closures.
1.39      sommerfe 1503:  */
1.121     mrg      1504: static void
                   1505: unp_gc(file_t *dp)
1.1       cgd      1506: {
1.121     mrg      1507:        extern  struct domain unixdomain;
                   1508:        file_t *fp, *np;
1.46      augustss 1509:        struct socket *so, *so1;
1.121     mrg      1510:        u_int i, old, new;
                   1511:        bool didwork;
1.1       cgd      1512:
1.121     mrg      1513:        KASSERT(curlwp == unp_thread_lwp);
                   1514:        KASSERT(mutex_owned(&filelist_lock));
1.106     ad       1515:
1.121     mrg      1516:        /*
                   1517:         * First, process deferred file closures.
                   1518:         */
                   1519:        while (!SLIST_EMPTY(&unp_thread_discard)) {
                   1520:                fp = SLIST_FIRST(&unp_thread_discard);
                   1521:                KASSERT(fp->f_unpcount > 0);
                   1522:                KASSERT(fp->f_count > 0);
                   1523:                KASSERT(fp->f_msgcount > 0);
                   1524:                KASSERT(fp->f_count >= fp->f_unpcount);
                   1525:                KASSERT(fp->f_count >= fp->f_msgcount);
                   1526:                KASSERT(fp->f_msgcount >= fp->f_unpcount);
                   1527:                SLIST_REMOVE_HEAD(&unp_thread_discard, f_unplist);
                   1528:                i = fp->f_unpcount;
                   1529:                fp->f_unpcount = 0;
                   1530:                mutex_exit(&filelist_lock);
                   1531:                for (; i != 0; i--) {
                   1532:                        unp_discard_now(fp);
                   1533:                }
                   1534:                mutex_enter(&filelist_lock);
                   1535:        }
1.39      sommerfe 1536:
1.121     mrg      1537:        /*
                   1538:         * Clear mark bits.  Ensure that we don't consider new files
                   1539:         * entering the file table during this loop (they will not have
                   1540:         * FSCAN set).
                   1541:         */
1.106     ad       1542:        unp_defer = 0;
                   1543:        LIST_FOREACH(fp, &filehead, f_list) {
1.121     mrg      1544:                for (old = fp->f_flag;; old = new) {
                   1545:                        new = atomic_cas_uint(&fp->f_flag, old,
                   1546:                            (old | FSCAN) & ~(FMARK|FDEFER));
                   1547:                        if (__predict_true(old == new)) {
                   1548:                                break;
                   1549:                        }
                   1550:                }
1.106     ad       1551:        }
1.39      sommerfe 1552:
                   1553:        /*
1.121     mrg      1554:         * Iterate over the set of sockets, marking ones believed (based on
                   1555:         * refcount) to be referenced from a process, and marking for rescan
                   1556:         * sockets which are queued on a socket.  Recan continues descending
                   1557:         * and searching for sockets referenced by sockets (FDEFER), until
                   1558:         * there are no more socket->socket references to be discovered.
1.39      sommerfe 1559:         */
1.1       cgd      1560:        do {
1.121     mrg      1561:                didwork = false;
                   1562:                for (fp = LIST_FIRST(&filehead); fp != NULL; fp = np) {
                   1563:                        KASSERT(mutex_owned(&filelist_lock));
                   1564:                        np = LIST_NEXT(fp, f_list);
1.106     ad       1565:                        mutex_enter(&fp->f_lock);
1.121     mrg      1566:                        if ((fp->f_flag & FDEFER) != 0) {
1.106     ad       1567:                                atomic_and_uint(&fp->f_flag, ~FDEFER);
1.1       cgd      1568:                                unp_defer--;
1.106     ad       1569:                                KASSERT(fp->f_count != 0);
1.1       cgd      1570:                        } else {
1.101     ad       1571:                                if (fp->f_count == 0 ||
1.121     mrg      1572:                                    (fp->f_flag & FMARK) != 0 ||
                   1573:                                    fp->f_count == fp->f_msgcount ||
                   1574:                                    fp->f_unpcount != 0) {
1.106     ad       1575:                                        mutex_exit(&fp->f_lock);
1.1       cgd      1576:                                        continue;
1.101     ad       1577:                                }
1.1       cgd      1578:                        }
1.106     ad       1579:                        atomic_or_uint(&fp->f_flag, FMARK);
1.39      sommerfe 1580:
1.1       cgd      1581:                        if (fp->f_type != DTYPE_SOCKET ||
1.112     ad       1582:                            (so = fp->f_data) == NULL ||
1.101     ad       1583:                            so->so_proto->pr_domain != &unixdomain ||
1.121     mrg      1584:                            (so->so_proto->pr_flags & PR_RIGHTS) == 0) {
1.106     ad       1585:                                mutex_exit(&fp->f_lock);
1.1       cgd      1586:                                continue;
1.101     ad       1587:                        }
1.121     mrg      1588:
                   1589:                        /* Gain file ref, mark our position, and unlock. */
                   1590:                        didwork = true;
                   1591:                        LIST_INSERT_AFTER(fp, dp, f_list);
                   1592:                        fp->f_count++;
1.106     ad       1593:                        mutex_exit(&fp->f_lock);
1.121     mrg      1594:                        mutex_exit(&filelist_lock);
1.101     ad       1595:
1.112     ad       1596:                        /*
1.121     mrg      1597:                         * Mark files referenced from sockets queued on the
                   1598:                         * accept queue as well.
1.112     ad       1599:                         */
                   1600:                        solock(so);
1.39      sommerfe 1601:                        unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
1.121     mrg      1602:                        if ((so->so_options & SO_ACCEPTCONN) != 0) {
1.54      matt     1603:                                TAILQ_FOREACH(so1, &so->so_q0, so_qe) {
1.39      sommerfe 1604:                                        unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
                   1605:                                }
1.54      matt     1606:                                TAILQ_FOREACH(so1, &so->so_q, so_qe) {
1.39      sommerfe 1607:                                        unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
                   1608:                                }
                   1609:                        }
1.112     ad       1610:                        sounlock(so);
1.121     mrg      1611:
                   1612:                        /* Re-lock and restart from where we left off. */
                   1613:                        closef(fp);
                   1614:                        mutex_enter(&filelist_lock);
                   1615:                        np = LIST_NEXT(dp, f_list);
                   1616:                        LIST_REMOVE(dp, f_list);
1.1       cgd      1617:                }
1.121     mrg      1618:                /*
                   1619:                 * Bail early if we did nothing in the loop above.  Could
                   1620:                 * happen because of concurrent activity causing unp_defer
                   1621:                 * to get out of sync.
                   1622:                 */
                   1623:        } while (unp_defer != 0 && didwork);
1.101     ad       1624:
1.8       mycroft  1625:        /*
1.121     mrg      1626:         * Sweep pass.
1.8       mycroft  1627:         *
1.121     mrg      1628:         * We grab an extra reference to each of the files that are
                   1629:         * not otherwise accessible and then free the rights that are
                   1630:         * stored in messages on them.
1.8       mycroft  1631:         */
1.121     mrg      1632:        for (fp = LIST_FIRST(&filehead); fp != NULL; fp = np) {
                   1633:                KASSERT(mutex_owned(&filelist_lock));
                   1634:                np = LIST_NEXT(fp, f_list);
1.106     ad       1635:                mutex_enter(&fp->f_lock);
1.121     mrg      1636:
                   1637:                /*
                   1638:                 * Ignore non-sockets.
                   1639:                 * Ignore dead sockets, or sockets with pending close.
                   1640:                 * Ignore sockets obviously referenced elsewhere.
                   1641:                 * Ignore sockets marked as referenced by our scan.
                   1642:                 * Ignore new sockets that did not exist during the scan.
                   1643:                 */
                   1644:                if (fp->f_type != DTYPE_SOCKET ||
                   1645:                    fp->f_count == 0 || fp->f_unpcount != 0 ||
                   1646:                    fp->f_count != fp->f_msgcount ||
                   1647:                    (fp->f_flag & (FMARK | FSCAN)) != FSCAN) {
                   1648:                        mutex_exit(&fp->f_lock);
                   1649:                        continue;
1.8       mycroft  1650:                }
1.121     mrg      1651:
                   1652:                /* Gain file ref, mark our position, and unlock. */
                   1653:                LIST_INSERT_AFTER(fp, dp, f_list);
                   1654:                fp->f_count++;
1.106     ad       1655:                mutex_exit(&fp->f_lock);
1.121     mrg      1656:                mutex_exit(&filelist_lock);
                   1657:
                   1658:                /*
                   1659:                 * Flush all data from the socket's receive buffer.
                   1660:                 * This will cause files referenced only by the
                   1661:                 * socket to be queued for close.
                   1662:                 */
                   1663:                so = fp->f_data;
                   1664:                solock(so);
                   1665:                sorflush(so);
                   1666:                sounlock(so);
                   1667:
                   1668:                /* Re-lock and restart from where we left off. */
                   1669:                closef(fp);
                   1670:                mutex_enter(&filelist_lock);
                   1671:                np = LIST_NEXT(dp, f_list);
                   1672:                LIST_REMOVE(dp, f_list);
                   1673:        }
                   1674: }
                   1675:
                   1676: /*
                   1677:  * Garbage collector thread.  While SCM_RIGHTS messages are in transit,
                   1678:  * wake once per second to garbage collect.  Run continually while we
                   1679:  * have deferred closes to process.
                   1680:  */
                   1681: static void
                   1682: unp_thread(void *cookie)
                   1683: {
                   1684:        file_t *dp;
                   1685:
                   1686:        /* Allocate a dummy file for our scans. */
                   1687:        if ((dp = fgetdummy()) == NULL) {
                   1688:                panic("unp_thread");
1.1       cgd      1689:        }
1.101     ad       1690:
1.121     mrg      1691:        mutex_enter(&filelist_lock);
                   1692:        for (;;) {
                   1693:                KASSERT(mutex_owned(&filelist_lock));
                   1694:                if (SLIST_EMPTY(&unp_thread_discard)) {
                   1695:                        if (unp_rights != 0) {
                   1696:                                (void)cv_timedwait(&unp_thread_cv,
                   1697:                                    &filelist_lock, hz);
                   1698:                        } else {
                   1699:                                cv_wait(&unp_thread_cv, &filelist_lock);
                   1700:                        }
1.112     ad       1701:                }
1.121     mrg      1702:                unp_gc(dp);
1.39      sommerfe 1703:        }
1.121     mrg      1704:        /* NOTREACHED */
                   1705: }
                   1706:
                   1707: /*
                   1708:  * Kick the garbage collector into action if there is something for
                   1709:  * it to process.
                   1710:  */
                   1711: static void
                   1712: unp_thread_kick(void)
                   1713: {
                   1714:
                   1715:        if (!SLIST_EMPTY(&unp_thread_discard) || unp_rights != 0) {
                   1716:                mutex_enter(&filelist_lock);
                   1717:                cv_signal(&unp_thread_cv);
                   1718:                mutex_exit(&filelist_lock);
1.44      thorpej  1719:        }
1.1       cgd      1720: }
                   1721:
1.5       andrew   1722: void
1.76      matt     1723: unp_dispose(struct mbuf *m)
1.1       cgd      1724: {
1.8       mycroft  1725:
1.1       cgd      1726:        if (m)
1.121     mrg      1727:                unp_scan(m, unp_discard_later, 1);
1.1       cgd      1728: }
                   1729:
1.5       andrew   1730: void
1.106     ad       1731: unp_scan(struct mbuf *m0, void (*op)(file_t *), int discard)
1.1       cgd      1732: {
1.46      augustss 1733:        struct mbuf *m;
1.121     mrg      1734:        file_t **rp, *fp;
1.46      augustss 1735:        struct cmsghdr *cm;
1.121     mrg      1736:        int i, qfds;
1.1       cgd      1737:
                   1738:        while (m0) {
1.48      thorpej  1739:                for (m = m0; m; m = m->m_next) {
1.121     mrg      1740:                        if (m->m_type != MT_CONTROL ||
                   1741:                            m->m_len < sizeof(*cm)) {
                   1742:                                continue;
                   1743:                        }
                   1744:                        cm = mtod(m, struct cmsghdr *);
                   1745:                        if (cm->cmsg_level != SOL_SOCKET ||
                   1746:                            cm->cmsg_type != SCM_RIGHTS)
                   1747:                                continue;
                   1748:                        qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm)))
                   1749:                            / sizeof(file_t *);
                   1750:                        rp = (file_t **)CMSG_DATA(cm);
                   1751:                        for (i = 0; i < qfds; i++) {
                   1752:                                fp = *rp;
                   1753:                                if (discard) {
                   1754:                                        *rp = 0;
1.39      sommerfe 1755:                                }
1.121     mrg      1756:                                (*op)(fp);
                   1757:                                rp++;
1.1       cgd      1758:                        }
1.48      thorpej  1759:                }
1.52      thorpej  1760:                m0 = m0->m_nextpkt;
1.1       cgd      1761:        }
                   1762: }
                   1763:
1.5       andrew   1764: void
1.106     ad       1765: unp_mark(file_t *fp)
1.1       cgd      1766: {
1.101     ad       1767:
1.39      sommerfe 1768:        if (fp == NULL)
                   1769:                return;
1.80      perry    1770:
1.39      sommerfe 1771:        /* If we're already deferred, don't screw up the defer count */
1.106     ad       1772:        mutex_enter(&fp->f_lock);
1.101     ad       1773:        if (fp->f_flag & (FMARK | FDEFER)) {
1.106     ad       1774:                mutex_exit(&fp->f_lock);
1.1       cgd      1775:                return;
1.101     ad       1776:        }
1.39      sommerfe 1777:
                   1778:        /*
1.121     mrg      1779:         * Minimize the number of deferrals...  Sockets are the only type of
                   1780:         * file which can hold references to another file, so just mark
                   1781:         * other files, and defer unmarked sockets for the next pass.
1.39      sommerfe 1782:         */
                   1783:        if (fp->f_type == DTYPE_SOCKET) {
                   1784:                unp_defer++;
1.106     ad       1785:                KASSERT(fp->f_count != 0);
                   1786:                atomic_or_uint(&fp->f_flag, FDEFER);
1.39      sommerfe 1787:        } else {
1.106     ad       1788:                atomic_or_uint(&fp->f_flag, FMARK);
1.39      sommerfe 1789:        }
1.106     ad       1790:        mutex_exit(&fp->f_lock);
1.1       cgd      1791: }
                   1792:
1.121     mrg      1793: static void
                   1794: unp_discard_now(file_t *fp)
1.1       cgd      1795: {
1.106     ad       1796:
1.39      sommerfe 1797:        if (fp == NULL)
                   1798:                return;
1.106     ad       1799:
1.121     mrg      1800:        KASSERT(fp->f_count > 0);
                   1801:        KASSERT(fp->f_msgcount > 0);
                   1802:
1.106     ad       1803:        mutex_enter(&fp->f_lock);
1.1       cgd      1804:        fp->f_msgcount--;
1.106     ad       1805:        mutex_exit(&fp->f_lock);
                   1806:        atomic_dec_uint(&unp_rights);
                   1807:        (void)closef(fp);
1.1       cgd      1808: }
1.121     mrg      1809:
                   1810: static void
                   1811: unp_discard_later(file_t *fp)
                   1812: {
                   1813:
                   1814:        if (fp == NULL)
                   1815:                return;
                   1816:
                   1817:        KASSERT(fp->f_count > 0);
                   1818:        KASSERT(fp->f_msgcount > 0);
                   1819:
                   1820:        mutex_enter(&filelist_lock);
                   1821:        if (fp->f_unpcount++ == 0) {
                   1822:                SLIST_INSERT_HEAD(&unp_thread_discard, fp, f_unplist);
                   1823:        }
                   1824:        mutex_exit(&filelist_lock);
                   1825: }
1.151     rmind    1826:
                   1827: const struct pr_usrreqs unp_usrreqs = {
1.152     rmind    1828:        .pr_attach      = unp_attach,
                   1829:        .pr_detach      = unp_detach,
1.154     rtr      1830:        .pr_ioctl       = unp_ioctl,
1.156     rtr      1831:        .pr_stat        = unp_stat,
1.151     rmind    1832:        .pr_generic     = unp_usrreq,
                   1833: };

CVSweb <webmaster@jp.NetBSD.org>