[BACK]Return to uipc_usrreq.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / kern

Annotation of src/sys/kern/uipc_usrreq.c, Revision 1.48

1.48    ! thorpej     1: /*     $NetBSD: uipc_usrreq.c,v 1.47 2000/06/05 06:06:07 thorpej Exp $ */
1.30      thorpej     2:
                      3: /*-
1.47      thorpej     4:  * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
1.30      thorpej     5:  * All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to The NetBSD Foundation
                      8:  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
                      9:  * NASA Ames Research Center.
                     10:  *
                     11:  * Redistribution and use in source and binary forms, with or without
                     12:  * modification, are permitted provided that the following conditions
                     13:  * are met:
                     14:  * 1. Redistributions of source code must retain the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer.
                     16:  * 2. Redistributions in binary form must reproduce the above copyright
                     17:  *    notice, this list of conditions and the following disclaimer in the
                     18:  *    documentation and/or other materials provided with the distribution.
                     19:  * 3. All advertising materials mentioning features or use of this software
                     20:  *    must display the following acknowledgement:
                     21:  *     This product includes software developed by the NetBSD
                     22:  *     Foundation, Inc. and its contributors.
                     23:  * 4. Neither the name of The NetBSD Foundation nor the names of its
                     24:  *    contributors may be used to endorse or promote products derived
                     25:  *    from this software without specific prior written permission.
                     26:  *
                     27:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     28:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     29:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     30:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     31:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     32:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     33:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     34:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     35:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     36:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     37:  * POSSIBILITY OF SUCH DAMAGE.
                     38:  */
1.10      cgd        39:
1.1       cgd        40: /*
1.24      cgd        41:  * Copyright (c) 1997 Christopher G. Demetriou.  All rights reserved.
1.8       mycroft    42:  * Copyright (c) 1982, 1986, 1989, 1991, 1993
                     43:  *     The Regents of the University of California.  All rights reserved.
1.1       cgd        44:  *
                     45:  * Redistribution and use in source and binary forms, with or without
                     46:  * modification, are permitted provided that the following conditions
                     47:  * are met:
                     48:  * 1. Redistributions of source code must retain the above copyright
                     49:  *    notice, this list of conditions and the following disclaimer.
                     50:  * 2. Redistributions in binary form must reproduce the above copyright
                     51:  *    notice, this list of conditions and the following disclaimer in the
                     52:  *    documentation and/or other materials provided with the distribution.
                     53:  * 3. All advertising materials mentioning features or use of this software
                     54:  *    must display the following acknowledgement:
                     55:  *     This product includes software developed by the University of
                     56:  *     California, Berkeley and its contributors.
                     57:  * 4. Neither the name of the University nor the names of its contributors
                     58:  *    may be used to endorse or promote products derived from this software
                     59:  *    without specific prior written permission.
                     60:  *
                     61:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     62:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     63:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     64:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     65:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     66:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     67:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     68:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     69:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     70:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     71:  * SUCH DAMAGE.
                     72:  *
1.31      fvdl       73:  *     @(#)uipc_usrreq.c       8.9 (Berkeley) 5/14/95
1.1       cgd        74:  */
                     75:
1.7       mycroft    76: #include <sys/param.h>
1.8       mycroft    77: #include <sys/systm.h>
1.7       mycroft    78: #include <sys/proc.h>
                     79: #include <sys/filedesc.h>
                     80: #include <sys/domain.h>
                     81: #include <sys/protosw.h>
                     82: #include <sys/socket.h>
                     83: #include <sys/socketvar.h>
                     84: #include <sys/unpcb.h>
                     85: #include <sys/un.h>
                     86: #include <sys/namei.h>
                     87: #include <sys/vnode.h>
                     88: #include <sys/file.h>
                     89: #include <sys/stat.h>
                     90: #include <sys/mbuf.h>
1.1       cgd        91:
                     92: /*
                     93:  * Unix communications domain.
                     94:  *
                     95:  * TODO:
                     96:  *     SEQPACKET, RDM
                     97:  *     rethink name space problems
                     98:  *     need a proper out-of-band
                     99:  */
1.34      lukem     100: struct sockaddr_un sun_noname = { sizeof(sun_noname), AF_LOCAL };
                          /*
                           * sun_noname (above) is the placeholder address this file falls
                           * back to whenever a socket has no address of its own, i.e. when
                           * the relevant unp_addr pointer is null.  Its initializer supplies
                           * its own size and AF_LOCAL.
                           */
1.1       cgd       101: ino_t  unp_ino;                        /* prototype for fake inode numbers */
                     102:
                          /*
                           * Forward declaration.  The send paths in this file call it to
                           * obtain the control mbuf chain to deliver when the receiving
                           * peer has UNP_WANTCRED set.
                           */
1.30      thorpej   103: struct mbuf *unp_addsockcred __P((struct proc *, struct mbuf *));
                     104:
                          /*
                           * unp_output: append the data chain m and the control chain
                           * control to the receive buffer of the socket connected to unp.
                           *
                           * The record is tagged with the address of unp (unp_addr), or
                           * with sun_noname when unp has no address.  If the connected
                           * peer has UNP_WANTCRED set, control is first replaced by the
                           * result of unp_addsockcred(p, control).
                           *
                           * Returns 0 after waking readers on the peer socket.  If
                           * sbappendaddr() returns 0, both m and control are freed here
                           * and EINVAL is returned.
                           *
                           * unp->unp_conn is dereferenced without a check, so the caller
                           * must only call this on a connected pcb.
                           */
1.20      mycroft   105: int
1.30      thorpej   106: unp_output(m, control, unp, p)
1.20      mycroft   107:        struct mbuf *m, *control;
                     108:        struct unpcb *unp;
1.30      thorpej   109:        struct proc *p;
1.20      mycroft   110: {
                     111:        struct socket *so2;
                     112:        struct sockaddr_un *sun;
                     113:
                     114:        so2 = unp->unp_conn->unp_socket;
                     115:        if (unp->unp_addr)
                     116:                sun = unp->unp_addr;
                     117:        else
                     118:                sun = &sun_noname;
1.30      thorpej   119:        if (unp->unp_conn->unp_flags & UNP_WANTCRED)
                     120:                control = unp_addsockcred(p, control);
1.20      mycroft   121:        if (sbappendaddr(&so2->so_rcv, (struct sockaddr *)sun, m,
                     122:            control) == 0) {
                                  /* Nothing was queued: this function still owns both chains. */
                     123:                m_freem(control);
                     124:                m_freem(m);
                     125:                return (EINVAL);
                     126:        } else {
                     127:                sorwakeup(so2);
                     128:                return (0);
                     129:        }
                     130: }
                     131:
                          /*
                           * unp_setsockaddr: copy the local address of unp into the mbuf nam.
                           *
                           * The source is unp->unp_addr, or sun_noname when the pcb has no
                           * address.  nam->m_len is set to the sun_len of that address and
                           * exactly that many bytes are copied.  When the length exceeds
                           * MLEN, MEXTMALLOC() is invoked on nam with M_WAITOK before the
                           * copy (presumably to attach storage large enough for the
                           * address -- confirm against the mbuf macros).
                           */
                     132: void
                     133: unp_setsockaddr(unp, nam)
1.46      augustss  134:        struct unpcb *unp;
1.20      mycroft   135:        struct mbuf *nam;
                     136: {
                     137:        struct sockaddr_un *sun;
                     138:
                     139:        if (unp->unp_addr)
                     140:                sun = unp->unp_addr;
                     141:        else
                     142:                sun = &sun_noname;
                     143:        nam->m_len = sun->sun_len;
1.27      thorpej   144:        if (nam->m_len > MLEN)
                     145:                MEXTMALLOC(nam, nam->m_len, M_WAITOK);
1.36      perry     146:        memcpy(mtod(nam, caddr_t), sun, (size_t)nam->m_len);
1.20      mycroft   147: }
                     148:
                          /*
                           * unp_setpeeraddr: copy the address of the peer of unp into the
                           * mbuf nam.
                           *
                           * The source is unp->unp_conn->unp_addr; sun_noname is used when
                           * unp is not connected or the peer has no address.  The length
                           * handling is the same as in unp_setsockaddr(): m_len is taken
                           * from sun_len and MEXTMALLOC() is invoked when it exceeds MLEN.
                           */
                     149: void
                     150: unp_setpeeraddr(unp, nam)
1.46      augustss  151:        struct unpcb *unp;
1.20      mycroft   152:        struct mbuf *nam;
                     153: {
                     154:        struct sockaddr_un *sun;
                     155:
                     156:        if (unp->unp_conn && unp->unp_conn->unp_addr)
                     157:                sun = unp->unp_conn->unp_addr;
                     158:        else
                     159:                sun = &sun_noname;
                     160:        nam->m_len = sun->sun_len;
1.27      thorpej   161:        if (nam->m_len > MLEN)
                     162:                MEXTMALLOC(nam, nam->m_len, M_WAITOK);
1.36      perry     163:        memcpy(mtod(nam, caddr_t), sun, (size_t)nam->m_len);
1.20      mycroft   164: }
                     165:
                          /*
                           * uipc_usrreq: user-request dispatcher for this domain.
                           *
                           *   so       socket the request applies to
                           *   req      PRU_* request code
                           *   m        data chain; PRU_SENSE casts it to a struct stat *
                           *   nam      address mbuf; PRU_CONNECT2 casts it to a
                           *            struct socket *
                           *   control  control chain, used by PRU_SEND and PRU_SENDOOB
                           *   p        calling process
                           *
                           * Returns 0 or an errno value.  PRU_CONTROL is rejected with
                           * EOPNOTSUPP before anything else is examined, and any request
                           * other than PRU_ATTACH on a socket without a pcb yields EINVAL.
                           * An unknown request code panics.
                           */
1.1       cgd       166: /*ARGSUSED*/
1.5       andrew    167: int
1.19      mycroft   168: uipc_usrreq(so, req, m, nam, control, p)
1.1       cgd       169:        struct socket *so;
                     170:        int req;
                     171:        struct mbuf *m, *nam, *control;
1.19      mycroft   172:        struct proc *p;
1.1       cgd       173: {
                     174:        struct unpcb *unp = sotounpcb(so);
1.46      augustss  175:        struct socket *so2;
                     176:        int error = 0;
1.1       cgd       177:
                     178:        if (req == PRU_CONTROL)
                     179:                return (EOPNOTSUPP);
1.20      mycroft   180:
1.22      mycroft   181: #ifdef DIAGNOSTIC
                     182:        if (req != PRU_SEND && req != PRU_SENDOOB && control)
                     183:                panic("uipc_usrreq: unexpected control mbuf");
                     184: #endif
1.1       cgd       185:        if (unp == 0 && req != PRU_ATTACH) {
                     186:                error = EINVAL;
                     187:                goto release;
                     188:        }
1.20      mycroft   189:
1.1       cgd       190:        switch (req) {
                     191:
                     192:        case PRU_ATTACH:
                                  /* A socket that already has a pcb cannot be attached again. */
1.20      mycroft   193:                if (unp != 0) {
1.1       cgd       194:                        error = EISCONN;
                     195:                        break;
                     196:                }
                     197:                error = unp_attach(so);
                     198:                break;
                     199:
                     200:        case PRU_DETACH:
                     201:                unp_detach(unp);
                     202:                break;
                     203:
                     204:        case PRU_BIND:
                     205:                error = unp_bind(unp, nam, p);
                     206:                break;
                     207:
                     208:        case PRU_LISTEN:
                                  /* Only a pcb that has a vnode attached may listen. */
                     209:                if (unp->unp_vnode == 0)
                     210:                        error = EINVAL;
                     211:                break;
                     212:
                     213:        case PRU_CONNECT:
                     214:                error = unp_connect(so, nam, p);
                     215:                break;
                     216:
                     217:        case PRU_CONNECT2:
                                  /* For this request nam carries the second socket, not an address. */
                     218:                error = unp_connect2(so, (struct socket *)nam);
                     219:                break;
                     220:
                     221:        case PRU_DISCONNECT:
                     222:                unp_disconnect(unp);
                     223:                break;
                     224:
                     225:        case PRU_ACCEPT:
1.20      mycroft   226:                unp_setpeeraddr(unp, nam);
1.1       cgd       227:                break;
                     228:
                     229:        case PRU_SHUTDOWN:
                     230:                socantsendmore(so);
                     231:                unp_shutdown(unp);
                     232:                break;
                     233:
                     234:        case PRU_RCVD:
                                  /*
                                   * Only stream sockets are expected here; a datagram
                                   * socket or any other type panics.
                                   */
                     235:                switch (so->so_type) {
                     236:
                     237:                case SOCK_DGRAM:
                     238:                        panic("uipc 1");
                     239:                        /*NOTREACHED*/
                     240:
                     241:                case SOCK_STREAM:
                     242: #define        rcv (&so->so_rcv)
                     243: #define snd (&so2->so_snd)
                     244:                        if (unp->unp_conn == 0)
                     245:                                break;
                     246:                        so2 = unp->unp_conn->unp_socket;
                     247:                        /*
                     248:                         * Adjust backpressure on sender
                     249:                         * and wakeup any waiting to write.
                     250:                         */
                                          /*
                                           * unp_mbcnt and unp_cc hold the receive buffer
                                           * counts recorded at the previous adjustment.  The
                                           * difference from the current counts is added to
                                           * the limits of the peer send buffer, and the
                                           * recorded counts are then brought up to date.
                                           */
                     251:                        snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
                     252:                        unp->unp_mbcnt = rcv->sb_mbcnt;
                     253:                        snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
                     254:                        unp->unp_cc = rcv->sb_cc;
                     255:                        sowwakeup(so2);
                     256: #undef snd
                     257: #undef rcv
                     258:                        break;
                     259:
                     260:                default:
                     261:                        panic("uipc 2");
                     262:                }
                     263:                break;
                     264:
                     265:        case PRU_SEND:
1.30      thorpej   266:                /*
                     267:                 * Note: unp_internalize() rejects any control message
                     268:                 * other than SCM_RIGHTS, and only allows one.  This
                     269:                 * has the side-effect of preventing a caller from
                     270:                 * forging SCM_CREDS.
                     271:                 */
                                  /*
                                   * NOTE(review): when unp_internalize() fails, this path
                                   * breaks out without freeing m or control, whereas the
                                   * die path below and PRU_SENDOOB free both.  Confirm who
                                   * owns the chains on this error path; it may be a leak.
                                   */
1.1       cgd       272:                if (control && (error = unp_internalize(control, p)))
                     273:                        break;
                     274:                switch (so->so_type) {
                     275:
                     276:                case SOCK_DGRAM: {
                                          /*
                                           * With an explicit destination the socket must not
                                           * already be connected; it is connected for this
                                           * one send and disconnected again afterwards.
                                           * Without a destination it must already be
                                           * connected.
                                           */
                     277:                        if (nam) {
1.20      mycroft   278:                                if ((so->so_state & SS_ISCONNECTED) != 0) {
1.1       cgd       279:                                        error = EISCONN;
1.21      mycroft   280:                                        goto die;
1.1       cgd       281:                                }
                     282:                                error = unp_connect(so, nam, p);
1.20      mycroft   283:                                if (error) {
                                                          /*
                                                           * Shared error exit for the datagram
                                                           * path.  It is also entered by goto
                                                           * from the branches above and below,
                                                           * and frees both chains.
                                                           */
1.23      mycroft   284:                                die:
1.21      mycroft   285:                                        m_freem(control);
1.20      mycroft   286:                                        m_freem(m);
1.1       cgd       287:                                        break;
1.20      mycroft   288:                                }
1.1       cgd       289:                        } else {
1.20      mycroft   290:                                if ((so->so_state & SS_ISCONNECTED) == 0) {
1.1       cgd       291:                                        error = ENOTCONN;
1.21      mycroft   292:                                        goto die;
1.1       cgd       293:                                }
                     294:                        }
1.30      thorpej   295:                        error = unp_output(m, control, unp, p);
1.1       cgd       296:                        if (nam)
                     297:                                unp_disconnect(unp);
                     298:                        break;
                     299:                }
                     300:
                     301:                case SOCK_STREAM:
                     302: #define        rcv (&so2->so_rcv)
                     303: #define        snd (&so->so_snd)
                     304:                        if (unp->unp_conn == 0)
                     305:                                panic("uipc 3");
                     306:                        so2 = unp->unp_conn->unp_socket;
1.30      thorpej   307:                        if (unp->unp_conn->unp_flags & UNP_WANTCRED) {
                     308:                                /*
                     309:                                 * Credentials are passed only once on
                     310:                                 * SOCK_STREAM.
                     311:                                 */
                     312:                                unp->unp_conn->unp_flags &= ~UNP_WANTCRED;
                     313:                                control = unp_addsockcred(p, control);
                     314:                        }
1.1       cgd       315:                        /*
                     316:                         * Send to paired receive port, and then reduce
                     317:                         * send buffer hiwater marks to maintain backpressure.
                     318:                         * Wake up readers.
                     319:                         */
                                          /*
                                           * NOTE(review): when sbappendcontrol() returns 0
                                           * only control is freed here and m is left alone.
                                           * If m was not queued in that case it is leaked --
                                           * confirm against sbappendcontrol().
                                           */
                     320:                        if (control) {
1.21      mycroft   321:                                if (sbappendcontrol(rcv, m, control) == 0)
                     322:                                        m_freem(control);
1.1       cgd       323:                        } else
                     324:                                sbappend(rcv, m);
                                          /*
                                           * Shrink the limits of our send buffer by the
                                           * amount the peer receive buffer grew since the
                                           * counts last recorded in the peer pcb, then
                                           * record the new counts there.
                                           */
                     325:                        snd->sb_mbmax -=
                     326:                            rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
                     327:                        unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
                     328:                        snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
                     329:                        unp->unp_conn->unp_cc = rcv->sb_cc;
                     330:                        sorwakeup(so2);
                     331: #undef snd
                     332: #undef rcv
                     333:                        break;
                     334:
                     335:                default:
                     336:                        panic("uipc 4");
                     337:                }
                     338:                break;
                     339:
                     340:        case PRU_ABORT:
                     341:                unp_drop(unp, ECONNABORTED);
1.39      sommerfe  342:
                                  /* The pcb must still be attached after the drop so it can be detached here. */
                     343: #ifdef DIAGNOSTIC
                     344:                if (so->so_pcb == 0)
                     345:                        panic("uipc 5: drop killed pcb");
                     346: #endif
                     347:                unp_detach(unp);
1.1       cgd       348:                break;
                     349:
                     350:        case PRU_SENSE:
                                  /*
                                   * m is really a struct stat * here.  The block size
                                   * reported is the send buffer high-water mark, plus, for
                                   * a connected stream socket, the byte count of the peer
                                   * receive buffer.  A fake inode number is taken from
                                   * unp_ino the first time it is needed.  This case
                                   * returns directly rather than through release.
                                   */
                     351:                ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
                     352:                if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
                     353:                        so2 = unp->unp_conn->unp_socket;
                     354:                        ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
                     355:                }
                     356:                ((struct stat *) m)->st_dev = NODEV;
                     357:                if (unp->unp_ino == 0)
                     358:                        unp->unp_ino = unp_ino++;
1.25      kleink    359:                ((struct stat *) m)->st_atimespec =
                     360:                    ((struct stat *) m)->st_mtimespec =
                     361:                    ((struct stat *) m)->st_ctimespec = unp->unp_ctime;
1.1       cgd       362:                ((struct stat *) m)->st_ino = unp->unp_ino;
                     363:                return (0);
                     364:
                     365:        case PRU_RCVOOB:
1.20      mycroft   366:                error = EOPNOTSUPP;
                     367:                break;
1.1       cgd       368:
                     369:        case PRU_SENDOOB:
                                  /* Out-of-band data is not supported: discard both chains. */
1.22      mycroft   370:                m_freem(control);
1.20      mycroft   371:                m_freem(m);
1.1       cgd       372:                error = EOPNOTSUPP;
                     373:                break;
                     374:
                     375:        case PRU_SOCKADDR:
1.20      mycroft   376:                unp_setsockaddr(unp, nam);
1.1       cgd       377:                break;
                     378:
                     379:        case PRU_PEERADDR:
1.20      mycroft   380:                unp_setpeeraddr(unp, nam);
1.1       cgd       381:                break;
                     382:
                     383:        default:
                     384:                panic("piusrreq");
                     385:        }
1.20      mycroft   386:
                          /* Common exit: nothing is released here, the label only returns error. */
1.1       cgd       387: release:
                     388:        return (error);
                     389: }
                    390:
                     391: /*
1.30      thorpej   392:  * Unix domain socket option processing.
                           *
                           *   op       PRCO_SETOPT or PRCO_GETOPT
                           *   so       socket whose pcb flags are read or changed
                           *   level    must be 0, otherwise EINVAL is returned
                           *   optname  only LOCAL_CREDS is recognized, anything else
                           *            yields ENOPROTOOPT
                           *   mp       in/out option mbuf
                           *
                           * PRCO_SETOPT expects an mbuf holding exactly one int; a non-zero
                           * value sets UNP_WANTCRED in the pcb flags and zero clears it.
                           * The option mbuf is released with m_free() on every PRCO_SETOPT
                           * path, including the level check failure.
                           *
                           * PRCO_GETOPT allocates a new mbuf with m_get(M_WAIT, MT_SOOPTS),
                           * stores it in *mp and fills it with 1 or 0 according to
                           * UNP_WANTCRED.  The result of m_get() is used without a null
                           * check.
                           *
                           * The switch on op has no default, so any other op value returns
                           * 0 without doing anything.
                     393:  */
                     394: int
                     395: uipc_ctloutput(op, so, level, optname, mp)
                     396:        int op;
                     397:        struct socket *so;
                     398:        int level, optname;
                     399:        struct mbuf **mp;
                     400: {
                     401:        struct unpcb *unp = sotounpcb(so);
                     402:        struct mbuf *m = *mp;
                     403:        int optval = 0, error = 0;
                     404:
                     405:        if (level != 0) {
                     406:                error = EINVAL;
                     407:                if (op == PRCO_SETOPT && m)
                     408:                        (void) m_free(m);
                     409:        } else switch (op) {
                     410:
                     411:        case PRCO_SETOPT:
                     412:                switch (optname) {
                     413:                case LOCAL_CREDS:
                     414:                        if (m == NULL || m->m_len != sizeof(int))
                     415:                                error = EINVAL;
                     416:                        else {
                     417:                                optval = *mtod(m, int *);
                                                  /*
                                                   * Inner switch on the same optname: each
                                                   * flag-style option maps to one OPTSET()
                                                   * line.  Only LOCAL_CREDS exists here.
                                                   */
                     418:                                switch (optname) {
                     419: #define        OPTSET(bit) \
                     420:        if (optval) \
                     421:                unp->unp_flags |= (bit); \
                     422:        else \
                     423:                unp->unp_flags &= ~(bit);
                     424:
                     425:                                case LOCAL_CREDS:
                     426:                                        OPTSET(UNP_WANTCRED);
                     427:                                        break;
                     428:                                }
                     429:                        }
                     430:                        break;
                     431: #undef OPTSET
                     432:
                     433:                default:
                     434:                        error = ENOPROTOOPT;
                     435:                        break;
                     436:                }
                                  /* The option mbuf is consumed here whether or not the set succeeded. */
                     437:                if (m)
                     438:                        (void) m_free(m);
                     439:                break;
                     440:
                     441:        case PRCO_GETOPT:
                     442:                switch (optname) {
                     443:                case LOCAL_CREDS:
                     444:                        *mp = m = m_get(M_WAIT, MT_SOOPTS);
                     445:                        m->m_len = sizeof(int);
                     446:                        switch (optname) {
                     447:
                     448: #define        OPTBIT(bit)     (unp->unp_flags & (bit) ? 1 : 0)
                     449:
                     450:                        case LOCAL_CREDS:
                     451:                                optval = OPTBIT(UNP_WANTCRED);
                     452:                                break;
                     453:                        }
                     454:                        *mtod(m, int *) = optval;
                     455:                        break;
                     456: #undef OPTBIT
                     457:
                     458:                default:
                     459:                        error = ENOPROTOOPT;
                     460:                        break;
                     461:                }
                     462:                break;
                     463:        }
                     464:        return (error);
                     465: }
                    466:
                     467: /*
1.1       cgd       468:  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
                     469:  * for stream sockets, although the total for sender and receiver is
                     470:  * actually only PIPSIZ.
                     471:  * Datagram sockets really use the sendspace as the maximum datagram size,
                     472:  * and don't really want to reserve the sendspace.  Their recvspace should
                     473:  * be large enough for at least one max-size datagram plus address.
                           *
                           * unp_attach() passes the unpst_* pair to soreserve() for
                           * SOCK_STREAM sockets and the unpdg_* pair for SOCK_DGRAM sockets,
                           * and only when the socket has a zero high-water mark on either
                           * buffer.
                           *
                           * unp_detach() tests unp_rights to decide whether the receive
                           * buffer must be flushed and unp_gc() run before it returns.
                     474:  */
                     475: #define        PIPSIZ  4096
                     476: u_long unpst_sendspace = PIPSIZ;
                     477: u_long unpst_recvspace = PIPSIZ;
                     478: u_long unpdg_sendspace = 2*1024;       /* really max datagram size */
                     479: u_long unpdg_recvspace = 4*1024;
                     480:
                     481: int    unp_rights;                     /* file descriptors in flight */
                     482:
                          /*
                           * unp_attach: allocate a pcb for so and link the two together.
                           *
                           * If either socket buffer has a zero high-water mark, buffer space
                           * is reserved first with soreserve(), using the stream or datagram
                           * defaults according to so->so_type; any other socket type panics.
                           * A soreserve() error is returned unchanged.
                           *
                           * The pcb is allocated with M_NOWAIT, so the allocation can fail,
                           * in which case ENOBUFS is returned.  On success the pcb is
                           * zeroed, cross-linked with the socket, and stamped with the
                           * current time in unp_ctime.  Returns 0 on success.
                           */
1.5       andrew    483: int
1.1       cgd       484: unp_attach(so)
                     485:        struct socket *so;
                     486: {
1.46      augustss  487:        struct unpcb *unp;
1.25      kleink    488:        struct timeval tv;
1.1       cgd       489:        int error;
                     490:
                     491:        if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
                     492:                switch (so->so_type) {
                     493:
                     494:                case SOCK_STREAM:
                     495:                        error = soreserve(so, unpst_sendspace, unpst_recvspace);
                     496:                        break;
                     497:
                     498:                case SOCK_DGRAM:
                     499:                        error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
                     500:                        break;
1.8       mycroft   501:
                     502:                default:
                     503:                        panic("unp_attach");
1.1       cgd       504:                }
                     505:                if (error)
                     506:                        return (error);
                     507:        }
1.14      mycroft   508:        unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT);
                     509:        if (unp == NULL)
1.1       cgd       510:                return (ENOBUFS);
1.36      perry     511:        memset((caddr_t)unp, 0, sizeof(*unp));
1.14      mycroft   512:        unp->unp_socket = so;
1.15      mycroft   513:        so->so_pcb = unp;
                          /* Record the creation time; PRU_SENSE reports it as all three stat times. */
1.25      kleink    514:        microtime(&tv);
                     515:        TIMEVAL_TO_TIMESPEC(&tv, &unp->unp_ctime);
1.1       cgd       516:        return (0);
                     517: }
                     518:
                          /*
                           * unp_detach: tear down the pcb unp and free it.
                           *
                           * In order: the vnode, if any, is unhooked from the socket and
                           * released; an existing connection is disconnected; every pcb
                           * still on unp_refs is dropped with ECONNRESET; the socket is
                           * marked disconnected and its so_pcb pointer is cleared; the
                           * address is freed; finally the pcb itself is freed.
                           *
                           * The loop over unp_refs relies on unp_drop() eventually making
                           * unp_refs null (presumably by unlinking the dropped pcb --
                           * confirm in unp_drop and unp_disconnect).
                           */
1.17      pk        519: void
1.1       cgd       520: unp_detach(unp)
1.46      augustss  521:        struct unpcb *unp;
1.1       cgd       522: {
                     523:
                     524:        if (unp->unp_vnode) {
                     525:                unp->unp_vnode->v_socket = 0;
                     526:                vrele(unp->unp_vnode);
                     527:                unp->unp_vnode = 0;
                     528:        }
                     529:        if (unp->unp_conn)
                     530:                unp_disconnect(unp);
                     531:        while (unp->unp_refs)
                     532:                unp_drop(unp->unp_refs, ECONNRESET);
                     533:        soisdisconnected(unp->unp_socket);
                     534:        unp->unp_socket->so_pcb = 0;
1.20      mycroft   535:        if (unp->unp_addr)
1.26      thorpej   536:                free(unp->unp_addr, M_SONAME);
1.8       mycroft   537:        if (unp_rights) {
                     538:                /*
                     539:                 * Normally the receive buffer is flushed later,
                     540:                 * in sofree, but if our receive buffer holds references
                     541:                 * to descriptors that are now garbage, we will dispose
                     542:                 * of those descriptor references after the garbage collector
                     543:                 * gets them (resulting in a "panic: closef: count < 0").
                     544:                 */
                                  /* The flush needs unp->unp_socket, so it must precede the free. */
                     545:                sorflush(unp->unp_socket);
1.14      mycroft   546:                free(unp, M_PCB);
1.1       cgd       547:                unp_gc();
1.14      mycroft   548:        } else
                     549:                free(unp, M_PCB);
1.1       cgd       550: }
                    551:
1.5       andrew    552: int
1.1       cgd       553: unp_bind(unp, nam, p)
                    554:        struct unpcb *unp;
                    555:        struct mbuf *nam;
                    556:        struct proc *p;
                    557: {
1.27      thorpej   558:        struct sockaddr_un *sun;
1.46      augustss  559:        struct vnode *vp;
1.1       cgd       560:        struct vattr vattr;
1.27      thorpej   561:        size_t addrlen;
1.1       cgd       562:        int error;
                    563:        struct nameidata nd;
                    564:
1.20      mycroft   565:        if (unp->unp_vnode != 0)
                    566:                return (EINVAL);
1.27      thorpej   567:
                    568:        /*
                    569:         * Allocate the new sockaddr.  We have to allocate one
                    570:         * extra byte so that we can ensure that the pathname
                    571:         * is nul-terminated.
                    572:         */
                    573:        addrlen = nam->m_len + 1;
                    574:        sun = malloc(addrlen, M_SONAME, M_WAITOK);
                    575:        m_copydata(nam, 0, nam->m_len, (caddr_t)sun);
                    576:        *(((char *)sun) + nam->m_len) = '\0';
                    577:
1.9       mycroft   578:        NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
1.20      mycroft   579:            sun->sun_path, p);
1.27      thorpej   580:
1.1       cgd       581: /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
1.16      christos  582:        if ((error = namei(&nd)) != 0)
1.27      thorpej   583:                goto bad;
1.9       mycroft   584:        vp = nd.ni_vp;
1.1       cgd       585:        if (vp != NULL) {
1.9       mycroft   586:                VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
                    587:                if (nd.ni_dvp == vp)
                    588:                        vrele(nd.ni_dvp);
1.1       cgd       589:                else
1.9       mycroft   590:                        vput(nd.ni_dvp);
1.1       cgd       591:                vrele(vp);
1.27      thorpej   592:                error = EADDRINUSE;
                    593:                goto bad;
1.1       cgd       594:        }
                    595:        VATTR_NULL(&vattr);
                    596:        vattr.va_type = VSOCK;
1.9       mycroft   597:        vattr.va_mode = ACCESSPERMS;
1.12      mycroft   598:        VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
1.16      christos  599:        error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
                    600:        if (error)
1.27      thorpej   601:                goto bad;
1.9       mycroft   602:        vp = nd.ni_vp;
1.1       cgd       603:        vp->v_socket = unp->unp_socket;
                    604:        unp->unp_vnode = vp;
1.27      thorpej   605:        unp->unp_addrlen = addrlen;
                    606:        unp->unp_addr = sun;
1.31      fvdl      607:        VOP_UNLOCK(vp, 0);
1.1       cgd       608:        return (0);
1.27      thorpej   609:
                    610:  bad:
                    611:        free(sun, M_SONAME);
                    612:        return (error);
1.1       cgd       613: }
                    614:
/*
 * Connect socket `so' to the UNIX-domain socket bound at the pathname
 * carried in `nam'.
 *
 * The path is looked up, checked to be a socket node we may write to,
 * and its socket is required to have the same type as `so'.  For
 * protocols with PR_CONNREQUIRED a new socket is spawned from the
 * listener via sonewconn(), inherits the listener's address and flags,
 * and becomes the actual peer.
 *
 * Returns 0 on success, EINVAL if the address is too short to contain
 * a pathname, ENOTSOCK, ECONNREFUSED, EPROTOTYPE, or the error from
 * namei()/VOP_ACCESS()/unp_connect2().
 */
int
unp_connect(so, nam, p)
	struct socket *so;
	struct mbuf *nam;
	struct proc *p;
{
	struct sockaddr_un *sun;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp2, *unp3;
	size_t addrlen;
	int error;
	struct nameidata nd;

	/*
	 * Allocate a temporary sockaddr.  We have to allocate one extra
	 * byte so that we can ensure that the pathname is nul-terminated.
	 * When we establish the connection, we copy the other PCB's
	 * sockaddr to our own.
	 */
	addrlen = nam->m_len + 1;
	sun = malloc(addrlen, M_SONAME, M_WAITOK);
	m_copydata(nam, 0, nam->m_len, (caddr_t)sun);
	*(((char *)sun) + nam->m_len) = '\0';

	/*
	 * The terminator above sits at offset m_len.  If the supplied
	 * address ends before sun_path even begins, the pathname pointer
	 * would lie beyond that terminator and namei() would read memory
	 * outside our copy.  Refuse such addresses.
	 */
	if ((size_t)nam->m_len <
	    (size_t)((char *)sun->sun_path - (char *)sun)) {
		error = EINVAL;
		goto bad2;
	}

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, sun->sun_path, p);

	if ((error = namei(&nd)) != 0)
		goto bad2;
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto bad;
	}
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto bad;
	so2 = vp->v_socket;
	if (so2 == 0) {
		/* The node exists but no socket is bound to it any more. */
		error = ECONNREFUSED;
		goto bad;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto bad;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0)) == 0) {
			error = ECONNREFUSED;
			goto bad;
		}
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		/* The spawned socket gets its own copy of the bound name. */
		if (unp2->unp_addr) {
			unp3->unp_addr = malloc(unp2->unp_addrlen,
			    M_SONAME, M_WAITOK);
			memcpy(unp3->unp_addr, unp2->unp_addr,
			    unp2->unp_addrlen);
			unp3->unp_addrlen = unp2->unp_addrlen;
		}
		unp3->unp_flags = unp2->unp_flags;
		so2 = so3;
	}
	error = unp_connect2(so, so2);
 bad:
	vput(vp);
 bad2:
	free(sun, M_SONAME);
	return (error);
}
                    685:
1.5       andrew    686: int
1.1       cgd       687: unp_connect2(so, so2)
1.46      augustss  688:        struct socket *so;
                    689:        struct socket *so2;
1.1       cgd       690: {
1.46      augustss  691:        struct unpcb *unp = sotounpcb(so);
                    692:        struct unpcb *unp2;
1.1       cgd       693:
                    694:        if (so2->so_type != so->so_type)
                    695:                return (EPROTOTYPE);
                    696:        unp2 = sotounpcb(so2);
                    697:        unp->unp_conn = unp2;
                    698:        switch (so->so_type) {
                    699:
                    700:        case SOCK_DGRAM:
                    701:                unp->unp_nextref = unp2->unp_refs;
                    702:                unp2->unp_refs = unp;
                    703:                soisconnected(so);
                    704:                break;
                    705:
                    706:        case SOCK_STREAM:
                    707:                unp2->unp_conn = unp;
                    708:                soisconnected(so);
                    709:                soisconnected(so2);
                    710:                break;
                    711:
                    712:        default:
                    713:                panic("unp_connect2");
                    714:        }
                    715:        return (0);
                    716: }
                    717:
1.5       andrew    718: void
1.1       cgd       719: unp_disconnect(unp)
                    720:        struct unpcb *unp;
                    721: {
1.46      augustss  722:        struct unpcb *unp2 = unp->unp_conn;
1.1       cgd       723:
                    724:        if (unp2 == 0)
                    725:                return;
                    726:        unp->unp_conn = 0;
                    727:        switch (unp->unp_socket->so_type) {
                    728:
                    729:        case SOCK_DGRAM:
                    730:                if (unp2->unp_refs == unp)
                    731:                        unp2->unp_refs = unp->unp_nextref;
                    732:                else {
                    733:                        unp2 = unp2->unp_refs;
                    734:                        for (;;) {
                    735:                                if (unp2 == 0)
                    736:                                        panic("unp_disconnect");
                    737:                                if (unp2->unp_nextref == unp)
                    738:                                        break;
                    739:                                unp2 = unp2->unp_nextref;
                    740:                        }
                    741:                        unp2->unp_nextref = unp->unp_nextref;
                    742:                }
                    743:                unp->unp_nextref = 0;
                    744:                unp->unp_socket->so_state &= ~SS_ISCONNECTED;
                    745:                break;
                    746:
                    747:        case SOCK_STREAM:
                    748:                soisdisconnected(unp->unp_socket);
                    749:                unp2->unp_conn = 0;
                    750:                soisdisconnected(unp2->unp_socket);
                    751:                break;
                    752:        }
                    753: }
                    754:
                    755: #ifdef notdef
                         /*
                          * Disabled code: only compiled when `notdef' is defined.
                          * The routine simply hands the PCB to unp_detach().
                          */
                    756: unp_abort(unp)
                    757:        struct unpcb *unp;
                    758: {
                    759:
                    760:        unp_detach(unp);
                    761: }
                    762: #endif
                    763:
1.5       andrew    764: void
1.1       cgd       765: unp_shutdown(unp)
                    766:        struct unpcb *unp;
                    767: {
                    768:        struct socket *so;
                    769:
                    770:        if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
                    771:            (so = unp->unp_conn->unp_socket))
                    772:                socantrcvmore(so);
                    773: }
                    774:
1.5       andrew    775: void
1.1       cgd       776: unp_drop(unp, errno)
                    777:        struct unpcb *unp;
                    778:        int errno;
                    779: {
                    780:        struct socket *so = unp->unp_socket;
                    781:
                    782:        so->so_error = errno;
                    783:        unp_disconnect(unp);
                    784:        if (so->so_head) {
1.15      mycroft   785:                so->so_pcb = 0;
1.14      mycroft   786:                sofree(so);
1.20      mycroft   787:                if (unp->unp_addr)
1.26      thorpej   788:                        free(unp->unp_addr, M_SONAME);
1.14      mycroft   789:                free(unp, M_PCB);
1.1       cgd       790:        }
                    791: }
                    792:
                    793: #ifdef notdef
                         /*
                          * Disabled code: only compiled when `notdef' is defined.
                          * The routine has an empty body.
                          */
                    794: unp_drain()
                    795: {
                    796:
                    797: }
                    798: #endif
                    799:
1.5       andrew    800: int
1.1       cgd       801: unp_externalize(rights)
                    802:        struct mbuf *rights;
                    803: {
                    804:        struct proc *p = curproc;               /* XXX */
1.46      augustss  805:        struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
1.47      thorpej   806:        int i, *fdp;
1.46      augustss  807:        struct file **rp;
                    808:        struct file *fp;
1.47      thorpej   809:        int nfds, f, error = 0;
                    810:
                    811:        nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
                    812:            sizeof(struct file *);
                    813:        fdp = (int *)CMSG_DATA(cm);
                    814:        rp = (struct file **)CMSG_DATA(cm);
1.1       cgd       815:
1.39      sommerfe  816:        /* Make sure the recipient should be able to see the descriptors.. */
1.42      thorpej   817:        if (p->p_cwdi->cwdi_rdir != NULL) {
1.48    ! thorpej   818:                rp = (struct file **)CMSG_DATA(cm);
1.39      sommerfe  819:                for (i = 0; i < nfds; i++) {
                    820:                        fp = *rp++;
                    821:                        /*
                    822:                         * If we are in a chroot'ed directory, and
                    823:                         * someone wants to pass us a directory, make
                    824:                         * sure it's inside the subtree we're allowed
                    825:                         * to access.
                    826:                         */
                    827:                        if (fp->f_type == DTYPE_VNODE) {
                    828:                                struct vnode *vp = (struct vnode *)fp->f_data;
                    829:                                if ((vp->v_type == VDIR) &&
1.42      thorpej   830:                                    !vn_isunder(vp, p->p_cwdi->cwdi_rdir, p)) {
1.39      sommerfe  831:                                        error = EPERM;
                    832:                                        break;
                    833:                                }
                    834:                        }
                    835:                }
                    836:        }
1.47      thorpej   837:        rp = (struct file **)CMSG_DATA(cm);
1.39      sommerfe  838:
1.24      cgd       839:        /* Make sure that the recipient has space */
1.39      sommerfe  840:        if (error || (!fdavail(p, nfds))) {
1.24      cgd       841:                for (i = 0; i < nfds; i++) {
1.1       cgd       842:                        fp = *rp;
1.39      sommerfe  843:                        /*
                    844:                         * zero the pointer before calling unp_discard,
                    845:                         * since it may end up in unp_gc()..
                    846:                         */
                    847:                        *rp++ = 0;
1.1       cgd       848:                        unp_discard(fp);
                    849:                }
1.39      sommerfe  850:                return (error ? error : EMSGSIZE);
1.1       cgd       851:        }
1.39      sommerfe  852:
1.24      cgd       853:        /*
                    854:         * Add file to the recipient's open file table, converting them
                    855:         * to integer file descriptors as we go.  Done in forward order
                    856:         * because an integer will always come in the same place or before
                    857:         * its corresponding struct file pointer.
                    858:         */
                    859:        for (i = 0; i < nfds; i++) {
1.39      sommerfe  860:                fp = *rp++;
                    861:                fp->f_msgcount--;
                    862:                unp_rights--;
                    863:
1.1       cgd       864:                if (fdalloc(p, 0, &f))
                    865:                        panic("unp_externalize");
                    866:                p->p_fd->fd_ofiles[f] = fp;
1.24      cgd       867:                *fdp++ = f;
1.1       cgd       868:        }
1.24      cgd       869:
                    870:        /*
                    871:         * Adjust length, in case of transition from large struct file
                    872:         * pointers to ints.
                    873:         */
1.47      thorpej   874:        cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
                    875:        rights->m_len = CMSG_SPACE(nfds * sizeof(int));
1.1       cgd       876:        return (0);
                    877: }
                    878:
1.5       andrew    879: int
1.1       cgd       880: unp_internalize(control, p)
                    881:        struct mbuf *control;
                    882:        struct proc *p;
                    883: {
1.24      cgd       884:        struct filedesc *fdescp = p->p_fd;
1.46      augustss  885:        struct cmsghdr *cm = mtod(control, struct cmsghdr *);
                    886:        struct file **rp;
                    887:        struct file *fp;
                    888:        int i, fd, *fdp;
1.24      cgd       889:        int nfds;
                    890:        u_int neededspace;
1.38      thorpej   891:
1.24      cgd       892:        /* Sanity check the control message header */
1.1       cgd       893:        if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
                    894:            cm->cmsg_len != control->m_len)
                    895:                return (EINVAL);
1.24      cgd       896:
                    897:        /* Verify that the file descriptors are valid */
1.47      thorpej   898:        nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int);
                    899:        fdp = (int *)CMSG_DATA(cm);
1.24      cgd       900:        for (i = 0; i < nfds; i++) {
                    901:                fd = *fdp++;
                    902:                if ((unsigned)fd >= fdescp->fd_nfiles ||
1.44      thorpej   903:                    fdescp->fd_ofiles[fd] == NULL ||
                    904:                    (fdescp->fd_ofiles[fd]->f_iflags & FIF_WANTCLOSE) != 0)
1.1       cgd       905:                        return (EBADF);
                    906:        }
1.24      cgd       907:
                    908:        /* Make sure we have room for the struct file pointers */
1.47      thorpej   909:  morespace:
                    910:        neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) -
                    911:            control->m_len;
1.24      cgd       912:        if (neededspace > M_TRAILINGSPACE(control)) {
                    913:
                    914:                /* if we already have a cluster, the message is just too big */
                    915:                if (control->m_flags & M_EXT)
                    916:                        return (E2BIG);
                    917:
                    918:                /* allocate a cluster and try again */
                    919:                MCLGET(control, M_WAIT);
                    920:                if ((control->m_flags & M_EXT) == 0)
                    921:                        return (ENOBUFS);       /* allocation failed */
                    922:
                    923:                /* copy the data to the cluster */
1.36      perry     924:                memcpy(mtod(control, char *), cm, cm->cmsg_len);
1.24      cgd       925:                cm = mtod(control, struct cmsghdr *);
                    926:                goto morespace;
                    927:        }
                    928:
                    929:        /* adjust message & mbuf to note amount of space actually used. */
1.47      thorpej   930:        cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct file *));
                    931:        control->m_len = CMSG_SPACE(nfds * sizeof(struct file *));
1.24      cgd       932:
                    933:        /*
                    934:         * Transform the file descriptors into struct file pointers, in
                    935:         * reverse order so that if pointers are bigger than ints, the
                    936:         * int won't get until we're done.
                    937:         */
1.47      thorpej   938:        fdp = ((int *)CMSG_DATA(cm)) + nfds - 1;
                    939:        rp = ((struct file **)CMSG_DATA(cm)) + nfds - 1;
1.24      cgd       940:        for (i = 0; i < nfds; i++) {
1.28      christos  941:                fp = fdescp->fd_ofiles[*fdp--];
1.44      thorpej   942:                FILE_USE(fp);
1.24      cgd       943:                *rp-- = fp;
1.1       cgd       944:                fp->f_count++;
                    945:                fp->f_msgcount++;
1.44      thorpej   946:                FILE_UNUSE(fp, NULL);
1.1       cgd       947:                unp_rights++;
                    948:        }
                    949:        return (0);
1.30      thorpej   950: }
                    951:
                    952: struct mbuf *
                    953: unp_addsockcred(p, control)
                    954:        struct proc *p;
                    955:        struct mbuf *control;
                    956: {
                    957:        struct cmsghdr *cmp;
                    958:        struct sockcred *sc;
                    959:        struct mbuf *m, *n;
1.47      thorpej   960:        int len, space, i;
1.30      thorpej   961:
1.47      thorpej   962:        len = CMSG_LEN(SOCKCREDSIZE(p->p_ucred->cr_ngroups));
                    963:        space = CMSG_SPACE(SOCKCREDSIZE(p->p_ucred->cr_ngroups));
1.30      thorpej   964:
                    965:        m = m_get(M_WAIT, MT_CONTROL);
1.47      thorpej   966:        if (space > MLEN) {
                    967:                if (space > MCLBYTES)
                    968:                        MEXTMALLOC(m, space, M_WAITOK);
1.30      thorpej   969:                else
                    970:                        MCLGET(m, M_WAIT);
                    971:                if ((m->m_flags & M_EXT) == 0) {
                    972:                        m_free(m);
                    973:                        return (control);
                    974:                }
                    975:        }
                    976:
1.47      thorpej   977:        m->m_len = space;
1.30      thorpej   978:        m->m_next = NULL;
                    979:        cmp = mtod(m, struct cmsghdr *);
                    980:        sc = (struct sockcred *)CMSG_DATA(cmp);
                    981:        cmp->cmsg_len = len;
                    982:        cmp->cmsg_level = SOL_SOCKET;
                    983:        cmp->cmsg_type = SCM_CREDS;
                    984:        sc->sc_uid = p->p_cred->p_ruid;
                    985:        sc->sc_euid = p->p_ucred->cr_uid;
                    986:        sc->sc_gid = p->p_cred->p_rgid;
                    987:        sc->sc_egid = p->p_ucred->cr_gid;
                    988:        sc->sc_ngroups = p->p_ucred->cr_ngroups;
                    989:        for (i = 0; i < sc->sc_ngroups; i++)
                    990:                sc->sc_groups[i] = p->p_ucred->cr_groups[i];
                    991:
                    992:        /*
                    993:         * If a control message already exists, append us to the end.
                    994:         */
                    995:        if (control != NULL) {
                    996:                for (n = control; n->m_next != NULL; n = n->m_next)
                    997:                        ;
                    998:                n->m_next = m;
                    999:        } else
                   1000:                control = m;
                   1001:
                   1002:        return (control);
1.1       cgd      1003: }
                   1004:
                   1005: int    unp_defer, unp_gcing;   /* unp_gc() state: count of FDEFER-flagged files awaiting rescan; set while a collection is running */
                   1006: extern struct domain unixdomain;       /* unp_gc() compares each socket's protocol domain against this */
                   1007:
1.39      sommerfe 1008: /*
                   1009:  * Comment added long after the fact explaining what's going on here.
                   1010:  * Do a mark-sweep GC of file descriptors on the system, to free up
                   1011:  * any which are caught in flight to an about-to-be-closed socket.
                   1012:  *
                   1013:  * Traditional mark-sweep gc's start at the "root", and mark
                   1014:  * everything reachable from the root (which, in our case would be the
                   1015:  * process table).  The mark bits are cleared during the sweep.
                   1016:  *
                   1017:  * XXX For some inexplicable reason (perhaps because the file
                   1018:  * descriptor tables used to live in the u area which could be swapped
                   1019:  * out and thus hard to reach), we do multiple scans over the set of
                   1020:  * descriptors, using *two* mark bits per object (DEFER and MARK).
                   1021:  * Whenever we find a descriptor which references other descriptors,
                   1022:  * the ones it references are marked with both bits, and we iterate
                   1023:  * over the whole file table until there are no more DEFER bits set.
                   1024:  * We also make an extra pass *before* the GC to clear the mark bits,
                   1025:  * which could have been cleared at almost no cost during the previous
                   1026:  * sweep.
                   1027:  *
                   1028:  * XXX MP: this needs to run with locks such that no other thread of
                   1029:  * control can create or destroy references to file descriptors. it
                   1030:  * may be necessary to defer the GC until later (when the locking
                   1031:  * situation is more hospitable); it may be necessary to push this
                   1032:  * into a separate thread.
                   1033:  */
1.5       andrew   1034: void
1.1       cgd      1035: unp_gc()
                   1036: {
1.46      augustss 1037:        struct file *fp, *nextfp;
                   1038:        struct socket *so, *so1;
1.8       mycroft  1039:        struct file **extra_ref, **fpp;
                   1040:        int nunref, i;
1.1       cgd      1041:
                   1042:        if (unp_gcing)
                   1043:                return;
                   1044:        unp_gcing = 1;
                   1045:        unp_defer = 0;
1.39      sommerfe 1046:
                   1047:        /* Clear mark bits */
1.11      mycroft  1048:        for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next)
1.1       cgd      1049:                fp->f_flag &= ~(FMARK|FDEFER);
1.39      sommerfe 1050:
                   1051:        /*
                   1052:         * Iterate over the set of descriptors, marking ones believed
                   1053:         * (based on refcount) to be referenced from a process, and
                   1054:         * marking for rescan descriptors which are queued on a socket.
                   1055:         */
1.1       cgd      1056:        do {
1.11      mycroft  1057:                for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) {
1.1       cgd      1058:                        if (fp->f_flag & FDEFER) {
                   1059:                                fp->f_flag &= ~FDEFER;
                   1060:                                unp_defer--;
1.39      sommerfe 1061: #ifdef DIAGNOSTIC
                   1062:                                if (fp->f_count == 0)
                   1063:                                        panic("unp_gc: deferred unreferenced socket");
                   1064: #endif
1.1       cgd      1065:                        } else {
1.39      sommerfe 1066:                                if (fp->f_count == 0)
                   1067:                                        continue;
1.1       cgd      1068:                                if (fp->f_flag & FMARK)
                   1069:                                        continue;
                   1070:                                if (fp->f_count == fp->f_msgcount)
                   1071:                                        continue;
                   1072:                        }
1.39      sommerfe 1073:                        fp->f_flag |= FMARK;
                   1074:
1.1       cgd      1075:                        if (fp->f_type != DTYPE_SOCKET ||
                   1076:                            (so = (struct socket *)fp->f_data) == 0)
                   1077:                                continue;
                   1078:                        if (so->so_proto->pr_domain != &unixdomain ||
                   1079:                            (so->so_proto->pr_flags&PR_RIGHTS) == 0)
                   1080:                                continue;
                   1081: #ifdef notdef
                   1082:                        if (so->so_rcv.sb_flags & SB_LOCK) {
                   1083:                                /*
                   1084:                                 * This is problematical; it's not clear
                   1085:                                 * we need to wait for the sockbuf to be
                   1086:                                 * unlocked (on a uniprocessor, at least),
                   1087:                                 * and it's also not clear what to do
                   1088:                                 * if sbwait returns an error due to receipt
                   1089:                                 * of a signal.  If sbwait does return
                   1090:                                 * an error, we'll go into an infinite
                   1091:                                 * loop.  Delete all of this for now.
                   1092:                                 */
                   1093:                                (void) sbwait(&so->so_rcv);
                   1094:                                goto restart;
                   1095:                        }
                   1096: #endif
1.39      sommerfe 1097:                        unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
                   1098:                        /*
                   1099:                         * mark descriptors referenced from sockets queued on the accept queue as well.
                   1100:                         */
                   1101:                        if (so->so_options & SO_ACCEPTCONN) {
                   1102:                                for (so1 = so->so_q0.tqh_first;
                   1103:                                     so1 != 0;
                   1104:                                     so1 = so1->so_qe.tqe_next) {
                   1105:                                        unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
                   1106:                                }
                   1107:                                for (so1 = so->so_q.tqh_first;
                   1108:                                     so1 != 0;
                   1109:                                     so1 = so1->so_qe.tqe_next) {
                   1110:                                        unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
                   1111:                                }
                   1112:                        }
                   1113:
1.1       cgd      1114:                }
                   1115:        } while (unp_defer);
1.8       mycroft  1116:        /*
1.39      sommerfe 1117:         * Sweep pass.  Find unmarked descriptors, and free them.
                   1118:         *
1.8       mycroft  1119:         * We grab an extra reference to each of the file table entries
                   1120:         * that are not otherwise accessible and then free the rights
                   1121:         * that are stored in messages on them.
                   1122:         *
                   1123:         * The bug in the original code is a little tricky, so I'll describe
                   1124:         * what's wrong with it here.
                   1125:         *
                   1126:         * It is incorrect to simply unp_discard each entry for f_msgcount
                   1127:         * times -- consider the case of sockets A and B that contain
                   1128:         * references to each other.  On a last close of some other socket,
                   1129:         * we trigger a gc since the number of outstanding rights (unp_rights)
                   1130:         * is non-zero.  If during the sweep phase the gc code unp_discards,
                   1131:         * we end up doing a (full) closef on the descriptor.  A closef on A
                   1132:         * results in the following chain.  Closef calls soo_close, which
                   1133:         * calls soclose.   Soclose calls first (through the switch
                   1134:         * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
                   1135:         * returns because the previous instance had set unp_gcing, and
                   1136:         * we return all the way back to soclose, which marks the socket
                   1137:         * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
                   1138:         * to free up the rights that are queued in messages on the socket A,
                   1139:         * i.e., the reference on B.  The sorflush calls via the dom_dispose
                   1140:         * switch unp_dispose, which unp_scans with unp_discard.  This second
                   1141:         * instance of unp_discard just calls closef on B.
                   1142:         *
                   1143:         * Well, a similar chain occurs on B, resulting in a sorflush on B,
                   1144:         * which results in another closef on A.  Unfortunately, A is already
                   1145:         * being closed, and the descriptor has already been marked with
                   1146:         * SS_NOFDREF, and soclose panics at this point.
                   1147:         *
                   1148:         * Here, we first take an extra reference to each inaccessible
1.39      sommerfe 1149:         * descriptor.  Then, if the inaccessible descriptor is a
                   1150:         * socket, we call sorflush in case it is a Unix domain
                   1151:         * socket.  After we destroy all the rights carried in
                   1152:         * messages, we do a last closef to get rid of our extra
                   1153:         * reference.  This is the last close, and the unp_detach etc
                   1154:         * will shut down the socket.
1.8       mycroft  1155:         *
                   1156:         * 91/09/19, bsy@cs.cmu.edu
                   1157:         */
                   1158:        extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
1.11      mycroft  1159:        for (nunref = 0, fp = filehead.lh_first, fpp = extra_ref; fp != 0;
                   1160:            fp = nextfp) {
                   1161:                nextfp = fp->f_list.le_next;
1.1       cgd      1162:                if (fp->f_count == 0)
                   1163:                        continue;
1.8       mycroft  1164:                if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
                   1165:                        *fpp++ = fp;
                   1166:                        nunref++;
                   1167:                        fp->f_count++;
                   1168:                }
1.1       cgd      1169:        }
1.39      sommerfe 1170:        for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
1.45      thorpej  1171:                fp = *fpp;
1.44      thorpej  1172:                FILE_USE(fp);
1.39      sommerfe 1173:                if (fp->f_type == DTYPE_SOCKET)
                   1174:                        sorflush((struct socket *)fp->f_data);
1.44      thorpej  1175:                FILE_UNUSE(fp, NULL);
1.39      sommerfe 1176:        }
1.44      thorpej  1177:        for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
1.45      thorpej  1178:                fp = *fpp;
1.44      thorpej  1179:                FILE_USE(fp);
1.45      thorpej  1180:                (void) closef(fp, (struct proc *)0);
1.44      thorpej  1181:        }
1.8       mycroft  1182:        free((caddr_t)extra_ref, M_FILE);
1.1       cgd      1183:        unp_gcing = 0;
                   1184: }
                   1185:
1.5       andrew   1186: void
1.1       cgd      1187: unp_dispose(m)
                   1188:        struct mbuf *m;
                   1189: {
1.8       mycroft  1190:
1.1       cgd      1191:        if (m)
1.39      sommerfe 1192:                unp_scan(m, unp_discard, 1);
1.1       cgd      1193: }
                   1194:
1.5       andrew   1195: void
1.39      sommerfe 1196: unp_scan(m0, op, discard)
1.46      augustss 1197:        struct mbuf *m0;
1.5       andrew   1198:        void (*op) __P((struct file *));
1.39      sommerfe 1199:        int discard;
1.1       cgd      1200: {
1.46      augustss 1201:        struct mbuf *m;
                   1202:        struct file **rp;
                   1203:        struct cmsghdr *cm;
                   1204:        int i;
1.1       cgd      1205:        int qfds;
                   1206:
                   1207:        while (m0) {
1.48    ! thorpej  1208:                for (m = m0; m; m = m->m_next) {
1.1       cgd      1209:                        if (m->m_type == MT_CONTROL &&
                   1210:                            m->m_len >= sizeof(*cm)) {
                   1211:                                cm = mtod(m, struct cmsghdr *);
                   1212:                                if (cm->cmsg_level != SOL_SOCKET ||
                   1213:                                    cm->cmsg_type != SCM_RIGHTS)
                   1214:                                        continue;
1.48    ! thorpej  1215:                                qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm)))
        !          1216:                                    / sizeof(struct file *);
        !          1217:                                rp = (struct file **)CMSG_DATA(cm);
1.39      sommerfe 1218:                                for (i = 0; i < qfds; i++) {
                   1219:                                        struct file *fp = *rp;
                   1220:                                        if (discard)
                   1221:                                                *rp = 0;
                   1222:                                        (*op)(fp);
                   1223:                                        rp++;
                   1224:                                }
1.1       cgd      1225:                                break;          /* XXX, but saves time */
                   1226:                        }
1.48    ! thorpej  1227:                }
1.1       cgd      1228:                m0 = m0->m_act;
                   1229:        }
                   1230: }
                   1231:
1.5       andrew   1232: void
1.1       cgd      1233: unp_mark(fp)
                   1234:        struct file *fp;
                   1235: {
1.39      sommerfe 1236:        if (fp == NULL)
                   1237:                return;
                   1238:
                   1239:        if (fp->f_flag & FMARK)
                   1240:                return;
1.1       cgd      1241:
1.39      sommerfe 1242:        /* If we're already deferred, don't screw up the defer count */
                   1243:        if (fp->f_flag & FDEFER)
1.1       cgd      1244:                return;
1.39      sommerfe 1245:
                   1246:        /*
                   1247:         * Minimize the number of deferrals...  Sockets are the only
                   1248:         * type of descriptor which can hold references to another
                   1249:         * descriptor, so just mark other descriptors, and defer
                   1250:         * unmarked sockets for the next pass.
                   1251:         */
                   1252:        if (fp->f_type == DTYPE_SOCKET) {
                   1253:                unp_defer++;
                   1254:                if (fp->f_count == 0)
                   1255:                        panic("unp_mark: queued unref");
                   1256:                fp->f_flag |= FDEFER;
                   1257:        } else {
                   1258:                fp->f_flag |= FMARK;
                   1259:        }
                   1260:        return;
1.1       cgd      1261: }
                   1262:
1.5       andrew   1263: void
1.1       cgd      1264: unp_discard(fp)
                   1265:        struct file *fp;
                   1266: {
1.39      sommerfe 1267:        if (fp == NULL)
                   1268:                return;
1.44      thorpej  1269:        FILE_USE(fp);
1.1       cgd      1270:        fp->f_msgcount--;
                   1271:        unp_rights--;
1.13      mycroft  1272:        (void) closef(fp, (struct proc *)0);
1.1       cgd      1273: }

CVSweb <webmaster@jp.NetBSD.org>