[BACK]Return to uipc_socket2.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / kern

Annotation of src/sys/kern/uipc_socket2.c, Revision 1.55

1.55    ! christos    1: /*     $NetBSD: uipc_socket2.c,v 1.54 2003/08/07 16:31:59 agc Exp $    */
1.9       cgd         2:
1.1       cgd         3: /*
1.7       mycroft     4:  * Copyright (c) 1982, 1986, 1988, 1990, 1993
                      5:  *     The Regents of the University of California.  All rights reserved.
1.1       cgd         6:  *
                      7:  * Redistribution and use in source and binary forms, with or without
                      8:  * modification, are permitted provided that the following conditions
                      9:  * are met:
                     10:  * 1. Redistributions of source code must retain the above copyright
                     11:  *    notice, this list of conditions and the following disclaimer.
                     12:  * 2. Redistributions in binary form must reproduce the above copyright
                     13:  *    notice, this list of conditions and the following disclaimer in the
                     14:  *    documentation and/or other materials provided with the distribution.
1.54      agc        15:  * 3. Neither the name of the University nor the names of its contributors
1.1       cgd        16:  *    may be used to endorse or promote products derived from this software
                     17:  *    without specific prior written permission.
                     18:  *
                     19:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     20:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     21:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     22:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     23:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     24:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     25:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     26:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     27:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     28:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     29:  * SUCH DAMAGE.
                     30:  *
1.23      fvdl       31:  *     @(#)uipc_socket2.c      8.2 (Berkeley) 2/14/95
1.1       cgd        32:  */
1.42      lukem      33:
                     34: #include <sys/cdefs.h>
1.55    ! christos   35: __KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.54 2003/08/07 16:31:59 agc Exp $");
1.51      martin     36:
                     37: #include "opt_mbuftrace.h"
1.1       cgd        38:
1.5       mycroft    39: #include <sys/param.h>
                     40: #include <sys/systm.h>
                     41: #include <sys/proc.h>
                     42: #include <sys/file.h>
                     43: #include <sys/buf.h>
                     44: #include <sys/malloc.h>
                     45: #include <sys/mbuf.h>
                     46: #include <sys/protosw.h>
1.55    ! christos   47: #include <sys/poll.h>
1.5       mycroft    48: #include <sys/socket.h>
                     49: #include <sys/socketvar.h>
1.11      christos   50: #include <sys/signalvar.h>
1.1       cgd        51:
                     52: /*
                     53:  * Primitive routines for operating on sockets and socket buffers
                     54:  */
                     55:
                     56: /*
                          * Wait-message strings passed to tsleep().  Within this file, netio
                          * is used by sbwait() and netlck by sb_lock().
                          */
1.21      mycroft    57: const char     netcon[] = "netcon";
                     58: const char     netcls[] = "netcls";
1.41      enami      59: const char     netio[] = "netio";
                     60: const char     netlck[] = "netlck";
1.1       cgd        61:
                     62: /*
                     63:  * Procedures to manipulate state flags of socket
                     64:  * and do appropriate wakeups.  Normal sequence from the
                     65:  * active (originating) side is that soisconnecting() is
                     66:  * called during processing of connect() call,
                     67:  * resulting in an eventual call to soisconnected() if/when the
                     68:  * connection is established.  When the connection is torn down
                     69:  * soisdisconnecting() is called during processing of disconnect() call,
                     70:  * and soisdisconnected() is called when the connection to the peer
                     71:  * is totally severed.  The semantics of these routines are such that
                     72:  * connectionless protocols can call soisconnected() and soisdisconnected()
                     73:  * only, bypassing the in-progress calls when setting up a ``connection''
                     74:  * takes no time.
                     75:  *
                     76:  * From the passive side, a socket is created with
                     77:  * two queues of sockets: so_q0 for connections in progress
                     78:  * and so_q for connections already made and awaiting user acceptance.
                     79:  * As a protocol is preparing incoming connections, it creates a socket
                     80:  * structure queued on so_q0 by calling sonewconn().  When the connection
                     81:  * is established, soisconnected() is called, and transfers the
                     82:  * socket structure to so_q, making it available to accept().
                     83:  *
                     84:  * If a socket is closed with sockets on either
                     85:  * so_q0 or so_q, these sockets are dropped.
                     86:  *
                     87:  * If higher level protocols are implemented in
                     88:  * the kernel, the wakeups done here will sometimes
                     89:  * cause software-interrupt process scheduling.
                     90:  */
                     91:
1.7       mycroft    92: void
1.37      lukem      93: soisconnecting(struct socket *so)
1.1       cgd        94: {
                     95:
                     96:        so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
                     97:        so->so_state |= SS_ISCONNECTING;
                     98: }
                     99:
1.7       mycroft   100: void
1.37      lukem     101: soisconnected(struct socket *so)
1.1       cgd       102: {
1.37      lukem     103:        struct socket   *head;
1.1       cgd       104:
1.37      lukem     105:        head = so->so_head;
1.1       cgd       106:        so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
                    107:        so->so_state |= SS_ISCONNECTED;
                    108:        if (head && soqremque(so, 0)) {
                    109:                soqinsque(head, so, 1);
                    110:                sorwakeup(head);
                    111:                wakeup((caddr_t)&head->so_timeo);
                    112:        } else {
                    113:                wakeup((caddr_t)&so->so_timeo);
                    114:                sorwakeup(so);
                    115:                sowwakeup(so);
                    116:        }
                    117: }
                    118:
1.7       mycroft   119: void
1.37      lukem     120: soisdisconnecting(struct socket *so)
1.1       cgd       121: {
                    122:
                    123:        so->so_state &= ~SS_ISCONNECTING;
                    124:        so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
                    125:        wakeup((caddr_t)&so->so_timeo);
                    126:        sowwakeup(so);
                    127:        sorwakeup(so);
                    128: }
                    129:
1.7       mycroft   130: void
1.37      lukem     131: soisdisconnected(struct socket *so)
1.1       cgd       132: {
                    133:
                    134:        so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
1.27      mycroft   135:        so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
1.1       cgd       136:        wakeup((caddr_t)&so->so_timeo);
                    137:        sowwakeup(so);
                    138:        sorwakeup(so);
                    139: }
                    140:
                    141: /*
                    142:  * When an attempt at a new connection is noted on a socket
                    143:  * which accepts connections, sonewconn is called.  If the
                    144:  * connection is possible (subject to space constraints, etc.)
                    145:  * then we allocate a new structure, propoerly linked into the
                    146:  * data structure of the original socket, and return this.
                    147:  * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
                    148:  *
                    149:  * Currently, sonewconn() is defined as sonewconn1() in socketvar.h
                    150:  * to catch calls that are missing the (new) second parameter.
                    151:  */
                    152: struct socket *
1.37      lukem     153: sonewconn1(struct socket *head, int connstatus)
1.1       cgd       154: {
1.37      lukem     155:        struct socket   *so;
                    156:        int             soqueue;
1.1       cgd       157:
1.37      lukem     158:        soqueue = connstatus ? 1 : 0;
1.1       cgd       159:        if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
                    160:                return ((struct socket *)0);
1.25      thorpej   161:        so = pool_get(&socket_pool, PR_NOWAIT);
1.1       cgd       162:        if (so == NULL)
1.25      thorpej   163:                return (NULL);
1.26      perry     164:        memset((caddr_t)so, 0, sizeof(*so));
1.1       cgd       165:        so->so_type = head->so_type;
                    166:        so->so_options = head->so_options &~ SO_ACCEPTCONN;
                    167:        so->so_linger = head->so_linger;
                    168:        so->so_state = head->so_state | SS_NOFDREF;
                    169:        so->so_proto = head->so_proto;
                    170:        so->so_timeo = head->so_timeo;
                    171:        so->so_pgid = head->so_pgid;
1.24      matt      172:        so->so_send = head->so_send;
                    173:        so->so_receive = head->so_receive;
1.28      lukem     174:        so->so_uid = head->so_uid;
1.49      matt      175: #ifdef MBUFTRACE
                    176:        so->so_mowner = head->so_mowner;
                    177:        so->so_rcv.sb_mowner = head->so_rcv.sb_mowner;
                    178:        so->so_snd.sb_mowner = head->so_snd.sb_mowner;
                    179: #endif
1.1       cgd       180:        (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
                    181:        soqinsque(head, so, soqueue);
                    182:        if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH,
1.12      mycroft   183:            (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
1.53      fvdl      184:            (struct proc *)0)) {
1.1       cgd       185:                (void) soqremque(so, soqueue);
1.25      thorpej   186:                pool_put(&socket_pool, so);
                    187:                return (NULL);
1.1       cgd       188:        }
                    189:        if (connstatus) {
                    190:                sorwakeup(head);
                    191:                wakeup((caddr_t)&head->so_timeo);
                    192:                so->so_state |= connstatus;
                    193:        }
                    194:        return (so);
                    195: }
                    196:
1.7       mycroft   197: void
1.37      lukem     198: soqinsque(struct socket *head, struct socket *so, int q)
1.1       cgd       199: {
                    200:
1.22      thorpej   201: #ifdef DIAGNOSTIC
                    202:        if (so->so_onq != NULL)
                    203:                panic("soqinsque");
                    204: #endif
                    205:
1.1       cgd       206:        so->so_head = head;
                    207:        if (q == 0) {
                    208:                head->so_q0len++;
1.22      thorpej   209:                so->so_onq = &head->so_q0;
1.1       cgd       210:        } else {
                    211:                head->so_qlen++;
1.22      thorpej   212:                so->so_onq = &head->so_q;
1.1       cgd       213:        }
1.22      thorpej   214:        TAILQ_INSERT_TAIL(so->so_onq, so, so_qe);
1.1       cgd       215: }
                    216:
1.7       mycroft   217: int
1.37      lukem     218: soqremque(struct socket *so, int q)
1.1       cgd       219: {
1.37      lukem     220:        struct socket   *head;
1.1       cgd       221:
1.37      lukem     222:        head = so->so_head;
1.22      thorpej   223:        if (q == 0) {
                    224:                if (so->so_onq != &head->so_q0)
1.17      thorpej   225:                        return (0);
1.1       cgd       226:                head->so_q0len--;
                    227:        } else {
1.22      thorpej   228:                if (so->so_onq != &head->so_q)
                    229:                        return (0);
1.1       cgd       230:                head->so_qlen--;
                    231:        }
1.22      thorpej   232:        TAILQ_REMOVE(so->so_onq, so, so_qe);
                    233:        so->so_onq = NULL;
                    234:        so->so_head = NULL;
1.1       cgd       235:        return (1);
                    236: }
                    237:
                    238: /*
                    239:  * Socantsendmore indicates that no more data will be sent on the
                    240:  * socket; it would normally be applied to a socket when the user
                    241:  * informs the system that no more data is to be sent, by the protocol
                    242:  * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
                    243:  * will be received, and will normally be applied to the socket by a
                    244:  * protocol when it detects that the peer will send no more data.
                    245:  * Data queued for reading in the socket may yet be read.
                    246:  */
                    247:
1.4       andrew    248: void
1.37      lukem     249: socantsendmore(struct socket *so)
1.1       cgd       250: {
                    251:
                    252:        so->so_state |= SS_CANTSENDMORE;
                    253:        sowwakeup(so);
                    254: }
                    255:
1.4       andrew    256: void
1.37      lukem     257: socantrcvmore(struct socket *so)
1.1       cgd       258: {
                    259:
                    260:        so->so_state |= SS_CANTRCVMORE;
                    261:        sorwakeup(so);
                    262: }
                    263:
                    264: /*
                    265:  * Wait for data to arrive at/drain from a socket buffer.
                    266:  */
1.7       mycroft   267: int
1.37      lukem     268: sbwait(struct sockbuf *sb)
1.1       cgd       269: {
                    270:
                    271:        sb->sb_flags |= SB_WAIT;
                    272:        return (tsleep((caddr_t)&sb->sb_cc,
                    273:            (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, netio,
                    274:            sb->sb_timeo));
                    275: }
                    276:
                    277: /*
                    278:  * Lock a sockbuf already known to be locked;
                    279:  * return any error returned from sleep (EINTR).
                    280:  */
1.7       mycroft   281: int
1.37      lukem     282: sb_lock(struct sockbuf *sb)
1.1       cgd       283: {
1.37      lukem     284:        int     error;
1.1       cgd       285:
                    286:        while (sb->sb_flags & SB_LOCK) {
                    287:                sb->sb_flags |= SB_WANT;
1.11      christos  288:                error = tsleep((caddr_t)&sb->sb_flags,
1.41      enami     289:                    (sb->sb_flags & SB_NOINTR) ?  PSOCK : PSOCK|PCATCH,
                    290:                    netlck, 0);
1.11      christos  291:                if (error)
1.1       cgd       292:                        return (error);
                    293:        }
                    294:        sb->sb_flags |= SB_LOCK;
                    295:        return (0);
                    296: }
                    297:
                    298: /*
                    299:  * Wakeup processes waiting on a socket buffer.
                    300:  * Do asynchronous notification via SIGIO
1.39      manu      301:  * if the socket buffer has the SB_ASYNC flag set.
1.1       cgd       302:  */
1.7       mycroft   303: void
1.55    ! christos  304: sowakeup(struct socket *so, struct sockbuf *sb, int code)
1.1       cgd       305: {
1.37      lukem     306:        struct proc     *p;
1.1       cgd       307:
1.48      jdolecek  308:        selnotify(&sb->sb_sel, 0);
1.7       mycroft   309:        sb->sb_flags &= ~SB_SEL;
1.1       cgd       310:        if (sb->sb_flags & SB_WAIT) {
                    311:                sb->sb_flags &= ~SB_WAIT;
                    312:                wakeup((caddr_t)&sb->sb_cc);
                    313:        }
1.39      manu      314:        if (sb->sb_flags & SB_ASYNC) {
1.55    ! christos  315:                ksiginfo_t ksi;
        !           316:                memset(&ksi, 0, sizeof(ksi));
        !           317:                ksi.ksi_signo = SIGIO;
        !           318:                ksi.ksi_code = code;
        !           319:                if (code == POLL_IN) {
        !           320:                        if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
        !           321:                                ksi.ksi_band = (POLLPRI | POLLRDBAND);
        !           322:                        else
        !           323:                                ksi.ksi_band = (POLLIN | POLLRDNORM);
        !           324:                } else {
        !           325:                        if (so->so_oobmark)
        !           326:                                ksi.ksi_band = (POLLPRI | POLLWRBAND);
        !           327:                        else
        !           328:                                ksi.ksi_band = (POLLOUT | POLLWRNORM);
        !           329:                }
1.1       cgd       330:                if (so->so_pgid < 0)
1.55    ! christos  331:                        kgsignal(-so->so_pgid, &ksi, so);
1.1       cgd       332:                else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
1.55    ! christos  333:                        kpsignal(p, &ksi, so);
1.1       cgd       334:        }
1.24      matt      335:        if (sb->sb_flags & SB_UPCALL)
                    336:                (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
1.1       cgd       337: }
                    338:
                    339: /*
                    340:  * Socket buffer (struct sockbuf) utility routines.
                    341:  *
                    342:  * Each socket contains two socket buffers: one for sending data and
                    343:  * one for receiving data.  Each buffer contains a queue of mbufs,
                    344:  * information about the number of mbufs and amount of data in the
1.13      mycroft   345:  * queue, and other fields allowing poll() statements and notification
1.1       cgd       346:  * on data availability to be implemented.
                    347:  *
                    348:  * Data stored in a socket buffer is maintained as a list of records.
                    349:  * Each record is a list of mbufs chained together with the m_next
                    350:  * field.  Records are chained together with the m_nextpkt field. The upper
                    351:  * level routine soreceive() expects the following conventions to be
                    352:  * observed when placing information in the receive buffer:
                    353:  *
                    354:  * 1. If the protocol requires each message be preceded by the sender's
                    355:  *    name, then a record containing that name must be present before
                    356:  *    any associated data (mbuf's must be of type MT_SONAME).
                    357:  * 2. If the protocol supports the exchange of ``access rights'' (really
                    358:  *    just additional data associated with the message), and there are
                    359:  *    ``rights'' to be received, then a record containing this data
1.10      mycroft   360:  *    should be present (mbuf's must be of type MT_CONTROL).
1.1       cgd       361:  * 3. If a name or rights record exists, then it must be followed by
                    362:  *    a data record, perhaps of zero length.
                    363:  *
                    364:  * Before using a new socket structure it is first necessary to reserve
                    365:  * buffer space to the socket, by calling sbreserve().  This should commit
                    366:  * some of the available buffer space in the system buffer pool for the
                    367:  * socket (currently, it does nothing but enforce limits).  The space
                    368:  * should be released by calling sbrelease() when the socket is destroyed.
                    369:  */
                    370:
1.7       mycroft   371: int
1.37      lukem     372: soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
1.1       cgd       373: {
                    374:
                    375:        if (sbreserve(&so->so_snd, sndcc) == 0)
                    376:                goto bad;
                    377:        if (sbreserve(&so->so_rcv, rcvcc) == 0)
                    378:                goto bad2;
                    379:        if (so->so_rcv.sb_lowat == 0)
                    380:                so->so_rcv.sb_lowat = 1;
                    381:        if (so->so_snd.sb_lowat == 0)
                    382:                so->so_snd.sb_lowat = MCLBYTES;
                    383:        if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
                    384:                so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
                    385:        return (0);
1.37      lukem     386:  bad2:
1.1       cgd       387:        sbrelease(&so->so_snd);
1.37      lukem     388:  bad:
1.1       cgd       389:        return (ENOBUFS);
                    390: }
                    391:
                    392: /*
                    393:  * Allot mbufs to a sockbuf.
                    394:  * Attempt to scale mbmax so that mbcnt doesn't become limiting
                    395:  * if buffering efficiency is near the normal case.
                    396:  */
1.7       mycroft   397: int
1.37      lukem     398: sbreserve(struct sockbuf *sb, u_long cc)
1.1       cgd       399: {
                    400:
1.38      kml       401:        if (cc == 0 ||
                    402:            (u_quad_t) cc > (u_quad_t) sb_max * MCLBYTES / (MSIZE + MCLBYTES))
1.1       cgd       403:                return (0);
                    404:        sb->sb_hiwat = cc;
                    405:        sb->sb_mbmax = min(cc * 2, sb_max);
                    406:        if (sb->sb_lowat > sb->sb_hiwat)
                    407:                sb->sb_lowat = sb->sb_hiwat;
                    408:        return (1);
                    409: }
                    410:
                    411: /*
                    412:  * Free mbufs held by a socket, and reserved mbuf space.
                    413:  */
1.7       mycroft   414: void
1.37      lukem     415: sbrelease(struct sockbuf *sb)
1.1       cgd       416: {
                    417:
                    418:        sbflush(sb);
                    419:        sb->sb_hiwat = sb->sb_mbmax = 0;
                    420: }
                    421:
                    422: /*
                    423:  * Routines to add and remove
                    424:  * data from an mbuf queue.
                    425:  *
                    426:  * The routines sbappend() or sbappendrecord() are normally called to
                    427:  * append new mbufs to a socket buffer, after checking that adequate
                    428:  * space is available, comparing the function sbspace() with the amount
                    429:  * of data to be added.  sbappendrecord() differs from sbappend() in
                    430:  * that data supplied is treated as the beginning of a new record.
                    431:  * To place a sender's address, optional access rights, and data in a
                    432:  * socket receive buffer, sbappendaddr() should be used.  To place
                    433:  * access rights and data in a socket receive buffer, sbappendrights()
                    434:  * should be used.  In either case, the new data begins a new record.
                    435:  * Note that unlike sbappend() and sbappendrecord(), these routines check
                    436:  * for the caller that there will be enough space to store the data.
                    437:  * Each fails if there is not enough space, or if it cannot find mbufs
                    438:  * to store additional information in.
                    439:  *
                    440:  * Reliable protocols may use the socket send buffer to hold data
                    441:  * awaiting acknowledgement.  Data is normally copied from a socket
                    442:  * send buffer in a protocol with m_copy for output to a peer,
                    443:  * and is then removed from the socket buffer with sbdrop()
                    444:  * or sbdroprecord() when the data is acknowledged by the peer.
                    445:  */
                    446:
1.43      thorpej   447: #ifdef SOCKBUF_DEBUG
                    448: void
                    449: sblastrecordchk(struct sockbuf *sb, const char *where)
                    450: {
                    451:        struct mbuf *m = sb->sb_mb;
                    452:
                    453:        while (m && m->m_nextpkt)
                    454:                m = m->m_nextpkt;
                    455:
                    456:        if (m != sb->sb_lastrecord) {
                    457:                printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
                    458:                    sb->sb_mb, sb->sb_lastrecord, m);
                    459:                printf("packet chain:\n");
                    460:                for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
                    461:                        printf("\t%p\n", m);
1.47      provos    462:                panic("sblastrecordchk from %s", where);
1.43      thorpej   463:        }
                    464: }
                    465:
                    466: void
                    467: sblastmbufchk(struct sockbuf *sb, const char *where)
                    468: {
                    469:        struct mbuf *m = sb->sb_mb;
                    470:        struct mbuf *n;
                    471:
                    472:        while (m && m->m_nextpkt)
                    473:                m = m->m_nextpkt;
                    474:
                    475:        while (m && m->m_next)
                    476:                m = m->m_next;
                    477:
                    478:        if (m != sb->sb_mbtail) {
                    479:                printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
                    480:                    sb->sb_mb, sb->sb_mbtail, m);
                    481:                printf("packet tree:\n");
                    482:                for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
                    483:                        printf("\t");
                    484:                        for (n = m; n != NULL; n = n->m_next)
                    485:                                printf("%p ", n);
                    486:                        printf("\n");
                    487:                }
                    488:                panic("sblastmbufchk from %s", where);
                    489:        }
                    490: }
                    491: #endif /* SOCKBUF_DEBUG */
                    492:
                    493: #define        SBLINKRECORD(sb, m0)                                            \
                    494: do {                                                                   \
                    495:        if ((sb)->sb_lastrecord != NULL)                                \
                    496:                (sb)->sb_lastrecord->m_nextpkt = (m0);                  \
                    497:        else                                                            \
                    498:                (sb)->sb_mb = (m0);                                     \
                    499:        (sb)->sb_lastrecord = (m0);                                     \
                    500: } while (/*CONSTCOND*/0)
                    501:
1.1       cgd       502: /*
                    503:  * Append mbuf chain m to the last record in the
                    504:  * socket buffer sb.  The additional space associated
                    505:  * the mbuf chain is recorded in sb.  Empty mbufs are
                    506:  * discarded and mbufs are compacted where possible.
                    507:  */
1.7       mycroft   508: void
1.37      lukem     509: sbappend(struct sockbuf *sb, struct mbuf *m)
1.1       cgd       510: {
1.37      lukem     511:        struct mbuf     *n;
1.1       cgd       512:
                    513:        if (m == 0)
                    514:                return;
1.43      thorpej   515:
1.49      matt      516: #ifdef MBUFTRACE
                    517:        m_claim(m, sb->sb_mowner);
                    518: #endif
                    519:
1.43      thorpej   520:        SBLASTRECORDCHK(sb, "sbappend 1");
                    521:
                    522:        if ((n = sb->sb_lastrecord) != NULL) {
                    523:                /*
                    524:                 * XXX Would like to simply use sb_mbtail here, but
                    525:                 * XXX I need to verify that I won't miss an EOR that
                    526:                 * XXX way.
                    527:                 */
1.1       cgd       528:                do {
                    529:                        if (n->m_flags & M_EOR) {
                    530:                                sbappendrecord(sb, m); /* XXXXXX!!!! */
                    531:                                return;
                    532:                        }
                    533:                } while (n->m_next && (n = n->m_next));
1.43      thorpej   534:        } else {
                    535:                /*
                    536:                 * If this is the first record in the socket buffer, it's
                    537:                 * also the last record.
                    538:                 */
                    539:                sb->sb_lastrecord = m;
1.1       cgd       540:        }
                    541:        sbcompress(sb, m, n);
1.43      thorpej   542:        SBLASTRECORDCHK(sb, "sbappend 2");
                    543: }
                    544:
                    545: /*
                    546:  * This version of sbappend() should only be used when the caller
                    547:  * absolutely knows that there will never be more than one record
                    548:  * in the socket buffer, that is, a stream protocol (such as TCP).
                    549:  */
                    550: void
1.44      thorpej   551: sbappendstream(struct sockbuf *sb, struct mbuf *m)
1.43      thorpej   552: {
                    553:
                    554:        KDASSERT(m->m_nextpkt == NULL);
                    555:        KASSERT(sb->sb_mb == sb->sb_lastrecord);
                    556:
                    557:        SBLASTMBUFCHK(sb, __func__);
                    558:
1.49      matt      559: #ifdef MBUFTRACE
                    560:        m_claim(m, sb->sb_mowner);
                    561: #endif
                    562:
1.43      thorpej   563:        sbcompress(sb, m, sb->sb_mbtail);
                    564:
                    565:        sb->sb_lastrecord = sb->sb_mb;
                    566:        SBLASTRECORDCHK(sb, __func__);
1.1       cgd       567: }
                    568:
                    569: #ifdef SOCKBUF_DEBUG
1.7       mycroft   570: void
1.37      lukem     571: sbcheck(struct sockbuf *sb)
1.1       cgd       572: {
1.37      lukem     573:        struct mbuf     *m;
1.43      thorpej   574:        u_long          len, mbcnt;
1.1       cgd       575:
1.37      lukem     576:        len = 0;
                    577:        mbcnt = 0;
1.1       cgd       578:        for (m = sb->sb_mb; m; m = m->m_next) {
                    579:                len += m->m_len;
                    580:                mbcnt += MSIZE;
                    581:                if (m->m_flags & M_EXT)
                    582:                        mbcnt += m->m_ext.ext_size;
                    583:                if (m->m_nextpkt)
                    584:                        panic("sbcheck nextpkt");
                    585:        }
                    586:        if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
1.43      thorpej   587:                printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
1.1       cgd       588:                    mbcnt, sb->sb_mbcnt);
                    589:                panic("sbcheck");
                    590:        }
                    591: }
                    592: #endif
                    593:
                    594: /*
                    595:  * As above, except the mbuf chain
                    596:  * begins a new record.
                    597:  */
1.7       mycroft   598: void
1.37      lukem     599: sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
1.1       cgd       600: {
1.37      lukem     601:        struct mbuf     *m;
1.1       cgd       602:
                    603:        if (m0 == 0)
                    604:                return;
1.43      thorpej   605:
1.49      matt      606: #ifdef MBUFTRACE
                    607:        m_claim(m0, sb->sb_mowner);
                    608: #endif
1.1       cgd       609:        /*
                    610:         * Put the first mbuf on the queue.
                    611:         * Note this permits zero length records.
                    612:         */
                    613:        sballoc(sb, m0);
1.43      thorpej   614:        SBLASTRECORDCHK(sb, "sbappendrecord 1");
                    615:        SBLINKRECORD(sb, m0);
1.1       cgd       616:        m = m0->m_next;
                    617:        m0->m_next = 0;
                    618:        if (m && (m0->m_flags & M_EOR)) {
                    619:                m0->m_flags &= ~M_EOR;
                    620:                m->m_flags |= M_EOR;
                    621:        }
                    622:        sbcompress(sb, m, m0);
1.43      thorpej   623:        SBLASTRECORDCHK(sb, "sbappendrecord 2");
1.1       cgd       624: }
                    625:
                    626: /*
                    627:  * As above except that OOB data
                    628:  * is inserted at the beginning of the sockbuf,
                    629:  * but after any other OOB data.
                    630:  */
1.7       mycroft   631: void
1.37      lukem     632: sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
1.1       cgd       633: {
1.37      lukem     634:        struct mbuf     *m, **mp;
1.1       cgd       635:
                    636:        if (m0 == 0)
                    637:                return;
1.43      thorpej   638:
                    639:        SBLASTRECORDCHK(sb, "sbinsertoob 1");
                    640:
1.11      christos  641:        for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
1.1       cgd       642:            again:
                    643:                switch (m->m_type) {
                    644:
                    645:                case MT_OOBDATA:
                    646:                        continue;               /* WANT next train */
                    647:
                    648:                case MT_CONTROL:
1.11      christos  649:                        if ((m = m->m_next) != NULL)
1.1       cgd       650:                                goto again;     /* inspect THIS train further */
                    651:                }
                    652:                break;
                    653:        }
                    654:        /*
                    655:         * Put the first mbuf on the queue.
                    656:         * Note this permits zero length records.
                    657:         */
                    658:        sballoc(sb, m0);
                    659:        m0->m_nextpkt = *mp;
1.43      thorpej   660:        if (*mp == NULL) {
                    661:                /* m0 is actually the new tail */
                    662:                sb->sb_lastrecord = m0;
                    663:        }
1.1       cgd       664:        *mp = m0;
                    665:        m = m0->m_next;
                    666:        m0->m_next = 0;
                    667:        if (m && (m0->m_flags & M_EOR)) {
                    668:                m0->m_flags &= ~M_EOR;
                    669:                m->m_flags |= M_EOR;
                    670:        }
                    671:        sbcompress(sb, m, m0);
1.43      thorpej   672:        SBLASTRECORDCHK(sb, "sbinsertoob 2");
1.1       cgd       673: }
                    674:
                    675: /*
                    676:  * Append address and data, and optionally, control (ancillary) data
                    677:  * to the receive queue of a socket.  If present,
                    678:  * m0 must include a packet header with total length.
                    679:  * Returns 0 if no space in sockbuf or insufficient mbufs.
                    680:  */
1.7       mycroft   681: int
1.37      lukem     682: sbappendaddr(struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0,
                    683:        struct mbuf *control)
1.1       cgd       684: {
1.43      thorpej   685:        struct mbuf     *m, *n, *nlast;
1.50      fvdl      686:        int             space, len;
1.1       cgd       687:
1.37      lukem     688:        space = asa->sa_len;
                    689:
1.49      matt      690:        if (m0 != NULL) {
                    691:                if ((m0->m_flags & M_PKTHDR) == 0)
                    692:                        panic("sbappendaddr");
1.1       cgd       693:                space += m0->m_pkthdr.len;
1.49      matt      694: #ifdef MBUFTRACE
                    695:                m_claim(m0, sb->sb_mowner);
                    696: #endif
                    697:        }
1.1       cgd       698:        for (n = control; n; n = n->m_next) {
                    699:                space += n->m_len;
1.49      matt      700:                MCLAIM(n, sb->sb_mowner);
1.1       cgd       701:                if (n->m_next == 0)     /* keep pointer to last control buf */
                    702:                        break;
                    703:        }
                    704:        if (space > sbspace(sb))
                    705:                return (0);
                    706:        MGET(m, M_DONTWAIT, MT_SONAME);
                    707:        if (m == 0)
                    708:                return (0);
1.49      matt      709:        MCLAIM(m, sb->sb_mowner);
1.50      fvdl      710:        /*
                    711:         * XXX avoid 'comparison always true' warning which isn't easily
                    712:         * avoided.
                    713:         */
                    714:        len = asa->sa_len;
                    715:        if (len > MLEN) {
1.20      thorpej   716:                MEXTMALLOC(m, asa->sa_len, M_NOWAIT);
                    717:                if ((m->m_flags & M_EXT) == 0) {
                    718:                        m_free(m);
                    719:                        return (0);
                    720:                }
                    721:        }
1.1       cgd       722:        m->m_len = asa->sa_len;
1.26      perry     723:        memcpy(mtod(m, caddr_t), (caddr_t)asa, asa->sa_len);
1.1       cgd       724:        if (n)
                    725:                n->m_next = m0;         /* concatenate data to control */
                    726:        else
                    727:                control = m0;
                    728:        m->m_next = control;
1.43      thorpej   729:
                    730:        SBLASTRECORDCHK(sb, "sbappendaddr 1");
                    731:
                    732:        for (n = m; n->m_next != NULL; n = n->m_next)
1.1       cgd       733:                sballoc(sb, n);
1.43      thorpej   734:        sballoc(sb, n);
                    735:        nlast = n;
                    736:        SBLINKRECORD(sb, m);
                    737:
                    738:        sb->sb_mbtail = nlast;
                    739:        SBLASTMBUFCHK(sb, "sbappendaddr");
                    740:
                    741:        SBLASTRECORDCHK(sb, "sbappendaddr 2");
                    742:
1.1       cgd       743:        return (1);
                    744: }
                    745:
1.7       mycroft   746: int
1.37      lukem     747: sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
1.1       cgd       748: {
1.43      thorpej   749:        struct mbuf     *m, *mlast, *n;
1.37      lukem     750:        int             space;
1.1       cgd       751:
1.37      lukem     752:        space = 0;
1.1       cgd       753:        if (control == 0)
                    754:                panic("sbappendcontrol");
                    755:        for (m = control; ; m = m->m_next) {
                    756:                space += m->m_len;
1.49      matt      757:                MCLAIM(m, sb->sb_mowner);
1.1       cgd       758:                if (m->m_next == 0)
                    759:                        break;
                    760:        }
                    761:        n = m;                  /* save pointer to last control buffer */
1.49      matt      762:        for (m = m0; m; m = m->m_next) {
                    763:                MCLAIM(m, sb->sb_mowner);
1.1       cgd       764:                space += m->m_len;
1.49      matt      765:        }
1.1       cgd       766:        if (space > sbspace(sb))
                    767:                return (0);
                    768:        n->m_next = m0;                 /* concatenate data to control */
1.43      thorpej   769:
                    770:        SBLASTRECORDCHK(sb, "sbappendcontrol 1");
                    771:
                    772:        for (m = control; m->m_next != NULL; m = m->m_next)
1.1       cgd       773:                sballoc(sb, m);
1.43      thorpej   774:        sballoc(sb, m);
                    775:        mlast = m;
                    776:        SBLINKRECORD(sb, control);
                    777:
                    778:        sb->sb_mbtail = mlast;
                    779:        SBLASTMBUFCHK(sb, "sbappendcontrol");
                    780:
                    781:        SBLASTRECORDCHK(sb, "sbappendcontrol 2");
                    782:
1.1       cgd       783:        return (1);
                    784: }
                    785:
                    786: /*
                    787:  * Compress mbuf chain m into the socket
                    788:  * buffer sb following mbuf n.  If n
                    789:  * is null, the buffer is presumed empty.
                    790:  */
1.7       mycroft   791: void
1.37      lukem     792: sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
1.1       cgd       793: {
1.37      lukem     794:        int             eor;
                    795:        struct mbuf     *o;
1.1       cgd       796:
1.37      lukem     797:        eor = 0;
1.1       cgd       798:        while (m) {
                    799:                eor |= m->m_flags & M_EOR;
                    800:                if (m->m_len == 0 &&
                    801:                    (eor == 0 ||
                    802:                     (((o = m->m_next) || (o = n)) &&
                    803:                      o->m_type == m->m_type))) {
1.46      thorpej   804:                        if (sb->sb_lastrecord == m)
                    805:                                sb->sb_lastrecord = m->m_next;
1.1       cgd       806:                        m = m_free(m);
                    807:                        continue;
                    808:                }
1.40      thorpej   809:                if (n && (n->m_flags & M_EOR) == 0 &&
                    810:                    /* M_TRAILINGSPACE() checks buffer writeability */
                    811:                    m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */
                    812:                    m->m_len <= M_TRAILINGSPACE(n) &&
                    813:                    n->m_type == m->m_type) {
1.26      perry     814:                        memcpy(mtod(n, caddr_t) + n->m_len, mtod(m, caddr_t),
1.1       cgd       815:                            (unsigned)m->m_len);
                    816:                        n->m_len += m->m_len;
                    817:                        sb->sb_cc += m->m_len;
                    818:                        m = m_free(m);
                    819:                        continue;
                    820:                }
                    821:                if (n)
                    822:                        n->m_next = m;
                    823:                else
                    824:                        sb->sb_mb = m;
1.43      thorpej   825:                sb->sb_mbtail = m;
1.1       cgd       826:                sballoc(sb, m);
                    827:                n = m;
                    828:                m->m_flags &= ~M_EOR;
                    829:                m = m->m_next;
                    830:                n->m_next = 0;
                    831:        }
                    832:        if (eor) {
                    833:                if (n)
                    834:                        n->m_flags |= eor;
                    835:                else
1.15      christos  836:                        printf("semi-panic: sbcompress\n");
1.1       cgd       837:        }
1.43      thorpej   838:        SBLASTMBUFCHK(sb, __func__);
1.1       cgd       839: }
                    840:
                    841: /*
                    842:  * Free all mbufs in a sockbuf.
                    843:  * Check that all resources are reclaimed.
                    844:  */
1.7       mycroft   845: void
1.37      lukem     846: sbflush(struct sockbuf *sb)
1.1       cgd       847: {
                    848:
1.43      thorpej   849:        KASSERT((sb->sb_flags & SB_LOCK) == 0);
                    850:
1.1       cgd       851:        while (sb->sb_mbcnt)
                    852:                sbdrop(sb, (int)sb->sb_cc);
1.43      thorpej   853:
                    854:        KASSERT(sb->sb_cc == 0);
                    855:        KASSERT(sb->sb_mb == NULL);
                    856:        KASSERT(sb->sb_mbtail == NULL);
                    857:        KASSERT(sb->sb_lastrecord == NULL);
1.1       cgd       858: }
                    859:
                    860: /*
                    861:  * Drop data from (the front of) a sockbuf.
                    862:  */
1.7       mycroft   863: void
1.37      lukem     864: sbdrop(struct sockbuf *sb, int len)
1.1       cgd       865: {
1.37      lukem     866:        struct mbuf     *m, *mn, *next;
1.1       cgd       867:
                    868:        next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
                    869:        while (len > 0) {
                    870:                if (m == 0) {
                    871:                        if (next == 0)
                    872:                                panic("sbdrop");
                    873:                        m = next;
                    874:                        next = m->m_nextpkt;
                    875:                        continue;
                    876:                }
                    877:                if (m->m_len > len) {
                    878:                        m->m_len -= len;
                    879:                        m->m_data += len;
                    880:                        sb->sb_cc -= len;
                    881:                        break;
                    882:                }
                    883:                len -= m->m_len;
                    884:                sbfree(sb, m);
                    885:                MFREE(m, mn);
                    886:                m = mn;
                    887:        }
                    888:        while (m && m->m_len == 0) {
                    889:                sbfree(sb, m);
                    890:                MFREE(m, mn);
                    891:                m = mn;
                    892:        }
                    893:        if (m) {
                    894:                sb->sb_mb = m;
                    895:                m->m_nextpkt = next;
                    896:        } else
                    897:                sb->sb_mb = next;
1.43      thorpej   898:        /*
1.45      thorpej   899:         * First part is an inline SB_EMPTY_FIXUP().  Second part
1.43      thorpej   900:         * makes sure sb_lastrecord is up-to-date if we dropped
                    901:         * part of the last record.
                    902:         */
                    903:        m = sb->sb_mb;
                    904:        if (m == NULL) {
                    905:                sb->sb_mbtail = NULL;
                    906:                sb->sb_lastrecord = NULL;
                    907:        } else if (m->m_nextpkt == NULL)
                    908:                sb->sb_lastrecord = m;
1.1       cgd       909: }
                    910:
                    911: /*
                    912:  * Drop a record off the front of a sockbuf
                    913:  * and move the next record to the front.
                    914:  */
1.7       mycroft   915: void
1.37      lukem     916: sbdroprecord(struct sockbuf *sb)
1.1       cgd       917: {
1.37      lukem     918:        struct mbuf     *m, *mn;
1.1       cgd       919:
                    920:        m = sb->sb_mb;
                    921:        if (m) {
                    922:                sb->sb_mb = m->m_nextpkt;
                    923:                do {
                    924:                        sbfree(sb, m);
                    925:                        MFREE(m, mn);
1.11      christos  926:                } while ((m = mn) != NULL);
1.1       cgd       927:        }
1.45      thorpej   928:        SB_EMPTY_FIXUP(sb);
1.19      thorpej   929: }
                    930:
                    931: /*
                    932:  * Create a "control" mbuf containing the specified data
                    933:  * with the specified type for presentation on a socket buffer.
                    934:  */
                    935: struct mbuf *
1.37      lukem     936: sbcreatecontrol(caddr_t p, int size, int type, int level)
1.19      thorpej   937: {
1.37      lukem     938:        struct cmsghdr  *cp;
                    939:        struct mbuf     *m;
1.19      thorpej   940:
1.35      itojun    941:        if (CMSG_SPACE(size) > MCLBYTES) {
1.30      itojun    942:                printf("sbcreatecontrol: message too large %d\n", size);
                    943:                return NULL;
                    944:        }
                    945:
1.19      thorpej   946:        if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
                    947:                return ((struct mbuf *) NULL);
1.35      itojun    948:        if (CMSG_SPACE(size) > MLEN) {
1.30      itojun    949:                MCLGET(m, M_DONTWAIT);
                    950:                if ((m->m_flags & M_EXT) == 0) {
                    951:                        m_free(m);
                    952:                        return NULL;
                    953:                }
                    954:        }
1.19      thorpej   955:        cp = mtod(m, struct cmsghdr *);
1.26      perry     956:        memcpy(CMSG_DATA(cp), p, size);
1.35      itojun    957:        m->m_len = CMSG_SPACE(size);
                    958:        cp->cmsg_len = CMSG_LEN(size);
1.19      thorpej   959:        cp->cmsg_level = level;
                    960:        cp->cmsg_type = type;
                    961:        return (m);
1.1       cgd       962: }

CVSweb <webmaster@jp.NetBSD.org>