Annotation of src/sys/kern/uipc_socket2.c, Revision 1.69
1.69 ! christos 1: /* $NetBSD: uipc_socket2.c,v 1.53.2.6 2005/11/10 14:09:45 skrll Exp $ */
1.9 cgd 2:
1.1 cgd 3: /*
1.7 mycroft 4: * Copyright (c) 1982, 1986, 1988, 1990, 1993
5: * The Regents of the University of California. All rights reserved.
1.1 cgd 6: *
7: * Redistribution and use in source and binary forms, with or without
8: * modification, are permitted provided that the following conditions
9: * are met:
10: * 1. Redistributions of source code must retain the above copyright
11: * notice, this list of conditions and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice, this list of conditions and the following disclaimer in the
14: * documentation and/or other materials provided with the distribution.
1.54 agc 15: * 3. Neither the name of the University nor the names of its contributors
1.1 cgd 16: * may be used to endorse or promote products derived from this software
17: * without specific prior written permission.
18: *
19: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29: * SUCH DAMAGE.
30: *
1.23 fvdl 31: * @(#)uipc_socket2.c 8.2 (Berkeley) 2/14/95
1.1 cgd 32: */
1.42 lukem 33:
34: #include <sys/cdefs.h>
1.69 ! christos 35: __KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.53.2.6 2005/11/10 14:09:45 skrll Exp $");
1.51 martin 36:
37: #include "opt_mbuftrace.h"
1.58 thorpej 38: #include "opt_sb_max.h"
1.1 cgd 39:
1.5 mycroft 40: #include <sys/param.h>
41: #include <sys/systm.h>
42: #include <sys/proc.h>
43: #include <sys/file.h>
44: #include <sys/buf.h>
45: #include <sys/malloc.h>
46: #include <sys/mbuf.h>
47: #include <sys/protosw.h>
1.55 christos 48: #include <sys/poll.h>
1.5 mycroft 49: #include <sys/socket.h>
50: #include <sys/socketvar.h>
1.11 christos 51: #include <sys/signalvar.h>
1.1 cgd 52:
53: /*
54: * Primitive routines for operating on sockets and socket buffers
55: */
56:
57: /* strings for sleep message: */
1.21 mycroft 58: const char netcon[] = "netcon";
59: const char netcls[] = "netcls";
1.41 enami 60: const char netio[] = "netio";
61: const char netlck[] = "netlck";
1.1 cgd 62:
1.58 thorpej 63: u_long sb_max = SB_MAX; /* maximum socket buffer size */
64: static u_long sb_max_adj; /* adjusted sb_max */
65:
1.1 cgd 66: /*
67: * Procedures to manipulate state flags of socket
68: * and do appropriate wakeups. Normal sequence from the
69: * active (originating) side is that soisconnecting() is
70: * called during processing of connect() call,
71: * resulting in an eventual call to soisconnected() if/when the
72: * connection is established. When the connection is torn down
73: * soisdisconnecting() is called during processing of disconnect() call,
74: * and soisdisconnected() is called when the connection to the peer
75: * is totally severed. The semantics of these routines are such that
76: * connectionless protocols can call soisconnected() and soisdisconnected()
77: * only, bypassing the in-progress calls when setting up a ``connection''
78: * takes no time.
79: *
80: * From the passive side, a socket is created with
81: * two queues of sockets: so_q0 for connections in progress
82: * and so_q for connections already made and awaiting user acceptance.
83: * As a protocol is preparing incoming connections, it creates a socket
84: * structure queued on so_q0 by calling sonewconn(). When the connection
85: * is established, soisconnected() is called, and transfers the
86: * socket structure to so_q, making it available to accept().
1.66 perry 87: *
1.1 cgd 88: * If a socket is closed with sockets on either
89: * so_q0 or so_q, these sockets are dropped.
90: *
91: * If higher level protocols are implemented in
92: * the kernel, the wakeups done here will sometimes
93: * cause software-interrupt process scheduling.
94: */
95:
1.7 mycroft 96: void
1.37 lukem 97: soisconnecting(struct socket *so)
1.1 cgd 98: {
99:
100: so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
101: so->so_state |= SS_ISCONNECTING;
102: }
103:
1.7 mycroft 104: void
1.37 lukem 105: soisconnected(struct socket *so)
1.1 cgd 106: {
1.37 lukem 107: struct socket *head;
1.1 cgd 108:
1.37 lukem 109: head = so->so_head;
1.1 cgd 110: so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
111: so->so_state |= SS_ISCONNECTED;
112: if (head && soqremque(so, 0)) {
113: soqinsque(head, so, 1);
114: sorwakeup(head);
115: wakeup((caddr_t)&head->so_timeo);
116: } else {
117: wakeup((caddr_t)&so->so_timeo);
118: sorwakeup(so);
119: sowwakeup(so);
120: }
121: }
122:
1.7 mycroft 123: void
1.37 lukem 124: soisdisconnecting(struct socket *so)
1.1 cgd 125: {
126:
127: so->so_state &= ~SS_ISCONNECTING;
128: so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
129: wakeup((caddr_t)&so->so_timeo);
130: sowwakeup(so);
131: sorwakeup(so);
132: }
133:
1.7 mycroft 134: void
1.37 lukem 135: soisdisconnected(struct socket *so)
1.1 cgd 136: {
137:
138: so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
1.27 mycroft 139: so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
1.1 cgd 140: wakeup((caddr_t)&so->so_timeo);
141: sowwakeup(so);
142: sorwakeup(so);
143: }
144:
145: /*
146: * When an attempt at a new connection is noted on a socket
147: * which accepts connections, sonewconn is called. If the
148: * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
150: * data structure of the original socket, and return this.
151: * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
152: *
153: * Currently, sonewconn() is defined as sonewconn1() in socketvar.h
154: * to catch calls that are missing the (new) second parameter.
155: */
struct socket *
sonewconn1(struct socket *head, int connstatus)
{
	struct socket *so;
	int soqueue;

	/* connstatus != 0 means the connection is already complete. */
	soqueue = connstatus ? 1 : 0;
	/* Bound the backlog at 1.5 * so_qlimit, counting both queues. */
	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
		return ((struct socket *)0);
	so = pool_get(&socket_pool, PR_NOWAIT);
	if (so == NULL)
		return (NULL);
	memset((caddr_t)so, 0, sizeof(*so));
	/*
	 * Inherit the listener's properties.  SS_NOFDREF: the new socket
	 * has no file descriptor referencing it yet.
	 */
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	so->so_send = head->so_send;
	so->so_receive = head->so_receive;
	so->so_uidinfo = head->so_uidinfo;
#ifdef MBUFTRACE
	so->so_mowner = head->so_mowner;
	so->so_rcv.sb_mowner = head->so_rcv.sb_mowner;
	so->so_snd.sb_mowner = head->so_snd.sb_mowner;
#endif
	(void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
	soqinsque(head, so, soqueue);
	/* Let the protocol attach; undo the enqueue and free on failure. */
	if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH,
	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0,
	    (struct lwp *)0)) {
		(void) soqremque(so, soqueue);
		pool_put(&socket_pool, so);
		return (NULL);
	}
	if (connstatus) {
		/* Already connected: tell the listening socket right away. */
		sorwakeup(head);
		wakeup((caddr_t)&head->so_timeo);
		so->so_state |= connstatus;
	}
	return (so);
}
200:
1.7 mycroft 201: void
1.37 lukem 202: soqinsque(struct socket *head, struct socket *so, int q)
1.1 cgd 203: {
204:
1.22 thorpej 205: #ifdef DIAGNOSTIC
206: if (so->so_onq != NULL)
207: panic("soqinsque");
208: #endif
209:
1.1 cgd 210: so->so_head = head;
211: if (q == 0) {
212: head->so_q0len++;
1.22 thorpej 213: so->so_onq = &head->so_q0;
1.1 cgd 214: } else {
215: head->so_qlen++;
1.22 thorpej 216: so->so_onq = &head->so_q;
1.1 cgd 217: }
1.22 thorpej 218: TAILQ_INSERT_TAIL(so->so_onq, so, so_qe);
1.1 cgd 219: }
220:
1.7 mycroft 221: int
1.37 lukem 222: soqremque(struct socket *so, int q)
1.1 cgd 223: {
1.37 lukem 224: struct socket *head;
1.1 cgd 225:
1.37 lukem 226: head = so->so_head;
1.22 thorpej 227: if (q == 0) {
228: if (so->so_onq != &head->so_q0)
1.17 thorpej 229: return (0);
1.1 cgd 230: head->so_q0len--;
231: } else {
1.22 thorpej 232: if (so->so_onq != &head->so_q)
233: return (0);
1.1 cgd 234: head->so_qlen--;
235: }
1.22 thorpej 236: TAILQ_REMOVE(so->so_onq, so, so_qe);
237: so->so_onq = NULL;
238: so->so_head = NULL;
1.1 cgd 239: return (1);
240: }
241:
242: /*
243: * Socantsendmore indicates that no more data will be sent on the
244: * socket; it would normally be applied to a socket when the user
245: * informs the system that no more data is to be sent, by the protocol
246: * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data
247: * will be received, and will normally be applied to the socket by a
248: * protocol when it detects that the peer will send no more data.
249: * Data queued for reading in the socket may yet be read.
250: */
251:
void
socantsendmore(struct socket *so)
{

	/* No further data may be queued for sending; wake any writers. */
	so->so_state |= SS_CANTSENDMORE;
	sowwakeup(so);
}
259:
void
socantrcvmore(struct socket *so)
{

	/* No further data will arrive; wake any readers.  Data already
	 * queued in the receive buffer may still be read. */
	so->so_state |= SS_CANTRCVMORE;
	sorwakeup(so);
}
267:
268: /*
269: * Wait for data to arrive at/drain from a socket buffer.
270: */
1.7 mycroft 271: int
1.37 lukem 272: sbwait(struct sockbuf *sb)
1.1 cgd 273: {
274:
275: sb->sb_flags |= SB_WAIT;
276: return (tsleep((caddr_t)&sb->sb_cc,
277: (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, netio,
278: sb->sb_timeo));
279: }
280:
1.66 perry 281: /*
1.1 cgd 282: * Lock a sockbuf already known to be locked;
283: * return any error returned from sleep (EINTR).
284: */
1.7 mycroft 285: int
1.37 lukem 286: sb_lock(struct sockbuf *sb)
1.1 cgd 287: {
1.37 lukem 288: int error;
1.1 cgd 289:
290: while (sb->sb_flags & SB_LOCK) {
291: sb->sb_flags |= SB_WANT;
1.66 perry 292: error = tsleep((caddr_t)&sb->sb_flags,
1.41 enami 293: (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH,
294: netlck, 0);
1.11 christos 295: if (error)
1.1 cgd 296: return (error);
297: }
298: sb->sb_flags |= SB_LOCK;
299: return (0);
300: }
301:
302: /*
303: * Wakeup processes waiting on a socket buffer.
304: * Do asynchronous notification via SIGIO
1.39 manu 305: * if the socket buffer has the SB_ASYNC flag set.
1.1 cgd 306: */
1.7 mycroft 307: void
1.55 christos 308: sowakeup(struct socket *so, struct sockbuf *sb, int code)
1.1 cgd 309: {
1.48 jdolecek 310: selnotify(&sb->sb_sel, 0);
1.7 mycroft 311: sb->sb_flags &= ~SB_SEL;
1.1 cgd 312: if (sb->sb_flags & SB_WAIT) {
313: sb->sb_flags &= ~SB_WAIT;
314: wakeup((caddr_t)&sb->sb_cc);
315: }
1.39 manu 316: if (sb->sb_flags & SB_ASYNC) {
1.56 jdolecek 317: int band;
1.57 christos 318: if (code == POLL_IN)
319: band = POLLIN|POLLRDNORM;
320: else
321: band = POLLOUT|POLLWRNORM;
322: fownsignal(so->so_pgid, SIGIO, code, band, so);
1.1 cgd 323: }
1.24 matt 324: if (sb->sb_flags & SB_UPCALL)
325: (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
1.1 cgd 326: }
327:
328: /*
329: * Socket buffer (struct sockbuf) utility routines.
330: *
331: * Each socket contains two socket buffers: one for sending data and
332: * one for receiving data. Each buffer contains a queue of mbufs,
333: * information about the number of mbufs and amount of data in the
1.13 mycroft 334: * queue, and other fields allowing poll() statements and notification
1.1 cgd 335: * on data availability to be implemented.
336: *
337: * Data stored in a socket buffer is maintained as a list of records.
338: * Each record is a list of mbufs chained together with the m_next
339: * field. Records are chained together with the m_nextpkt field. The upper
340: * level routine soreceive() expects the following conventions to be
341: * observed when placing information in the receive buffer:
342: *
343: * 1. If the protocol requires each message be preceded by the sender's
344: * name, then a record containing that name must be present before
345: * any associated data (mbuf's must be of type MT_SONAME).
346: * 2. If the protocol supports the exchange of ``access rights'' (really
347: * just additional data associated with the message), and there are
348: * ``rights'' to be received, then a record containing this data
1.10 mycroft 349: * should be present (mbuf's must be of type MT_CONTROL).
1.1 cgd 350: * 3. If a name or rights record exists, then it must be followed by
351: * a data record, perhaps of zero length.
352: *
353: * Before using a new socket structure it is first necessary to reserve
354: * buffer space to the socket, by calling sbreserve(). This should commit
355: * some of the available buffer space in the system buffer pool for the
356: * socket (currently, it does nothing but enforce limits). The space
357: * should be released by calling sbrelease() when the socket is destroyed.
358: */
359:
1.7 mycroft 360: int
1.58 thorpej 361: sb_max_set(u_long new_sbmax)
362: {
363: int s;
364:
365: if (new_sbmax < (16 * 1024))
366: return (EINVAL);
367:
368: s = splsoftnet();
369: sb_max = new_sbmax;
370: sb_max_adj = (u_quad_t)new_sbmax * MCLBYTES / (MSIZE + MCLBYTES);
371: splx(s);
372:
373: return (0);
374: }
375:
376: int
1.37 lukem 377: soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
1.1 cgd 378: {
379:
1.59 christos 380: if (sbreserve(&so->so_snd, sndcc, so) == 0)
1.1 cgd 381: goto bad;
1.59 christos 382: if (sbreserve(&so->so_rcv, rcvcc, so) == 0)
1.1 cgd 383: goto bad2;
384: if (so->so_rcv.sb_lowat == 0)
385: so->so_rcv.sb_lowat = 1;
386: if (so->so_snd.sb_lowat == 0)
387: so->so_snd.sb_lowat = MCLBYTES;
388: if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
389: so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
390: return (0);
1.37 lukem 391: bad2:
1.59 christos 392: sbrelease(&so->so_snd, so);
1.37 lukem 393: bad:
1.1 cgd 394: return (ENOBUFS);
395: }
396:
397: /*
398: * Allot mbufs to a sockbuf.
399: * Attempt to scale mbmax so that mbcnt doesn't become limiting
400: * if buffering efficiency is near the normal case.
401: */
/*
 * Set the high-water mark of `sb' to `cc' bytes, charging the socket
 * owner's per-uid socket-buffer accounting.  Returns 1 on success, 0 if
 * the request is out of range or would exceed the owner's RLIMIT_SBSIZE.
 */
int
sbreserve(struct sockbuf *sb, u_long cc, struct socket *so)
{
	struct proc *p = curproc;		/* XXX */
	rlim_t maxcc;
	struct uidinfo *uidinfo;

	KDASSERT(sb_max_adj != 0);
	if (cc == 0 || cc > sb_max_adj)
		return (0);
	if (so) {
		/*
		 * Enforce RLIMIT_SBSIZE only when the current process
		 * belongs to the socket's owning uid.
		 */
		if (p && p->p_ucred->cr_uid == so->so_uidinfo->ui_uid)
			maxcc = p->p_rlimit[RLIMIT_SBSIZE].rlim_cur;
		else
			maxcc = RLIM_INFINITY;
		uidinfo = so->so_uidinfo;
	} else {
		uidinfo = uid_find(0);	/* XXX: nothing better */
		maxcc = RLIM_INFINITY;
	}
	if (!chgsbsize(uidinfo, &sb->sb_hiwat, cc, maxcc))
		return 0;
	/* Allow mbuf overhead of up to 2x the data limit, capped at sb_max. */
	sb->sb_mbmax = min(cc * 2, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}
429:
/*
 * Free all mbufs held by a sockbuf and return its reserved space to the
 * owning uid's socket-buffer accounting.
 */
void
sbrelease(struct sockbuf *sb, struct socket *so)
{

	sbflush(sb);
	(void)chgsbsize(so->so_uidinfo, &sb->sb_hiwat, 0,
	    RLIM_INFINITY);
	sb->sb_mbmax = 0;
}
442:
443: /*
444: * Routines to add and remove
445: * data from an mbuf queue.
446: *
447: * The routines sbappend() or sbappendrecord() are normally called to
448: * append new mbufs to a socket buffer, after checking that adequate
449: * space is available, comparing the function sbspace() with the amount
450: * of data to be added. sbappendrecord() differs from sbappend() in
451: * that data supplied is treated as the beginning of a new record.
452: * To place a sender's address, optional access rights, and data in a
453: * socket receive buffer, sbappendaddr() should be used. To place
454: * access rights and data in a socket receive buffer, sbappendrights()
455: * should be used. In either case, the new data begins a new record.
456: * Note that unlike sbappend() and sbappendrecord(), these routines check
457: * for the caller that there will be enough space to store the data.
458: * Each fails if there is not enough space, or if it cannot find mbufs
459: * to store additional information in.
460: *
461: * Reliable protocols may use the socket send buffer to hold data
462: * awaiting acknowledgement. Data is normally copied from a socket
463: * send buffer in a protocol with m_copy for output to a peer,
464: * and then removing the data from the socket buffer with sbdrop()
465: * or sbdroprecord() when the data is acknowledged by the peer.
466: */
467:
1.43 thorpej 468: #ifdef SOCKBUF_DEBUG
469: void
470: sblastrecordchk(struct sockbuf *sb, const char *where)
471: {
472: struct mbuf *m = sb->sb_mb;
473:
474: while (m && m->m_nextpkt)
475: m = m->m_nextpkt;
476:
477: if (m != sb->sb_lastrecord) {
478: printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
479: sb->sb_mb, sb->sb_lastrecord, m);
480: printf("packet chain:\n");
481: for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
482: printf("\t%p\n", m);
1.47 provos 483: panic("sblastrecordchk from %s", where);
1.43 thorpej 484: }
485: }
486:
487: void
488: sblastmbufchk(struct sockbuf *sb, const char *where)
489: {
490: struct mbuf *m = sb->sb_mb;
491: struct mbuf *n;
492:
493: while (m && m->m_nextpkt)
494: m = m->m_nextpkt;
495:
496: while (m && m->m_next)
497: m = m->m_next;
498:
499: if (m != sb->sb_mbtail) {
500: printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
501: sb->sb_mb, sb->sb_mbtail, m);
502: printf("packet tree:\n");
503: for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
504: printf("\t");
505: for (n = m; n != NULL; n = n->m_next)
506: printf("%p ", n);
507: printf("\n");
508: }
509: panic("sblastmbufchk from %s", where);
510: }
511: }
512: #endif /* SOCKBUF_DEBUG */
513:
/*
 * Link a chain of records (m0 .. mlast) onto a socket buffer: splice it
 * after the current last record (or make it the first record if the
 * buffer is empty), then update sb_lastrecord.  Note: sb_mbtail is NOT
 * updated here; callers must maintain it themselves.
 */
#define	SBLINKRECORDCHAIN(sb, m0, mlast)				\
do {									\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (mlast);					\
} while (/*CONSTCOND*/0)

/*
 * Link a single record onto a socket buffer.
 */
#define	SBLINKRECORD(sb, m0)						\
    SBLINKRECORDCHAIN(sb, m0, m0)
529:
1.1 cgd 530: /*
531: * Append mbuf chain m to the last record in the
532: * socket buffer sb. The additional space associated
533: * the mbuf chain is recorded in sb. Empty mbufs are
534: * discarded and mbufs are compacted where possible.
535: */
void
sbappend(struct sockbuf *sb, struct mbuf *m)
{
	struct mbuf *n;

	/* Nothing to append. */
	if (m == 0)
		return;

#ifdef MBUFTRACE
	m_claimm(m, sb->sb_mowner);
#endif

	SBLASTRECORDCHK(sb, "sbappend 1");

	if ((n = sb->sb_lastrecord) != NULL) {
		/*
		 * XXX Would like to simply use sb_mbtail here, but
		 * XXX I need to verify that I won't miss an EOR that
		 * XXX way.
		 */
		/*
		 * Scan the last record for an end-of-record mark; if one
		 * is present, the new data must start a fresh record.
		 */
		do {
			if (n->m_flags & M_EOR) {
				sbappendrecord(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	} else {
		/*
		 * If this is the first record in the socket buffer, it's
		 * also the last record.
		 */
		sb->sb_lastrecord = m;
	}
	/* Append (and possibly coalesce) m after the last mbuf n. */
	sbcompress(sb, m, n);
	SBLASTRECORDCHK(sb, "sbappend 2");
}
572:
573: /*
574: * This version of sbappend() should only be used when the caller
575: * absolutely knows that there will never be more than one record
576: * in the socket buffer, that is, a stream protocol (such as TCP).
577: */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	/* A stream sockbuf holds exactly one record. */
	KDASSERT(m->m_nextpkt == NULL);
	KASSERT(sb->sb_mb == sb->sb_lastrecord);

	SBLASTMBUFCHK(sb, __func__);

#ifdef MBUFTRACE
	m_claimm(m, sb->sb_mowner);
#endif

	/* Coalesce directly after the known tail mbuf. */
	sbcompress(sb, m, sb->sb_mbtail);

	/* sbcompress() may have set sb_mb; keep sb_lastrecord in sync. */
	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb, __func__);
}
596:
597: #ifdef SOCKBUF_DEBUG
1.7 mycroft 598: void
1.37 lukem 599: sbcheck(struct sockbuf *sb)
1.1 cgd 600: {
1.37 lukem 601: struct mbuf *m;
1.43 thorpej 602: u_long len, mbcnt;
1.1 cgd 603:
1.37 lukem 604: len = 0;
605: mbcnt = 0;
1.1 cgd 606: for (m = sb->sb_mb; m; m = m->m_next) {
607: len += m->m_len;
608: mbcnt += MSIZE;
609: if (m->m_flags & M_EXT)
610: mbcnt += m->m_ext.ext_size;
611: if (m->m_nextpkt)
612: panic("sbcheck nextpkt");
613: }
614: if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
1.43 thorpej 615: printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
1.1 cgd 616: mbcnt, sb->sb_mbcnt);
617: panic("sbcheck");
618: }
619: }
620: #endif
621:
622: /*
623: * As above, except the mbuf chain
624: * begins a new record.
625: */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m;

	if (m0 == 0)
		return;

#ifdef MBUFTRACE
	m_claimm(m0, sb->sb_mowner);
#endif
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 1");
	SBLINKRECORD(sb, m0);
	m = m0->m_next;
	m0->m_next = 0;
	/*
	 * If the record head carries M_EOR but more mbufs follow, shift
	 * the flag forward; sbcompress() will carry it to the tail.
	 */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	/* Compress the remainder of the chain in behind the head. */
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 2");
}
653:
654: /*
655: * As above except that OOB data
656: * is inserted at the beginning of the sockbuf,
657: * but after any other OOB data.
658: */
void
sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m, **mp;

	if (m0 == 0)
		return;

	SBLASTRECORDCHK(sb, "sbinsertoob 1");

	/*
	 * Find the insertion point: skip over any records that are OOB
	 * data (or control mbufs leading to OOB data) already queued at
	 * the front of the buffer.
	 */
	for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
	    again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			if ((m = m->m_next) != NULL)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	if (*mp == NULL) {
		/* m0 is actually the new tail */
		sb->sb_lastrecord = m0;
	}
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	/* Shift M_EOR off the record head; sbcompress() moves it to the tail. */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbinsertoob 2");
}
702:
703: /*
704: * Append address and data, and optionally, control (ancillary) data
705: * to the receive queue of a socket. If present,
706: * m0 must include a packet header with total length.
707: * Returns 0 if no space in sockbuf or insufficient mbufs.
708: */
int
sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0,
	struct mbuf *control)
{
	struct mbuf *m, *n, *nlast;
	int space, len;

	/* Compute total space needed: address + data + control. */
	space = asa->sa_len;

	if (m0 != NULL) {
		if ((m0->m_flags & M_PKTHDR) == 0)
			panic("sbappendaddr");
		space += m0->m_pkthdr.len;
#ifdef MBUFTRACE
		m_claimm(m0, sb->sb_mowner);
#endif
	}
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		MCLAIM(n, sb->sb_mowner);
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	/* Allocate an mbuf to hold the sender's address. */
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	MCLAIM(m, sb->sb_mowner);
	/*
	 * XXX avoid 'comparison always true' warning which isn't easily
	 * avoided.
	 */
	len = asa->sa_len;
	if (len > MLEN) {
		/* Address too big for a plain mbuf: attach external storage. */
		MEXTMALLOC(m, asa->sa_len, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return (0);
		}
	}
	m->m_len = asa->sa_len;
	memcpy(mtod(m, caddr_t), asa, asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;

	SBLASTRECORDCHK(sb, "sbappendaddr 1");

	/* Charge every mbuf of the new record to the sockbuf. */
	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(sb, n);
	sballoc(sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb, "sbappendaddr");

	SBLASTRECORDCHK(sb, "sbappendaddr 2");

	return (1);
}
773:
1.63 jonathan 774: /*
775: * Helper for sbappendchainaddr: prepend a struct sockaddr* to
776: * an mbuf chain.
777: */
static __inline struct mbuf *
m_prepend_sockaddr(struct sockbuf *sb, struct mbuf *m0,
		 const struct sockaddr *asa)
{
	struct mbuf *m;
	const int salen = asa->sa_len;

	/* only the first in each chain need be a pkthdr */
	MGETHDR(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	MCLAIM(m, sb->sb_mowner);
#ifdef notyet
	if (salen > MHLEN) {
		MEXTMALLOC(m, salen, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return (0);
		}
	}
#else
	/* Large addresses not handled yet; caller must fit within MHLEN. */
	KASSERT(salen <= MHLEN);
#endif
	m->m_len = salen;
	memcpy(mtod(m, caddr_t), asa, salen);
	m->m_next = m0;
	/* New pkthdr length covers the prepended address plus the data. */
	m->m_pkthdr.len = salen + m0->m_pkthdr.len;

	return m;
}
808:
int
sbappendaddrchain(struct sockbuf *sb, const struct sockaddr *asa,
	struct mbuf *m0, int sbprio)
{
	int space;
	struct mbuf *m, *n, *n0, *nlast;
	int error;

	/*
	 * XXX sbprio reserved for encoding priority of this* request:
	 *  SB_PRIO_NONE --> honour normal sb limits
	 *  SB_PRIO_ONESHOT_OVERFLOW --> if socket has any space,
	 *	take whole chain. Intended for large requests
	 *      that should be delivered atomically (all, or none).
	 * SB_PRIO_OVERDRAFT -- allow a small (2*MLEN) overflow
	 *       over normal socket limits, for messages indicating
	 *       buffer overflow in earlier normal/lower-priority messages
	 * SB_PRIO_BESTEFFORT -->  ignore limits entirely.
	 *       Intended for  kernel-generated messages only.
	 *        Up to generator to avoid total mbuf resource exhaustion.
	 */
	(void)sbprio;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddrchain");

	/* NOTE(review): `space' and `error' are currently unused beyond
	 * the notyet SB_PRIO_* enforcement below and the goto path. */
	space = sbspace(sb);

#ifdef notyet
	/*
	 * Enforce SB_PRIO_* limits as described above.
	 */
#endif

	n0 = NULL;
	nlast = NULL;
	for (m = m0; m; m = m->m_nextpkt) {
		struct mbuf *np;

#ifdef MBUFTRACE
		m_claimm(m, sb->sb_mowner);
#endif

		/* Prepend sockaddr to this record (m) of input chain m0 */
		n = m_prepend_sockaddr(sb, m, asa);
		if (n == NULL) {
			error = ENOBUFS;
			goto bad;
		}

		/* Append record (asa+m) to end of new chain n0 */
		if (n0 == NULL) {
			n0 = n;
		} else {
			nlast->m_nextpkt = n;
		}
		/* Keep track of last record on new chain */
		nlast = n;

		/* Charge every mbuf of the new record to the sockbuf. */
		for (np = n; np; np = np->m_next)
			sballoc(sb, np);
	}

	SBLASTRECORDCHK(sb, "sbappendaddrchain 1");

	/* Drop the entire chain of (asa+m) records onto the socket */
	SBLINKRECORDCHAIN(sb, n0, nlast);

	SBLASTRECORDCHK(sb, "sbappendaddrchain 2");

	/* Walk to the last mbuf of the last record to fix sb_mbtail. */
	for (m = nlast; m->m_next; m = m->m_next)
		;
	sb->sb_mbtail = m;
	SBLASTMBUFCHK(sb, "sbappendaddrchain");

	return (1);

bad:
	/*
	 * On error, free the prepended addresses. For consistency
	 * with sbappendaddr(), leave it to our caller to free
	 * the input record chain passed to us as m0.
	 */
	while ((n = n0) != NULL) {
		struct mbuf *np;

		/* Undo the sballoc() of this record */
		for (np = n; np; np = np->m_next)
			sbfree(sb, np);

		n0 = n->m_nextpkt;	/* iterate at next prepended address */
		MFREE(n, np);		/* free prepended address (not data) */
	}
	return 0;
}
904:
905:
int
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
	struct mbuf *m, *mlast, *n;
	int space;

	/* Sum the space required by control and data chains. */
	space = 0;
	if (control == 0)
		panic("sbappendcontrol");
	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		MCLAIM(m, sb->sb_mowner);
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next) {
		MCLAIM(m, sb->sb_mowner);
		space += m->m_len;
	}
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;			/* concatenate data to control */

	SBLASTRECORDCHK(sb, "sbappendcontrol 1");

	/* Charge every mbuf of the new record to the sockbuf. */
	for (m = control; m->m_next != NULL; m = m->m_next)
		sballoc(sb, m);
	sballoc(sb, m);
	mlast = m;
	SBLINKRECORD(sb, control);

	sb->sb_mbtail = mlast;
	SBLASTMBUFCHK(sb, "sbappendcontrol");

	SBLASTRECORDCHK(sb, "sbappendcontrol 2");

	return (1);
}
945:
946: /*
947: * Compress mbuf chain m into the socket
948: * buffer sb following mbuf n. If n
949: * is null, the buffer is presumed empty.
950: */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int eor;
	struct mbuf *o;

	eor = 0;
	while (m) {
		eor |= m->m_flags & M_EOR;
		/*
		 * Discard an empty mbuf, unless it carries the only EOR
		 * mark and no same-type successor exists to take it.
		 */
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		/*
		 * Coalesce: copy a small mbuf's data into the trailing
		 * space of the previous mbuf and free it.
		 */
		if (n && (n->m_flags & M_EOR) == 0 &&
		    /* M_TRAILINGSPACE() checks buffer writeability */
		    m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			memcpy(mtod(n, caddr_t) + n->m_len, mtod(m, caddr_t),
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		/* Otherwise link m in after n (or at the head if n is NULL). */
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	/* Reapply any EOR mark collected above to the final mbuf. */
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	SBLASTMBUFCHK(sb, __func__);
}
1000:
1001: /*
1002: * Free all mbufs in a sockbuf.
1003: * Check that all resources are reclaimed.
1004: */
1.7 mycroft 1005: void
1.37 lukem 1006: sbflush(struct sockbuf *sb)
1.1 cgd 1007: {
1008:
1.43 thorpej 1009: KASSERT((sb->sb_flags & SB_LOCK) == 0);
1010:
1.1 cgd 1011: while (sb->sb_mbcnt)
1012: sbdrop(sb, (int)sb->sb_cc);
1.43 thorpej 1013:
1014: KASSERT(sb->sb_cc == 0);
1015: KASSERT(sb->sb_mb == NULL);
1016: KASSERT(sb->sb_mbtail == NULL);
1017: KASSERT(sb->sb_lastrecord == NULL);
1.1 cgd 1018: }
1019:
/*
 * Drop data from (the front of) a sockbuf.
 * Frees whole mbufs as they are consumed and trims the first partially
 * consumed mbuf in place; panics if len exceeds the data present.
 */
void
sbdrop(struct sockbuf *sb, int len)
{
	struct mbuf *m, *mn, *next;

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	while (len > 0) {
		if (m == 0) {
			/* Current record exhausted; step to the next one. */
			if (next == 0)
				panic("sbdrop");
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			/* Partial mbuf: trim from the front and stop. */
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	/* Discard any now-empty mbufs left at the front. */
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP(). Second part
	 * makes sure sb_lastrecord is up-to-date if we dropped
	 * part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL)
		sb->sb_lastrecord = m;
}
1070:
1071: /*
1072: * Drop a record off the front of a sockbuf
1073: * and move the next record to the front.
1074: */
1.7 mycroft 1075: void
1.37 lukem 1076: sbdroprecord(struct sockbuf *sb)
1.1 cgd 1077: {
1.37 lukem 1078: struct mbuf *m, *mn;
1.1 cgd 1079:
1080: m = sb->sb_mb;
1081: if (m) {
1082: sb->sb_mb = m->m_nextpkt;
1083: do {
1084: sbfree(sb, m);
1085: MFREE(m, mn);
1.11 christos 1086: } while ((m = mn) != NULL);
1.1 cgd 1087: }
1.45 thorpej 1088: SB_EMPTY_FIXUP(sb);
1.19 thorpej 1089: }
1090:
1091: /*
1092: * Create a "control" mbuf containing the specified data
1093: * with the specified type for presentation on a socket buffer.
1094: */
1095: struct mbuf *
1.37 lukem 1096: sbcreatecontrol(caddr_t p, int size, int type, int level)
1.19 thorpej 1097: {
1.37 lukem 1098: struct cmsghdr *cp;
1099: struct mbuf *m;
1.19 thorpej 1100:
1.35 itojun 1101: if (CMSG_SPACE(size) > MCLBYTES) {
1.30 itojun 1102: printf("sbcreatecontrol: message too large %d\n", size);
1103: return NULL;
1104: }
1105:
1.19 thorpej 1106: if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
1107: return ((struct mbuf *) NULL);
1.35 itojun 1108: if (CMSG_SPACE(size) > MLEN) {
1.30 itojun 1109: MCLGET(m, M_DONTWAIT);
1110: if ((m->m_flags & M_EXT) == 0) {
1111: m_free(m);
1112: return NULL;
1113: }
1114: }
1.19 thorpej 1115: cp = mtod(m, struct cmsghdr *);
1.26 perry 1116: memcpy(CMSG_DATA(cp), p, size);
1.35 itojun 1117: m->m_len = CMSG_SPACE(size);
1118: cp->cmsg_len = CMSG_LEN(size);
1.19 thorpej 1119: cp->cmsg_level = level;
1120: cp->cmsg_type = type;
1121: return (m);
1.1 cgd 1122: }
CVSweb <webmaster@jp.NetBSD.org>