Annotation of src/sys/kern/uipc_socket2.c, Revision 1.89.6.5
1.89.6.1 mjf 1: /* $NetBSD$ */
1.9 cgd 2:
1.89.6.2 mjf 3: /*-
4: * Copyright (c) 2008 The NetBSD Foundation, Inc.
5: * All rights reserved.
6: *
7: * Redistribution and use in source and binary forms, with or without
8: * modification, are permitted provided that the following conditions
9: * are met:
10: * 1. Redistributions of source code must retain the above copyright
11: * notice, this list of conditions and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice, this list of conditions and the following disclaimer in the
14: * documentation and/or other materials provided with the distribution.
15: *
16: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26: * POSSIBILITY OF SUCH DAMAGE.
27: */
28:
1.1 cgd 29: /*
1.7 mycroft 30: * Copyright (c) 1982, 1986, 1988, 1990, 1993
31: * The Regents of the University of California. All rights reserved.
1.1 cgd 32: *
33: * Redistribution and use in source and binary forms, with or without
34: * modification, are permitted provided that the following conditions
35: * are met:
36: * 1. Redistributions of source code must retain the above copyright
37: * notice, this list of conditions and the following disclaimer.
38: * 2. Redistributions in binary form must reproduce the above copyright
39: * notice, this list of conditions and the following disclaimer in the
40: * documentation and/or other materials provided with the distribution.
1.54 agc 41: * 3. Neither the name of the University nor the names of its contributors
1.1 cgd 42: * may be used to endorse or promote products derived from this software
43: * without specific prior written permission.
44: *
45: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
46: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
49: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55: * SUCH DAMAGE.
56: *
1.23 fvdl 57: * @(#)uipc_socket2.c 8.2 (Berkeley) 2/14/95
1.1 cgd 58: */
1.42 lukem 59:
60: #include <sys/cdefs.h>
1.89.6.1 mjf 61: __KERNEL_RCSID(0, "$NetBSD$");
1.51 martin 62:
63: #include "opt_mbuftrace.h"
1.58 thorpej 64: #include "opt_sb_max.h"
1.1 cgd 65:
1.5 mycroft 66: #include <sys/param.h>
67: #include <sys/systm.h>
68: #include <sys/proc.h>
69: #include <sys/file.h>
70: #include <sys/buf.h>
71: #include <sys/malloc.h>
72: #include <sys/mbuf.h>
73: #include <sys/protosw.h>
1.89.6.2 mjf 74: #include <sys/domain.h>
1.55 christos 75: #include <sys/poll.h>
1.5 mycroft 76: #include <sys/socket.h>
77: #include <sys/socketvar.h>
1.11 christos 78: #include <sys/signalvar.h>
1.71 elad 79: #include <sys/kauth.h>
1.89.6.2 mjf 80: #include <sys/pool.h>
1.89.6.5! mjf 81: #include <sys/uidinfo.h>
1.1 cgd 82:
83: /*
1.89.6.2 mjf 84: * Primitive routines for operating on sockets and socket buffers.
85: *
86: * Locking rules and assumptions:
87: *
88: * o socket::so_lock can change on the fly. The low level routines used
89: * to lock sockets are aware of this. When so_lock is acquired, the
90: * routine locking must check to see if so_lock still points to the
91: * lock that was acquired. If so_lock has changed in the meantime, the
 92: * now irrelevant lock that was acquired must be dropped and the lock
93: * operation retried. Although not proven here, this is completely safe
94: * on a multiprocessor system, even with relaxed memory ordering, given
95: * the next two rules:
96: *
97: * o In order to mutate so_lock, the lock pointed to by the current value
98: * of so_lock must be held: i.e., the socket must be held locked by the
99: * changing thread. The thread must issue membar_exit() to prevent
100: * memory accesses being reordered, and can set so_lock to the desired
101: * value. If the lock pointed to by the new value of so_lock is not
102: * held by the changing thread, the socket must then be considered
103: * unlocked.
104: *
105: * o If so_lock is mutated, and the previous lock referred to by so_lock
106: * could still be visible to other threads in the system (e.g. via file
107: * descriptor or protocol-internal reference), then the old lock must
108: * remain valid until the socket and/or protocol control block has been
109: * torn down.
110: *
111: * o If a socket has a non-NULL so_head value (i.e. is in the process of
112: * connecting), then locking the socket must also lock the socket pointed
113: * to by so_head: their lock pointers must match.
114: *
115: * o If a socket has connections in progress (so_q, so_q0 not empty) then
116: * locking the socket must also lock the sockets attached to both queues.
117: * Again, their lock pointers must match.
118: *
 119: * o Beyond the initial lock assignment in socreate(), assigning locks to
120: * sockets is the responsibility of the individual protocols / protocol
121: * domains.
1.1 cgd 122: */
123:
1.89.6.2 mjf 124: static pool_cache_t socket_cache;
1.1 cgd 125:
1.58 thorpej 126: u_long sb_max = SB_MAX; /* maximum socket buffer size */
127: static u_long sb_max_adj; /* adjusted sb_max */
128:
1.1 cgd 129: /*
130: * Procedures to manipulate state flags of socket
131: * and do appropriate wakeups. Normal sequence from the
132: * active (originating) side is that soisconnecting() is
133: * called during processing of connect() call,
134: * resulting in an eventual call to soisconnected() if/when the
135: * connection is established. When the connection is torn down
136: * soisdisconnecting() is called during processing of disconnect() call,
137: * and soisdisconnected() is called when the connection to the peer
138: * is totally severed. The semantics of these routines are such that
139: * connectionless protocols can call soisconnected() and soisdisconnected()
140: * only, bypassing the in-progress calls when setting up a ``connection''
141: * takes no time.
142: *
143: * From the passive side, a socket is created with
144: * two queues of sockets: so_q0 for connections in progress
145: * and so_q for connections already made and awaiting user acceptance.
146: * As a protocol is preparing incoming connections, it creates a socket
147: * structure queued on so_q0 by calling sonewconn(). When the connection
148: * is established, soisconnected() is called, and transfers the
149: * socket structure to so_q, making it available to accept().
1.66 perry 150: *
1.1 cgd 151: * If a socket is closed with sockets on either
152: * so_q0 or so_q, these sockets are dropped.
153: *
154: * If higher level protocols are implemented in
155: * the kernel, the wakeups done here will sometimes
156: * cause software-interrupt process scheduling.
157: */
158:
1.7 mycroft 159: void
1.37 lukem 160: soisconnecting(struct socket *so)
1.1 cgd 161: {
162:
1.89.6.2 mjf 163: KASSERT(solocked(so));
164:
1.1 cgd 165: so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
166: so->so_state |= SS_ISCONNECTING;
167: }
168:
1.7 mycroft 169: void
1.37 lukem 170: soisconnected(struct socket *so)
1.1 cgd 171: {
1.37 lukem 172: struct socket *head;
1.1 cgd 173:
1.37 lukem 174: head = so->so_head;
1.89.6.2 mjf 175:
176: KASSERT(solocked(so));
177: KASSERT(head == NULL || solocked2(so, head));
178:
1.1 cgd 179: so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
180: so->so_state |= SS_ISCONNECTED;
1.89.6.4 mjf 181: if (head && so->so_onq == &head->so_q0) {
182: if ((so->so_options & SO_ACCEPTFILTER) == 0) {
183: soqremque(so, 0);
184: soqinsque(head, so, 1);
185: sorwakeup(head);
186: cv_broadcast(&head->so_cv);
187: } else {
188: so->so_upcall =
189: head->so_accf->so_accept_filter->accf_callback;
190: so->so_upcallarg = head->so_accf->so_accept_filter_arg;
191: so->so_rcv.sb_flags |= SB_UPCALL;
192: so->so_options &= ~SO_ACCEPTFILTER;
1.89.6.5! mjf 193: (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT); }
1.1 cgd 194: } else {
1.89.6.2 mjf 195: cv_broadcast(&so->so_cv);
1.1 cgd 196: sorwakeup(so);
197: sowwakeup(so);
198: }
199: }
200:
/*
 * soisdisconnecting: note start of disconnection, called during
 * processing of disconnect(); no further data may be sent or
 * received.  Wakes all waiters on the socket.
 */
void
soisdisconnecting(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	cv_broadcast(&so->so_cv);
	sowwakeup(so);
	sorwakeup(so);
}
213:
/*
 * soisdisconnected: note that the connection to the peer has been
 * totally severed.  Wakes all waiters on the socket.
 */
void
soisdisconnected(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
	cv_broadcast(&so->so_cv);
	sowwakeup(so);
	sorwakeup(so);
}
226:
/*
 * soinit2: create the pool cache from which socket structures are
 * allocated (see soget()/soput()).  Called once during startup.
 */
void
soinit2(void)
{

	socket_cache = pool_cache_init(sizeof(struct socket), 0, 0, 0,
	    "socket", NULL, IPL_SOFTNET, NULL, NULL, NULL);
}
234:
/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, SS_ISCONFIRMING, or SS_ISCONNECTED.
 *
 * Returns the new socket, or NULL if the queue limit is reached,
 * allocation fails, or the protocol's PRU_ATTACH fails.
 */
struct socket *
sonewconn(struct socket *head, int connstatus)
{
	struct socket *so;
	int soqueue, error;

	KASSERT(solocked(head));

	/* With an accept filter installed, queue as incomplete (so_q0). */
	if ((head->so_options & SO_ACCEPTFILTER) != 0)
		connstatus = 0;
	soqueue = connstatus ? 1 : 0;
	/* Enforce the listen() backlog limit (with the usual 3/2 fudge). */
	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
		return NULL;
	so = soget(false);
	if (so == NULL)
		return NULL;
	/* New socket shares the listening socket's lock. */
	mutex_obj_hold(head->so_lock);
	so->so_lock = head->so_lock;
	/* Inherit state and options from the listening socket. */
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_nbio = head->so_nbio;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	so->so_send = head->so_send;
	so->so_receive = head->so_receive;
	so->so_uidinfo = head->so_uidinfo;
	so->so_egid = head->so_egid;
	so->so_cpid = head->so_cpid;
#ifdef MBUFTRACE
	so->so_mowner = head->so_mowner;
	so->so_rcv.sb_mowner = head->so_rcv.sb_mowner;
	so->so_snd.sb_mowner = head->so_snd.sb_mowner;
#endif
	(void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
	so->so_snd.sb_lowat = head->so_snd.sb_lowat;
	so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
	so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
	so->so_snd.sb_timeo = head->so_snd.sb_timeo;
	so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE;
	so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
	soqinsque(head, so, soqueue);
	error = (*so->so_proto->pr_usrreq)(so, PRU_ATTACH, NULL, NULL,
	    NULL, NULL);
	KASSERT(solocked(so));
	if (error != 0) {
		/* Attach failed: unwind the queue insertion and free. */
		(void) soqremque(so, soqueue);
		/*
		 * Remove accept filter if one is present.
		 * XXX Is this really needed?
		 */
		if (so->so_accf != NULL)
			(void)accept_filt_clear(so);
		soput(so);
		return NULL;
	}
	if (connstatus) {
		sorwakeup(head);
		cv_broadcast(&head->so_cv);
		so->so_state |= connstatus;
	}
	return so;
}
308:
/*
 * soget: allocate a zeroed socket from the pool cache and initialize
 * its queues, condition variables and selinfo.  If waitok is false the
 * allocation may fail and NULL is returned.  The caller is responsible
 * for assigning so_lock before the socket is used.
 */
struct socket *
soget(bool waitok)
{
	struct socket *so;

	so = pool_cache_get(socket_cache, (waitok ? PR_WAITOK : PR_NOWAIT));
	if (__predict_false(so == NULL))
		return (NULL);
	memset(so, 0, sizeof(*so));
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	cv_init(&so->so_cv, "socket");
	cv_init(&so->so_rcv.sb_cv, "netio");
	cv_init(&so->so_snd.sb_cv, "netio");
	selinit(&so->so_rcv.sb_sel);
	selinit(&so->so_snd.sb_sel);
	/* Back-pointers used by the sockbuf routines. */
	so->so_rcv.sb_so = so;
	so->so_snd.sb_so = so;
	return so;
}
329:
/*
 * soput: release a socket allocated by soget().  The socket must be
 * quiescent (no waiters on any of its condition variables).  Drops
 * the reference held on so_lock.
 */
void
soput(struct socket *so)
{

	KASSERT(!cv_has_waiters(&so->so_cv));
	KASSERT(!cv_has_waiters(&so->so_rcv.sb_cv));
	KASSERT(!cv_has_waiters(&so->so_snd.sb_cv));
	seldestroy(&so->so_rcv.sb_sel);
	seldestroy(&so->so_snd.sb_sel);
	mutex_obj_free(so->so_lock);
	cv_destroy(&so->so_cv);
	cv_destroy(&so->so_rcv.sb_cv);
	cv_destroy(&so->so_snd.sb_cv);
	pool_cache_put(socket_cache, so);
}
345:
/*
 * soqinsque: insert socket 'so' on one of listening socket 'head's
 * accept queues: q == 0 selects so_q0 (incomplete connections),
 * otherwise so_q (connections ready for accept()).  Both sockets
 * must be locked by the same lock.
 */
void
soqinsque(struct socket *head, struct socket *so, int q)
{

	KASSERT(solocked2(head, so));

#ifdef DIAGNOSTIC
	if (so->so_onq != NULL)
		panic("soqinsque");
#endif

	so->so_head = head;
	if (q == 0) {
		head->so_q0len++;
		so->so_onq = &head->so_q0;
	} else {
		head->so_qlen++;
		so->so_onq = &head->so_q;
	}
	TAILQ_INSERT_TAIL(so->so_onq, so, so_qe);
}
367:
/*
 * soqremque: remove socket 'so' from the accept queue selected by q
 * (0 = so_q0, else so_q).  Returns 1 on success, or 0 if the socket
 * was not on the indicated queue.
 */
int
soqremque(struct socket *so, int q)
{
	struct socket *head;

	head = so->so_head;

	KASSERT(solocked(so));
	if (q == 0) {
		if (so->so_onq != &head->so_q0)
			return (0);
		head->so_q0len--;
	} else {
		if (so->so_onq != &head->so_q)
			return (0);
		head->so_qlen--;
	}
	KASSERT(solocked2(so, head));
	TAILQ_REMOVE(so->so_onq, so, so_qe);
	so->so_onq = NULL;
	so->so_head = NULL;
	return (1);
}
391:
392: /*
393: * Socantsendmore indicates that no more data will be sent on the
394: * socket; it would normally be applied to a socket when the user
395: * informs the system that no more data is to be sent, by the protocol
396: * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data
397: * will be received, and will normally be applied to the socket by a
398: * protocol when it detects that the peer will send no more data.
399: * Data queued for reading in the socket may yet be read.
400: */
401:
/*
 * socantsendmore: note that no more data will be sent on the socket
 * and wake up any writers.
 */
void
socantsendmore(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state |= SS_CANTSENDMORE;
	sowwakeup(so);
}
411:
/*
 * socantrcvmore: note that no more data will be received on the
 * socket (data already queued may still be read); wake up readers.
 */
void
socantrcvmore(struct socket *so)
{

	KASSERT(solocked(so));

	so->so_state |= SS_CANTRCVMORE;
	sorwakeup(so);
}
421:
422: /*
423: * Wait for data to arrive at/drain from a socket buffer.
424: */
/*
 * sbwait: wait for data to arrive at/drain from a socket buffer.
 * Sleeps interruptibly unless SB_NOINTR is set.  Returns 0 on wakeup
 * or the error from cv_timedwait{,_sig}() (e.g. on timeout/signal).
 */
int
sbwait(struct sockbuf *sb)
{
	struct socket *so;
	kmutex_t *lock;
	int error;

	so = sb->sb_so;

	KASSERT(solocked(so));

	sb->sb_flags |= SB_NOTIFY;
	lock = so->so_lock;
	if ((sb->sb_flags & SB_NOINTR) != 0)
		error = cv_timedwait(&sb->sb_cv, lock, sb->sb_timeo);
	else
		error = cv_timedwait_sig(&sb->sb_cv, lock, sb->sb_timeo);
	/* so_lock may have been changed while asleep; chase the new lock. */
	if (__predict_false(lock != so->so_lock))
		solockretry(so, lock);
	return error;
}
446:
447: /*
448: * Wakeup processes waiting on a socket buffer.
449: * Do asynchronous notification via SIGIO
1.39 manu 450: * if the socket buffer has the SB_ASYNC flag set.
1.1 cgd 451: */
/*
 * sowakeup: wake processes waiting on a socket buffer.  Does
 * asynchronous notification via SIGIO if the socket buffer has the
 * SB_ASYNC flag set, and invokes the upcall if SB_UPCALL is set.
 * 'code' is POLL_IN for the receive side, POLL_OUT otherwise.
 */
void
sowakeup(struct socket *so, struct sockbuf *sb, int code)
{
	int band;

	KASSERT(solocked(so));
	KASSERT(sb->sb_so == so);

	if (code == POLL_IN)
		band = POLLIN|POLLRDNORM;
	else
		band = POLLOUT|POLLWRNORM;
	sb->sb_flags &= ~SB_NOTIFY;
	selnotify(&sb->sb_sel, band, NOTE_SUBMIT);
	cv_broadcast(&sb->sb_cv);
	if (sb->sb_flags & SB_ASYNC)
		fownsignal(so->so_pgid, SIGIO, code, band, so);
	if (sb->sb_flags & SB_UPCALL)
		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
}
472:
473: /*
1.89.6.3 mjf 474: * Reset a socket's lock pointer. Wake all threads waiting on the
475: * socket's condition variables so that they can restart their waits
476: * using the new lock. The existing lock must be held.
477: */
void
solockreset(struct socket *so, kmutex_t *lock)
{

	KASSERT(solocked(so));

	/* Publish the new lock, then wake all waiters so they re-wait on it. */
	so->so_lock = lock;
	cv_broadcast(&so->so_snd.sb_cv);
	cv_broadcast(&so->so_rcv.sb_cv);
	cv_broadcast(&so->so_cv);
}
489:
490: /*
1.1 cgd 491: * Socket buffer (struct sockbuf) utility routines.
492: *
493: * Each socket contains two socket buffers: one for sending data and
494: * one for receiving data. Each buffer contains a queue of mbufs,
495: * information about the number of mbufs and amount of data in the
1.13 mycroft 496: * queue, and other fields allowing poll() statements and notification
1.1 cgd 497: * on data availability to be implemented.
498: *
499: * Data stored in a socket buffer is maintained as a list of records.
500: * Each record is a list of mbufs chained together with the m_next
501: * field. Records are chained together with the m_nextpkt field. The upper
502: * level routine soreceive() expects the following conventions to be
503: * observed when placing information in the receive buffer:
504: *
505: * 1. If the protocol requires each message be preceded by the sender's
506: * name, then a record containing that name must be present before
507: * any associated data (mbuf's must be of type MT_SONAME).
508: * 2. If the protocol supports the exchange of ``access rights'' (really
509: * just additional data associated with the message), and there are
510: * ``rights'' to be received, then a record containing this data
1.10 mycroft 511: * should be present (mbuf's must be of type MT_CONTROL).
1.1 cgd 512: * 3. If a name or rights record exists, then it must be followed by
513: * a data record, perhaps of zero length.
514: *
515: * Before using a new socket structure it is first necessary to reserve
516: * buffer space to the socket, by calling sbreserve(). This should commit
517: * some of the available buffer space in the system buffer pool for the
518: * socket (currently, it does nothing but enforce limits). The space
519: * should be released by calling sbrelease() when the socket is destroyed.
520: */
521:
/*
 * sb_max_set: set the global socket buffer size limit (sb_max) and
 * recompute the adjusted limit sb_max_adj, which accounts for mbuf
 * cluster overhead.  Values below 16KB are rejected with EINVAL.
 */
int
sb_max_set(u_long new_sbmax)
{
	int s;

	if (new_sbmax < (16 * 1024))
		return (EINVAL);

	s = splsoftnet();
	sb_max = new_sbmax;
	sb_max_adj = (u_quad_t)new_sbmax * MCLBYTES / (MSIZE + MCLBYTES);
	splx(s);

	return (0);
}
537:
/*
 * soreserve: reserve send and receive buffer space for a socket.
 * Returns 0 on success or ENOBUFS if either reservation fails;
 * on failure no space remains reserved.
 */
int
soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
{

	KASSERT(so->so_lock == NULL || solocked(so));

	/*
	 * there's at least one application (a configure script of screen)
	 * which expects a fifo is writable even if it has "some" bytes
	 * in its buffer.
	 * so we want to make sure (hiwat - lowat) >= (some bytes).
	 *
	 * PIPE_BUF here is an arbitrary value chosen as (some bytes) above.
	 * we expect it's large enough for such applications.
	 */
	u_long lowat = MAX(sock_loan_thresh, MCLBYTES);
	u_long hiwat = lowat + PIPE_BUF;

	if (sndcc < hiwat)
		sndcc = hiwat;
	if (sbreserve(&so->so_snd, sndcc, so) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc, so) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = lowat;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
 bad2:
	sbrelease(&so->so_snd, so);
 bad:
	return (ENOBUFS);
}
574:
575: /*
576: * Allot mbufs to a sockbuf.
577: * Attempt to scale mbmax so that mbcnt doesn't become limiting
578: * if buffering efficiency is near the normal case.
579: */
/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 * Returns 1 on success, 0 on failure (cc out of range, or the
 * per-uid sbsize limit would be exceeded).
 */
int
sbreserve(struct sockbuf *sb, u_long cc, struct socket *so)
{
	struct lwp *l = curlwp; /* XXX */
	rlim_t maxcc;
	struct uidinfo *uidinfo;

	KASSERT(so->so_lock == NULL || solocked(so));
	KASSERT(sb->sb_so == so);
	KASSERT(sb_max_adj != 0);

	if (cc == 0 || cc > sb_max_adj)
		return (0);

	/* Apply RLIMIT_SBSIZE only when reserving on our own behalf. */
	if (kauth_cred_geteuid(l->l_cred) == so->so_uidinfo->ui_uid)
		maxcc = l->l_proc->p_rlimit[RLIMIT_SBSIZE].rlim_cur;
	else
		maxcc = RLIM_INFINITY;

	uidinfo = so->so_uidinfo;
	if (!chgsbsize(uidinfo, &sb->sb_hiwat, cc, maxcc))
		return 0;
	sb->sb_mbmax = min(cc * 2, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}
607:
/*
 * Free mbufs held by a socket, and reserved mbuf space.  We do not assert
 * that the socket is held locked here: see sorflush().  Returns the
 * per-uid sbsize accounting charged by sbreserve().
 */
void
sbrelease(struct sockbuf *sb, struct socket *so)
{

	KASSERT(sb->sb_so == so);

	sbflush(sb);
	(void)chgsbsize(so->so_uidinfo, &sb->sb_hiwat, 0, RLIM_INFINITY);
	sb->sb_mbmax = 0;
}
622:
623: /*
624: * Routines to add and remove
625: * data from an mbuf queue.
626: *
627: * The routines sbappend() or sbappendrecord() are normally called to
628: * append new mbufs to a socket buffer, after checking that adequate
629: * space is available, comparing the function sbspace() with the amount
630: * of data to be added. sbappendrecord() differs from sbappend() in
631: * that data supplied is treated as the beginning of a new record.
632: * To place a sender's address, optional access rights, and data in a
633: * socket receive buffer, sbappendaddr() should be used. To place
634: * access rights and data in a socket receive buffer, sbappendrights()
635: * should be used. In either case, the new data begins a new record.
636: * Note that unlike sbappend() and sbappendrecord(), these routines check
637: * for the caller that there will be enough space to store the data.
638: * Each fails if there is not enough space, or if it cannot find mbufs
639: * to store additional information in.
640: *
641: * Reliable protocols may use the socket send buffer to hold data
642: * awaiting acknowledgement. Data is normally copied from a socket
643: * send buffer in a protocol with m_copy for output to a peer,
644: * and then removing the data from the socket buffer with sbdrop()
645: * or sbdroprecord() when the data is acknowledged by the peer.
646: */
647:
1.43 thorpej 648: #ifdef SOCKBUF_DEBUG
/*
 * sblastrecordchk: (SOCKBUF_DEBUG) verify that sb_lastrecord really
 * points at the last record in the buffer; panic with diagnostics
 * identifying the caller otherwise.
 */
void
sblastrecordchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;

	KASSERT(solocked(sb->sb_so));

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	if (m != sb->sb_lastrecord) {
		printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
		    sb->sb_mb, sb->sb_lastrecord, m);
		printf("packet chain:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
			printf("\t%p\n", m);
		panic("sblastrecordchk from %s", where);
	}
}
668:
/*
 * sblastmbufchk: (SOCKBUF_DEBUG) verify that sb_mbtail really points
 * at the last mbuf of the last record; panic with diagnostics
 * identifying the caller otherwise.
 */
void
sblastmbufchk(struct sockbuf *sb, const char *where)
{
	struct mbuf *m = sb->sb_mb;
	struct mbuf *n;

	KASSERT(solocked(sb->sb_so));

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	while (m && m->m_next)
		m = m->m_next;

	if (m != sb->sb_mbtail) {
		printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
		    sb->sb_mb, sb->sb_mbtail, m);
		printf("packet tree:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			printf("\t");
			for (n = m; n != NULL; n = n->m_next)
				printf("%p ", n);
			printf("\n");
		}
		panic("sblastmbufchk from %s", where);
	}
}
696: #endif /* SOCKBUF_DEBUG */
697:
/*
 * Link a chain of records onto a socket buffer: append record m0 to
 * the record list and make mlast (the last record of the chain) the
 * new sb_lastrecord.
 */
#define	SBLINKRECORDCHAIN(sb, m0, mlast)				\
do {									\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (mlast);					\
} while (/*CONSTCOND*/0)

/* Single-record convenience form of SBLINKRECORDCHAIN(). */
#define	SBLINKRECORD(sb, m0)						\
	SBLINKRECORDCHAIN(sb, m0, m0)
713:
1.1 cgd 714: /*
715: * Append mbuf chain m to the last record in the
716: * socket buffer sb. The additional space associated
717: * the mbuf chain is recorded in sb. Empty mbufs are
718: * discarded and mbufs are compacted where possible.
719: */
1.7 mycroft 720: void
1.37 lukem 721: sbappend(struct sockbuf *sb, struct mbuf *m)
1.1 cgd 722: {
1.37 lukem 723: struct mbuf *n;
1.1 cgd 724:
1.89.6.2 mjf 725: KASSERT(solocked(sb->sb_so));
726:
1.1 cgd 727: if (m == 0)
728: return;
1.43 thorpej 729:
1.49 matt 730: #ifdef MBUFTRACE
1.65 jonathan 731: m_claimm(m, sb->sb_mowner);
1.49 matt 732: #endif
733:
1.43 thorpej 734: SBLASTRECORDCHK(sb, "sbappend 1");
735:
736: if ((n = sb->sb_lastrecord) != NULL) {
737: /*
738: * XXX Would like to simply use sb_mbtail here, but
739: * XXX I need to verify that I won't miss an EOR that
740: * XXX way.
741: */
1.1 cgd 742: do {
743: if (n->m_flags & M_EOR) {
744: sbappendrecord(sb, m); /* XXXXXX!!!! */
745: return;
746: }
747: } while (n->m_next && (n = n->m_next));
1.43 thorpej 748: } else {
749: /*
750: * If this is the first record in the socket buffer, it's
751: * also the last record.
752: */
753: sb->sb_lastrecord = m;
1.1 cgd 754: }
755: sbcompress(sb, m, n);
1.43 thorpej 756: SBLASTRECORDCHK(sb, "sbappend 2");
757: }
758:
/*
 * This version of sbappend() should only be used when the caller
 * absolutely knows that there will never be more than one record
 * in the socket buffer, that is, a stream protocol (such as TCP).
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	KASSERT(solocked(sb->sb_so));
	KDASSERT(m->m_nextpkt == NULL);
	KASSERT(sb->sb_mb == sb->sb_lastrecord);

	SBLASTMBUFCHK(sb, __func__);

#ifdef MBUFTRACE
	m_claimm(m, sb->sb_mowner);
#endif

	/* Single-record invariant lets us append straight at sb_mbtail. */
	sbcompress(sb, m, sb->sb_mbtail);

	sb->sb_lastrecord = sb->sb_mb;
	SBLASTRECORDCHK(sb, __func__);
}
783:
784: #ifdef SOCKBUF_DEBUG
1.7 mycroft 785: void
1.37 lukem 786: sbcheck(struct sockbuf *sb)
1.1 cgd 787: {
1.89.6.2 mjf 788: struct mbuf *m, *m2;
1.43 thorpej 789: u_long len, mbcnt;
1.1 cgd 790:
1.89.6.2 mjf 791: KASSERT(solocked(sb->sb_so));
792:
1.37 lukem 793: len = 0;
794: mbcnt = 0;
1.89.6.2 mjf 795: for (m = sb->sb_mb; m; m = m->m_nextpkt) {
796: for (m2 = m; m2 != NULL; m2 = m2->m_next) {
797: len += m2->m_len;
798: mbcnt += MSIZE;
799: if (m2->m_flags & M_EXT)
800: mbcnt += m2->m_ext.ext_size;
801: if (m2->m_nextpkt != NULL)
802: panic("sbcheck nextpkt");
803: }
1.1 cgd 804: }
805: if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
1.43 thorpej 806: printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
1.1 cgd 807: mbcnt, sb->sb_mbcnt);
808: panic("sbcheck");
809: }
810: }
811: #endif
812:
/*
 * As above, except the mbuf chain
 * begins a new record.
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m;

	KASSERT(solocked(sb->sb_so));

	if (m0 == 0)
		return;

#ifdef MBUFTRACE
	m_claimm(m0, sb->sb_mowner);
#endif
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 1");
	SBLINKRECORD(sb, m0);
	m = m0->m_next;
	m0->m_next = 0;
	/* M_EOR belongs on the last mbuf of the record; push it along. */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbappendrecord 2");
}
846:
/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
void
sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m, **mp;

	KASSERT(solocked(sb->sb_so));

	if (m0 == 0)
		return;

	SBLASTRECORDCHK(sb, "sbinsertoob 1");

	/* Find the insertion point: after any existing OOB-data records. */
	for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
	    again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			if ((m = m->m_next) != NULL)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	if (*mp == NULL) {
		/* m0 is actually the new tail */
		sb->sb_lastrecord = m0;
	}
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	/* M_EOR belongs on the last mbuf of the record; push it along. */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
	SBLASTRECORDCHK(sb, "sbinsertoob 2");
}
897:
/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
int
sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0,
    struct mbuf *control)
{
	struct mbuf *m, *n, *nlast;
	int space, len;

	KASSERT(solocked(sb->sb_so));

	/* Start with the space needed for the address itself. */
	space = asa->sa_len;

	if (m0 != NULL) {
		if ((m0->m_flags & M_PKTHDR) == 0)
			panic("sbappendaddr");
		space += m0->m_pkthdr.len;
#ifdef MBUFTRACE
		m_claimm(m0, sb->sb_mowner);
#endif
	}
	/* Charge the control chain too, remembering its last mbuf in n. */
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		MCLAIM(n, sb->sb_mowner);
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	/* Allocate an mbuf to hold a copy of the address. */
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	MCLAIM(m, sb->sb_mowner);
	/*
	 * XXX avoid 'comparison always true' warning which isn't easily
	 * avoided.
	 */
	len = asa->sa_len;
	if (len > MLEN) {
		/* Address won't fit in a plain mbuf; use external storage. */
		MEXTMALLOC(m, asa->sa_len, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return (0);
		}
	}
	m->m_len = asa->sa_len;
	memcpy(mtod(m, void *), asa, asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;		/* no control mbufs: record is addr+data */
	m->m_next = control;

	SBLASTRECORDCHK(sb, "sbappendaddr 1");

	/* Account for every mbuf of the new record (addr+control+data). */
	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(sb, n);
	sballoc(sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb, "sbappendaddr");
	SBLASTRECORDCHK(sb, "sbappendaddr 2");

	return (1);
}
969:
/*
 * Helper for sbappendchainaddr: prepend a struct sockaddr* to
 * an mbuf chain.  Returns the new head of the chain, or NULL if
 * no mbuf could be allocated.  The socket must be locked.
 */
static inline struct mbuf *
m_prepend_sockaddr(struct sockbuf *sb, struct mbuf *m0,
    const struct sockaddr *asa)
{
	struct mbuf *m;
	const int salen = asa->sa_len;

	KASSERT(solocked(sb->sb_so));

	/* only the first in each chain need be a pkthdr */
	MGETHDR(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	MCLAIM(m, sb->sb_mowner);
#ifdef notyet
	if (salen > MHLEN) {
		MEXTMALLOC(m, salen, M_NOWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return (0);
		}
	}
#else
	/* Oversized addresses not handled yet; must fit in the header mbuf. */
	KASSERT(salen <= MHLEN);
#endif
	m->m_len = salen;
	memcpy(mtod(m, void *), asa, salen);
	m->m_next = m0;
	/* The new packet header covers both the address and the data. */
	m->m_pkthdr.len = salen + m0->m_pkthdr.len;

	return m;
}
1006:
1007: int
1008: sbappendaddrchain(struct sockbuf *sb, const struct sockaddr *asa,
1009: struct mbuf *m0, int sbprio)
1010: {
1011: int space;
1012: struct mbuf *m, *n, *n0, *nlast;
1013: int error;
1014:
1.89.6.2 mjf 1015: KASSERT(solocked(sb->sb_so));
1016:
1.63 jonathan 1017: /*
1018: * XXX sbprio reserved for encoding priority of this* request:
1019: * SB_PRIO_NONE --> honour normal sb limits
1020: * SB_PRIO_ONESHOT_OVERFLOW --> if socket has any space,
1021: * take whole chain. Intended for large requests
1022: * that should be delivered atomically (all, or none).
1023: * SB_PRIO_OVERDRAFT -- allow a small (2*MLEN) overflow
1024: * over normal socket limits, for messages indicating
1025: * buffer overflow in earlier normal/lower-priority messages
1026: * SB_PRIO_BESTEFFORT --> ignore limits entirely.
1027: * Intended for kernel-generated messages only.
1028: * Up to generator to avoid total mbuf resource exhaustion.
1029: */
1030: (void)sbprio;
1031:
1032: if (m0 && (m0->m_flags & M_PKTHDR) == 0)
1033: panic("sbappendaddrchain");
1034:
1035: space = sbspace(sb);
1.66 perry 1036:
1.63 jonathan 1037: #ifdef notyet
1.66 perry 1038: /*
1.63 jonathan 1039: * Enforce SB_PRIO_* limits as described above.
1040: */
1041: #endif
1042:
1043: n0 = NULL;
1044: nlast = NULL;
1045: for (m = m0; m; m = m->m_nextpkt) {
1046: struct mbuf *np;
1047:
1.64 jonathan 1048: #ifdef MBUFTRACE
1.65 jonathan 1049: m_claimm(m, sb->sb_mowner);
1.64 jonathan 1050: #endif
1051:
1.63 jonathan 1052: /* Prepend sockaddr to this record (m) of input chain m0 */
1.64 jonathan 1053: n = m_prepend_sockaddr(sb, m, asa);
1.63 jonathan 1054: if (n == NULL) {
1055: error = ENOBUFS;
1056: goto bad;
1057: }
1058:
1059: /* Append record (asa+m) to end of new chain n0 */
1060: if (n0 == NULL) {
1061: n0 = n;
1062: } else {
1063: nlast->m_nextpkt = n;
1064: }
1065: /* Keep track of last record on new chain */
1066: nlast = n;
1067:
1068: for (np = n; np; np = np->m_next)
1069: sballoc(sb, np);
1070: }
1071:
1.64 jonathan 1072: SBLASTRECORDCHK(sb, "sbappendaddrchain 1");
1073:
1.63 jonathan 1074: /* Drop the entire chain of (asa+m) records onto the socket */
1075: SBLINKRECORDCHAIN(sb, n0, nlast);
1.64 jonathan 1076:
1077: SBLASTRECORDCHK(sb, "sbappendaddrchain 2");
1078:
1.63 jonathan 1079: for (m = nlast; m->m_next; m = m->m_next)
1080: ;
1081: sb->sb_mbtail = m;
1.64 jonathan 1082: SBLASTMBUFCHK(sb, "sbappendaddrchain");
1083:
1.63 jonathan 1084: return (1);
1085:
1086: bad:
1.64 jonathan 1087: /*
1088: * On error, free the prepended addreseses. For consistency
1089: * with sbappendaddr(), leave it to our caller to free
1090: * the input record chain passed to us as m0.
1091: */
1092: while ((n = n0) != NULL) {
1093: struct mbuf *np;
1094:
1095: /* Undo the sballoc() of this record */
1096: for (np = n; np; np = np->m_next)
1097: sbfree(sb, np);
1098:
1099: n0 = n->m_nextpkt; /* iterate at next prepended address */
1100: MFREE(n, np); /* free prepended address (not data) */
1101: }
1.66 perry 1102: return 0;
1.63 jonathan 1103: }
1104:
1105:
/*
 * sbappendcontrol: append a record consisting of control (ancillary)
 * data, followed by the data chain m0, to socket buffer sb.
 * Returns 0 if there is insufficient space, 1 on success.
 */
int
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
	struct mbuf *m, *mlast, *n;
	int space;

	KASSERT(solocked(sb->sb_so));

	space = 0;
	if (control == 0)
		panic("sbappendcontrol");
	/* Charge the control chain; the loop leaves m at its last mbuf. */
	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		MCLAIM(m, sb->sb_mowner);
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	/* Charge the data chain as well. */
	for (m = m0; m; m = m->m_next) {
		MCLAIM(m, sb->sb_mowner);
		space += m->m_len;
	}
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;			/* concatenate data to control */

	SBLASTRECORDCHK(sb, "sbappendcontrol 1");

	/* Account for each mbuf of the new record; remember the last one. */
	for (m = control; m->m_next != NULL; m = m->m_next)
		sballoc(sb, m);
	sballoc(sb, m);
	mlast = m;
	SBLINKRECORD(sb, control);

	sb->sb_mbtail = mlast;
	SBLASTMBUFCHK(sb, "sbappendcontrol");
	SBLASTRECORDCHK(sb, "sbappendcontrol 2");

	return (1);
}
1146:
/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int eor;
	struct mbuf *o;

	KASSERT(solocked(sb->sb_so));

	eor = 0;
	while (m) {
		/* Collect any end-of-record mark; it is re-applied at the end. */
		eor |= m->m_flags & M_EOR;
		/*
		 * Discard an empty mbuf, but only if either no EOR mark is
		 * pending or there is a following mbuf of the same type
		 * (o) that the mark can eventually land on.
		 */
		if (m->m_len == 0 &&
		    (eor == 0 ||
		    (((o = m->m_next) || (o = n)) &&
		    o->m_type == m->m_type)) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		/*
		 * Coalesce: copy a small mbuf into the trailing space of
		 * the previous one when types match and there is room.
		 */
		if (n && (n->m_flags & M_EOR) == 0 &&
		    /* M_TRAILINGSPACE() checks buffer writeability */
		    m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			memcpy(mtod(n, char *) + n->m_len, mtod(m, void *),
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		/* Otherwise, link this mbuf in as the new chain tail. */
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	/* Re-apply a collected EOR mark to the last mbuf that was kept. */
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
	SBLASTMBUFCHK(sb, __func__);
}
1203:
1204: /*
1205: * Free all mbufs in a sockbuf.
1206: * Check that all resources are reclaimed.
1207: */
1.7 mycroft 1208: void
1.37 lukem 1209: sbflush(struct sockbuf *sb)
1.1 cgd 1210: {
1211:
1.89.6.2 mjf 1212: KASSERT(solocked(sb->sb_so));
1.43 thorpej 1213: KASSERT((sb->sb_flags & SB_LOCK) == 0);
1214:
1.1 cgd 1215: while (sb->sb_mbcnt)
1216: sbdrop(sb, (int)sb->sb_cc);
1.43 thorpej 1217:
1218: KASSERT(sb->sb_cc == 0);
1219: KASSERT(sb->sb_mb == NULL);
1220: KASSERT(sb->sb_mbtail == NULL);
1221: KASSERT(sb->sb_lastrecord == NULL);
1.1 cgd 1222: }
1223:
/*
 * Drop data from (the front of) a sockbuf.
 */
void
sbdrop(struct sockbuf *sb, int len)
{
	struct mbuf *m, *mn, *next;

	KASSERT(solocked(sb->sb_so));

	/* Remember where the following record starts, if any. */
	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	while (len > 0) {
		if (m == 0) {
			/* Current record exhausted; advance to the next. */
			if (next == 0)
				panic("sbdrop");
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			/* Partial mbuf: trim the front and stop. */
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		/* Whole mbuf consumed: free it and keep going. */
		len -= m->m_len;
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	/* Also discard any now-empty mbufs left at the new front. */
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	/* Reattach the remainder (if any) to the record list. */
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part
	 * makes sure sb_lastrecord is up-to-date if we dropped
	 * part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL)
		sb->sb_lastrecord = m;
}
1276:
1277: /*
1278: * Drop a record off the front of a sockbuf
1279: * and move the next record to the front.
1280: */
1.7 mycroft 1281: void
1.37 lukem 1282: sbdroprecord(struct sockbuf *sb)
1.1 cgd 1283: {
1.37 lukem 1284: struct mbuf *m, *mn;
1.1 cgd 1285:
1.89.6.2 mjf 1286: KASSERT(solocked(sb->sb_so));
1287:
1.1 cgd 1288: m = sb->sb_mb;
1289: if (m) {
1290: sb->sb_mb = m->m_nextpkt;
1291: do {
1292: sbfree(sb, m);
1293: MFREE(m, mn);
1.11 christos 1294: } while ((m = mn) != NULL);
1.1 cgd 1295: }
1.45 thorpej 1296: SB_EMPTY_FIXUP(sb);
1.19 thorpej 1297: }
1298:
1299: /*
1300: * Create a "control" mbuf containing the specified data
1301: * with the specified type for presentation on a socket buffer.
1302: */
1303: struct mbuf *
1.82 christos 1304: sbcreatecontrol(void *p, int size, int type, int level)
1.19 thorpej 1305: {
1.37 lukem 1306: struct cmsghdr *cp;
1307: struct mbuf *m;
1.19 thorpej 1308:
1.35 itojun 1309: if (CMSG_SPACE(size) > MCLBYTES) {
1.30 itojun 1310: printf("sbcreatecontrol: message too large %d\n", size);
1311: return NULL;
1312: }
1313:
1.19 thorpej 1314: if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL)
1315: return ((struct mbuf *) NULL);
1.35 itojun 1316: if (CMSG_SPACE(size) > MLEN) {
1.30 itojun 1317: MCLGET(m, M_DONTWAIT);
1318: if ((m->m_flags & M_EXT) == 0) {
1319: m_free(m);
1320: return NULL;
1321: }
1322: }
1.19 thorpej 1323: cp = mtod(m, struct cmsghdr *);
1.26 perry 1324: memcpy(CMSG_DATA(cp), p, size);
1.35 itojun 1325: m->m_len = CMSG_SPACE(size);
1326: cp->cmsg_len = CMSG_LEN(size);
1.19 thorpej 1327: cp->cmsg_level = level;
1328: cp->cmsg_type = type;
1329: return (m);
1.1 cgd 1330: }
1.89.6.2 mjf 1331:
1332: void
1333: solockretry(struct socket *so, kmutex_t *lock)
1334: {
1335:
1336: while (lock != so->so_lock) {
1337: mutex_exit(lock);
1338: lock = so->so_lock;
1339: mutex_enter(lock);
1340: }
1341: }
1342:
1343: bool
1344: solocked(struct socket *so)
1345: {
1346:
1347: return mutex_owned(so->so_lock);
1348: }
1349:
1350: bool
1351: solocked2(struct socket *so1, struct socket *so2)
1352: {
1353: kmutex_t *lock;
1354:
1355: lock = so1->so_lock;
1356: if (lock != so2->so_lock)
1357: return false;
1358: return mutex_owned(lock);
1359: }
1360:
1361: /*
1362: * Assign a default lock to a new socket. For PRU_ATTACH, and done by
1363: * protocols that do not have special locking requirements.
1364: */
1365: void
1366: sosetlock(struct socket *so)
1367: {
1368: kmutex_t *lock;
1369:
1370: if (so->so_lock == NULL) {
1371: lock = softnet_lock;
1372: so->so_lock = lock;
1373: mutex_obj_hold(lock);
1374: mutex_enter(lock);
1375: }
1376:
1377: /* In all cases, lock must be held on return from PRU_ATTACH. */
1378: KASSERT(solocked(so));
1379: }
1380:
/*
 * Set lock on sockbuf sb; sleep if lock is already held.
 * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
 * Returns error without lock if sleep is interrupted.
 */
int
sblock(struct sockbuf *sb, int wf)
{
	struct socket *so;
	kmutex_t *lock;
	int error;

	KASSERT(solocked(sb->sb_so));

	for (;;) {
		/* Fast path: lock is free, take it and return. */
		if (__predict_true((sb->sb_flags & SB_LOCK) == 0)) {
			sb->sb_flags |= SB_LOCK;
			return 0;
		}
		/* Lock is held by someone else; fail unless told to wait. */
		if (wf != M_WAITOK)
			return EWOULDBLOCK;
		so = sb->sb_so;
		lock = so->so_lock;
		if ((sb->sb_flags & SB_NOINTR) != 0) {
			/* Uninterruptible wait for sbunlock()'s broadcast. */
			cv_wait(&so->so_cv, lock);
			error = 0;
		} else
			error = cv_wait_sig(&so->so_cv, lock);
		/*
		 * The socket's lock may have changed identity while we
		 * slept; chase it before re-checking SB_LOCK.
		 */
		if (__predict_false(lock != so->so_lock))
			solockretry(so, lock);
		if (error != 0)
			return error;
	}
}
1415:
1416: void
1417: sbunlock(struct sockbuf *sb)
1418: {
1419: struct socket *so;
1420:
1421: so = sb->sb_so;
1422:
1423: KASSERT(solocked(so));
1424: KASSERT((sb->sb_flags & SB_LOCK) != 0);
1425:
1426: sb->sb_flags &= ~SB_LOCK;
1427: cv_broadcast(&so->so_cv);
1428: }
1429:
1430: int
1431: sowait(struct socket *so, int timo)
1432: {
1433: kmutex_t *lock;
1434: int error;
1435:
1436: KASSERT(solocked(so));
1437:
1438: lock = so->so_lock;
1439: error = cv_timedwait_sig(&so->so_cv, lock, timo);
1440: if (__predict_false(lock != so->so_lock))
1441: solockretry(so, lock);
1442: return error;
1443: }
CVSweb <webmaster@jp.NetBSD.org>