Annotation of src/sys/kern/uipc_socket2.c, Revision 1.109.2.2
1.109.2.2! yamt 1: /* $NetBSD: uipc_socket2.c,v 1.109.2.1 2012/04/17 00:08:31 yamt Exp $ */
1.91 ad 2:
3: /*-
4: * Copyright (c) 2008 The NetBSD Foundation, Inc.
5: * All rights reserved.
6: *
7: * Redistribution and use in source and binary forms, with or without
8: * modification, are permitted provided that the following conditions
9: * are met:
10: * 1. Redistributions of source code must retain the above copyright
11: * notice, this list of conditions and the following disclaimer.
12: * 2. Redistributions in binary form must reproduce the above copyright
13: * notice, this list of conditions and the following disclaimer in the
14: * documentation and/or other materials provided with the distribution.
15: *
16: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26: * POSSIBILITY OF SUCH DAMAGE.
27: */
1.9 cgd 28:
1.1 cgd 29: /*
1.7 mycroft 30: * Copyright (c) 1982, 1986, 1988, 1990, 1993
31: * The Regents of the University of California. All rights reserved.
1.1 cgd 32: *
33: * Redistribution and use in source and binary forms, with or without
34: * modification, are permitted provided that the following conditions
35: * are met:
36: * 1. Redistributions of source code must retain the above copyright
37: * notice, this list of conditions and the following disclaimer.
38: * 2. Redistributions in binary form must reproduce the above copyright
39: * notice, this list of conditions and the following disclaimer in the
40: * documentation and/or other materials provided with the distribution.
1.54 agc 41: * 3. Neither the name of the University nor the names of its contributors
1.1 cgd 42: * may be used to endorse or promote products derived from this software
43: * without specific prior written permission.
44: *
45: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
46: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
49: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55: * SUCH DAMAGE.
56: *
1.23 fvdl 57: * @(#)uipc_socket2.c 8.2 (Berkeley) 2/14/95
1.1 cgd 58: */
1.42 lukem 59:
60: #include <sys/cdefs.h>
1.109.2.2! yamt 61: __KERNEL_RCSID(0, "$NetBSD: uipc_socket2.c,v 1.109.2.1 2012/04/17 00:08:31 yamt Exp $");
1.51 martin 62:
63: #include "opt_mbuftrace.h"
1.58 thorpej 64: #include "opt_sb_max.h"
1.1 cgd 65:
1.5 mycroft 66: #include <sys/param.h>
67: #include <sys/systm.h>
68: #include <sys/proc.h>
69: #include <sys/file.h>
70: #include <sys/buf.h>
71: #include <sys/mbuf.h>
72: #include <sys/protosw.h>
1.91 ad 73: #include <sys/domain.h>
1.55 christos 74: #include <sys/poll.h>
1.5 mycroft 75: #include <sys/socket.h>
76: #include <sys/socketvar.h>
1.11 christos 77: #include <sys/signalvar.h>
1.71 elad 78: #include <sys/kauth.h>
1.91 ad 79: #include <sys/pool.h>
1.98 pooka 80: #include <sys/uidinfo.h>
1.1 cgd 81:
82: /*
1.91 ad 83: * Primitive routines for operating on sockets and socket buffers.
84: *
85: * Locking rules and assumptions:
86: *
87: * o socket::so_lock can change on the fly. The low level routines used
88: * to lock sockets are aware of this. When so_lock is acquired, the
89: * routine locking must check to see if so_lock still points to the
90: * lock that was acquired. If so_lock has changed in the meantime, the
91: * now irrelevant lock that was acquired must be dropped and the lock
92: * operation retried. Although not proven here, this is completely safe
93: * on a multiprocessor system, even with relaxed memory ordering, given
94: * the next two rules:
95: *
96: * o In order to mutate so_lock, the lock pointed to by the current value
97: * of so_lock must be held: i.e., the socket must be held locked by the
98: * changing thread. The thread must issue membar_exit() to prevent
99: * memory accesses being reordered, and can set so_lock to the desired
100: * value. If the lock pointed to by the new value of so_lock is not
101: * held by the changing thread, the socket must then be considered
102: * unlocked.
103: *
104: * o If so_lock is mutated, and the previous lock referred to by so_lock
105: * could still be visible to other threads in the system (e.g. via file
106: * descriptor or protocol-internal reference), then the old lock must
107: * remain valid until the socket and/or protocol control block has been
108: * torn down.
109: *
110: * o If a socket has a non-NULL so_head value (i.e. is in the process of
111: * connecting), then locking the socket must also lock the socket pointed
112: * to by so_head: their lock pointers must match.
113: *
114: * o If a socket has connections in progress (so_q, so_q0 not empty) then
115: * locking the socket must also lock the sockets attached to both queues.
116: * Again, their lock pointers must match.
117: *
118: * o Beyond the initial lock assignment in socreate(), assigning locks to
119: * sockets is the responsibility of the individual protocols / protocol
120: * domains.
1.1 cgd 121: */
122:
/* Pool cache from which soget() allocates sockets; created by soinit2(). */
1.94 ad 123: static pool_cache_t socket_cache;
1.1 cgd 124:
1.58 thorpej 125: u_long sb_max = SB_MAX; /* maximum socket buffer size; updated by sb_max_set() */
126: static u_long sb_max_adj; /* sb_max scaled by MCLBYTES / (MSIZE + MCLBYTES); see sb_max_set() */
127:
1.1 cgd 128: /*
129: * Procedures to manipulate state flags of socket
130: * and do appropriate wakeups. Normal sequence from the
131: * active (originating) side is that soisconnecting() is
132: * called during processing of connect() call,
133: * resulting in an eventual call to soisconnected() if/when the
134: * connection is established. When the connection is torn down
135: * soisdisconnecting() is called during processing of disconnect() call,
136: * and soisdisconnected() is called when the connection to the peer
137: * is totally severed. The semantics of these routines are such that
138: * connectionless protocols can call soisconnected() and soisdisconnected()
139: * only, bypassing the in-progress calls when setting up a ``connection''
140: * takes no time.
141: *
142: * From the passive side, a socket is created with
143: * two queues of sockets: so_q0 for connections in progress
144: * and so_q for connections already made and awaiting user acceptance.
145: * As a protocol is preparing incoming connections, it creates a socket
146: * structure queued on so_q0 by calling sonewconn(). When the connection
147: * is established, soisconnected() is called, and transfers the
148: * socket structure to so_q, making it available to accept().
1.66 perry 149: *
1.1 cgd 150: * If a socket is closed with sockets on either
151: * so_q0 or so_q, these sockets are dropped.
152: *
153: * If higher level protocols are implemented in
154: * the kernel, the wakeups done here will sometimes
155: * cause software-interrupt process scheduling.
156: */
157:
1.7 mycroft 158: void
1.37 lukem 159: soisconnecting(struct socket *so)
1.1 cgd 160: {
161:
1.91 ad 162: KASSERT(solocked(so));
163:
1.1 cgd 164: so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
165: so->so_state |= SS_ISCONNECTING;
166: }
167:
1.7 mycroft 168: void
1.37 lukem 169: soisconnected(struct socket *so)
1.1 cgd 170: {
1.37 lukem 171: struct socket *head;
1.1 cgd 172:
1.37 lukem 173: head = so->so_head;
1.91 ad 174:
175: KASSERT(solocked(so));
176: KASSERT(head == NULL || solocked2(so, head));
177:
1.109.2.2! yamt 178: so->so_state &= ~(SS_ISCONNECTING | SS_ISDISCONNECTING);
1.1 cgd 179: so->so_state |= SS_ISCONNECTED;
1.97 tls 180: if (head && so->so_onq == &head->so_q0) {
181: if ((so->so_options & SO_ACCEPTFILTER) == 0) {
182: soqremque(so, 0);
183: soqinsque(head, so, 1);
184: sorwakeup(head);
185: cv_broadcast(&head->so_cv);
186: } else {
187: so->so_upcall =
188: head->so_accf->so_accept_filter->accf_callback;
189: so->so_upcallarg = head->so_accf->so_accept_filter_arg;
190: so->so_rcv.sb_flags |= SB_UPCALL;
191: so->so_options &= ~SO_ACCEPTFILTER;
1.104 tls 192: (*so->so_upcall)(so, so->so_upcallarg,
193: POLLIN|POLLRDNORM, M_DONTWAIT);
1.101 yamt 194: }
1.1 cgd 195: } else {
1.91 ad 196: cv_broadcast(&so->so_cv);
1.1 cgd 197: sorwakeup(so);
198: sowwakeup(so);
199: }
200: }
201:
1.7 mycroft 202: void
1.37 lukem 203: soisdisconnecting(struct socket *so)
1.1 cgd 204: {
205:
1.91 ad 206: KASSERT(solocked(so));
207:
1.1 cgd 208: so->so_state &= ~SS_ISCONNECTING;
209: so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
1.91 ad 210: cv_broadcast(&so->so_cv);
1.1 cgd 211: sowwakeup(so);
212: sorwakeup(so);
213: }
214:
1.7 mycroft 215: void
1.37 lukem 216: soisdisconnected(struct socket *so)
1.1 cgd 217: {
218:
1.91 ad 219: KASSERT(solocked(so));
220:
1.1 cgd 221: so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
1.27 mycroft 222: so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE|SS_ISDISCONNECTED);
1.91 ad 223: cv_broadcast(&so->so_cv);
1.1 cgd 224: sowwakeup(so);
225: sorwakeup(so);
226: }
227:
1.94 ad 228: void
229: soinit2(void)
230: {
231:
232: socket_cache = pool_cache_init(sizeof(struct socket), 0, 0, 0,
233: "socket", NULL, IPL_SOFTNET, NULL, NULL, NULL);
234: }
235:
1.1 cgd 236: /*
237: * When an attempt at a new connection is noted on a socket
238: * which accepts connections, sonewconn is called. If the
239: * connection is possible (subject to space constraints, etc.)
240: * then we allocate a new structure, propoerly linked into the
241: * data structure of the original socket, and return this.
242: */
243: struct socket *
1.109.2.2! yamt 244: sonewconn(struct socket *head, bool conncomplete)
1.1 cgd 245: {
1.37 lukem 246: struct socket *so;
1.91 ad 247: int soqueue, error;
248:
249: KASSERT(solocked(head));
1.1 cgd 250:
1.97 tls 251: if ((head->so_options & SO_ACCEPTFILTER) != 0)
1.109.2.2! yamt 252: conncomplete = false;
! 253: soqueue = conncomplete ? 1 : 0;
! 254:
1.1 cgd 255: if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
1.100 dyoung 256: return NULL;
1.91 ad 257: so = soget(false);
1.66 perry 258: if (so == NULL)
1.100 dyoung 259: return NULL;
1.91 ad 260: mutex_obj_hold(head->so_lock);
261: so->so_lock = head->so_lock;
1.1 cgd 262: so->so_type = head->so_type;
263: so->so_options = head->so_options &~ SO_ACCEPTCONN;
264: so->so_linger = head->so_linger;
265: so->so_state = head->so_state | SS_NOFDREF;
266: so->so_proto = head->so_proto;
267: so->so_timeo = head->so_timeo;
268: so->so_pgid = head->so_pgid;
1.24 matt 269: so->so_send = head->so_send;
270: so->so_receive = head->so_receive;
1.67 christos 271: so->so_uidinfo = head->so_uidinfo;
1.96 yamt 272: so->so_cpid = head->so_cpid;
1.49 matt 273: #ifdef MBUFTRACE
274: so->so_mowner = head->so_mowner;
275: so->so_rcv.sb_mowner = head->so_rcv.sb_mowner;
276: so->so_snd.sb_mowner = head->so_snd.sb_mowner;
277: #endif
1.103 christos 278: if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) != 0)
279: goto out;
1.83 tls 280: so->so_snd.sb_lowat = head->so_snd.sb_lowat;
281: so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
1.84 tls 282: so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
283: so->so_snd.sb_timeo = head->so_snd.sb_timeo;
1.107 christos 284: so->so_rcv.sb_flags |= head->so_rcv.sb_flags & (SB_AUTOSIZE | SB_ASYNC);
285: so->so_snd.sb_flags |= head->so_snd.sb_flags & (SB_AUTOSIZE | SB_ASYNC);
1.1 cgd 286: soqinsque(head, so, soqueue);
1.91 ad 287: error = (*so->so_proto->pr_usrreq)(so, PRU_ATTACH, NULL, NULL,
288: NULL, NULL);
289: KASSERT(solocked(so));
290: if (error != 0) {
1.1 cgd 291: (void) soqremque(so, soqueue);
1.103 christos 292: out:
1.99 ad 293: /*
294: * Remove acccept filter if one is present.
295: * XXX Is this really needed?
296: */
1.97 tls 297: if (so->so_accf != NULL)
1.99 ad 298: (void)accept_filt_clear(so);
1.91 ad 299: soput(so);
1.100 dyoung 300: return NULL;
1.1 cgd 301: }
1.109.2.2! yamt 302: if (conncomplete) {
1.1 cgd 303: sorwakeup(head);
1.91 ad 304: cv_broadcast(&head->so_cv);
1.109.2.2! yamt 305: so->so_state |= SS_ISCONNECTED;
1.1 cgd 306: }
1.100 dyoung 307: return so;
1.1 cgd 308: }
309:
1.91 ad 310: struct socket *
311: soget(bool waitok)
312: {
313: struct socket *so;
314:
1.94 ad 315: so = pool_cache_get(socket_cache, (waitok ? PR_WAITOK : PR_NOWAIT));
1.91 ad 316: if (__predict_false(so == NULL))
317: return (NULL);
318: memset(so, 0, sizeof(*so));
319: TAILQ_INIT(&so->so_q0);
320: TAILQ_INIT(&so->so_q);
321: cv_init(&so->so_cv, "socket");
322: cv_init(&so->so_rcv.sb_cv, "netio");
323: cv_init(&so->so_snd.sb_cv, "netio");
324: selinit(&so->so_rcv.sb_sel);
325: selinit(&so->so_snd.sb_sel);
326: so->so_rcv.sb_so = so;
327: so->so_snd.sb_so = so;
328: return so;
329: }
330:
331: void
332: soput(struct socket *so)
333: {
334:
335: KASSERT(!cv_has_waiters(&so->so_cv));
336: KASSERT(!cv_has_waiters(&so->so_rcv.sb_cv));
337: KASSERT(!cv_has_waiters(&so->so_snd.sb_cv));
338: seldestroy(&so->so_rcv.sb_sel);
339: seldestroy(&so->so_snd.sb_sel);
340: mutex_obj_free(so->so_lock);
341: cv_destroy(&so->so_cv);
342: cv_destroy(&so->so_rcv.sb_cv);
343: cv_destroy(&so->so_snd.sb_cv);
1.94 ad 344: pool_cache_put(socket_cache, so);
1.91 ad 345: }
346:
1.7 mycroft 347: void
1.37 lukem 348: soqinsque(struct socket *head, struct socket *so, int q)
1.1 cgd 349: {
350:
1.91 ad 351: KASSERT(solocked2(head, so));
352:
1.22 thorpej 353: #ifdef DIAGNOSTIC
354: if (so->so_onq != NULL)
355: panic("soqinsque");
356: #endif
357:
1.1 cgd 358: so->so_head = head;
359: if (q == 0) {
360: head->so_q0len++;
1.22 thorpej 361: so->so_onq = &head->so_q0;
1.1 cgd 362: } else {
363: head->so_qlen++;
1.22 thorpej 364: so->so_onq = &head->so_q;
1.1 cgd 365: }
1.22 thorpej 366: TAILQ_INSERT_TAIL(so->so_onq, so, so_qe);
1.1 cgd 367: }
368:
1.7 mycroft 369: int
1.37 lukem 370: soqremque(struct socket *so, int q)
1.1 cgd 371: {
1.37 lukem 372: struct socket *head;
1.1 cgd 373:
1.37 lukem 374: head = so->so_head;
1.91 ad 375:
376: KASSERT(solocked(so));
1.22 thorpej 377: if (q == 0) {
378: if (so->so_onq != &head->so_q0)
1.17 thorpej 379: return (0);
1.1 cgd 380: head->so_q0len--;
381: } else {
1.22 thorpej 382: if (so->so_onq != &head->so_q)
383: return (0);
1.1 cgd 384: head->so_qlen--;
385: }
1.91 ad 386: KASSERT(solocked2(so, head));
1.22 thorpej 387: TAILQ_REMOVE(so->so_onq, so, so_qe);
388: so->so_onq = NULL;
389: so->so_head = NULL;
1.1 cgd 390: return (1);
391: }
392:
393: /*
394: * Socantsendmore indicates that no more data will be sent on the
395: * socket; it would normally be applied to a socket when the user
396: * informs the system that no more data is to be sent, by the protocol
397: * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data
398: * will be received, and will normally be applied to the socket by a
399: * protocol when it detects that the peer will send no more data.
400: * Data queued for reading in the socket may yet be read.
401: */
402:
1.4 andrew 403: void
1.37 lukem 404: socantsendmore(struct socket *so)
1.1 cgd 405: {
406:
1.91 ad 407: KASSERT(solocked(so));
408:
1.1 cgd 409: so->so_state |= SS_CANTSENDMORE;
410: sowwakeup(so);
411: }
412:
1.4 andrew 413: void
1.37 lukem 414: socantrcvmore(struct socket *so)
1.1 cgd 415: {
416:
1.91 ad 417: KASSERT(solocked(so));
418:
1.1 cgd 419: so->so_state |= SS_CANTRCVMORE;
420: sorwakeup(so);
421: }
422:
423: /*
424: * Wait for data to arrive at/drain from a socket buffer.
425: */
1.7 mycroft 426: int
1.37 lukem 427: sbwait(struct sockbuf *sb)
1.1 cgd 428: {
1.91 ad 429: struct socket *so;
430: kmutex_t *lock;
431: int error;
1.1 cgd 432:
1.91 ad 433: so = sb->sb_so;
1.1 cgd 434:
1.91 ad 435: KASSERT(solocked(so));
1.1 cgd 436:
1.91 ad 437: sb->sb_flags |= SB_NOTIFY;
438: lock = so->so_lock;
439: if ((sb->sb_flags & SB_NOINTR) != 0)
440: error = cv_timedwait(&sb->sb_cv, lock, sb->sb_timeo);
441: else
442: error = cv_timedwait_sig(&sb->sb_cv, lock, sb->sb_timeo);
443: if (__predict_false(lock != so->so_lock))
444: solockretry(so, lock);
445: return error;
1.1 cgd 446: }
447:
448: /*
449: * Wakeup processes waiting on a socket buffer.
450: * Do asynchronous notification via SIGIO
1.39 manu 451: * if the socket buffer has the SB_ASYNC flag set.
1.1 cgd 452: */
1.7 mycroft 453: void
1.55 christos 454: sowakeup(struct socket *so, struct sockbuf *sb, int code)
1.1 cgd 455: {
1.90 rmind 456: int band;
457:
1.91 ad 458: KASSERT(solocked(so));
459: KASSERT(sb->sb_so == so);
460:
1.90 rmind 461: if (code == POLL_IN)
462: band = POLLIN|POLLRDNORM;
463: else
464: band = POLLOUT|POLLWRNORM;
1.91 ad 465: sb->sb_flags &= ~SB_NOTIFY;
466: selnotify(&sb->sb_sel, band, NOTE_SUBMIT);
467: cv_broadcast(&sb->sb_cv);
1.90 rmind 468: if (sb->sb_flags & SB_ASYNC)
1.57 christos 469: fownsignal(so->so_pgid, SIGIO, code, band, so);
1.24 matt 470: if (sb->sb_flags & SB_UPCALL)
1.104 tls 471: (*so->so_upcall)(so, so->so_upcallarg, band, M_DONTWAIT);
1.1 cgd 472: }
473:
474: /*
1.95 ad 475: * Reset a socket's lock pointer. Wake all threads waiting on the
476: * socket's condition variables so that they can restart their waits
477: * using the new lock. The existing lock must be held.
478: */
479: void
480: solockreset(struct socket *so, kmutex_t *lock)
481: {
482:
483: KASSERT(solocked(so));
484:
485: so->so_lock = lock;
486: cv_broadcast(&so->so_snd.sb_cv);
487: cv_broadcast(&so->so_rcv.sb_cv);
488: cv_broadcast(&so->so_cv);
489: }
490:
491: /*
1.1 cgd 492: * Socket buffer (struct sockbuf) utility routines.
493: *
494: * Each socket contains two socket buffers: one for sending data and
495: * one for receiving data. Each buffer contains a queue of mbufs,
496: * information about the number of mbufs and amount of data in the
1.13 mycroft 497: * queue, and other fields allowing poll() statements and notification
1.1 cgd 498: * on data availability to be implemented.
499: *
500: * Data stored in a socket buffer is maintained as a list of records.
501: * Each record is a list of mbufs chained together with the m_next
502: * field. Records are chained together with the m_nextpkt field. The upper
503: * level routine soreceive() expects the following conventions to be
504: * observed when placing information in the receive buffer:
505: *
506: * 1. If the protocol requires each message be preceded by the sender's
507: * name, then a record containing that name must be present before
508: * any associated data (mbuf's must be of type MT_SONAME).
509: * 2. If the protocol supports the exchange of ``access rights'' (really
510: * just additional data associated with the message), and there are
511: * ``rights'' to be received, then a record containing this data
1.10 mycroft 512: * should be present (mbuf's must be of type MT_CONTROL).
1.1 cgd 513: * 3. If a name or rights record exists, then it must be followed by
514: * a data record, perhaps of zero length.
515: *
516: * Before using a new socket structure it is first necessary to reserve
517: * buffer space to the socket, by calling sbreserve(). This should commit
518: * some of the available buffer space in the system buffer pool for the
519: * socket (currently, it does nothing but enforce limits). The space
520: * should be released by calling sbrelease() when the socket is destroyed.
521: */
522:
1.7 mycroft 523: int
1.58 thorpej 524: sb_max_set(u_long new_sbmax)
525: {
526: int s;
527:
528: if (new_sbmax < (16 * 1024))
529: return (EINVAL);
530:
531: s = splsoftnet();
532: sb_max = new_sbmax;
533: sb_max_adj = (u_quad_t)new_sbmax * MCLBYTES / (MSIZE + MCLBYTES);
534: splx(s);
535:
536: return (0);
537: }
538:
539: int
1.37 lukem 540: soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
1.1 cgd 541: {
1.91 ad 542:
543: KASSERT(so->so_lock == NULL || solocked(so));
544:
1.74 christos 545: /*
546: * there's at least one application (a configure script of screen)
547: * which expects a fifo is writable even if it has "some" bytes
548: * in its buffer.
549: * so we want to make sure (hiwat - lowat) >= (some bytes).
550: *
551: * PIPE_BUF here is an arbitrary value chosen as (some bytes) above.
552: * we expect it's large enough for such applications.
553: */
554: u_long lowat = MAX(sock_loan_thresh, MCLBYTES);
555: u_long hiwat = lowat + PIPE_BUF;
1.1 cgd 556:
1.74 christos 557: if (sndcc < hiwat)
558: sndcc = hiwat;
1.59 christos 559: if (sbreserve(&so->so_snd, sndcc, so) == 0)
1.1 cgd 560: goto bad;
1.59 christos 561: if (sbreserve(&so->so_rcv, rcvcc, so) == 0)
1.1 cgd 562: goto bad2;
563: if (so->so_rcv.sb_lowat == 0)
564: so->so_rcv.sb_lowat = 1;
565: if (so->so_snd.sb_lowat == 0)
1.74 christos 566: so->so_snd.sb_lowat = lowat;
1.1 cgd 567: if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
568: so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
569: return (0);
1.37 lukem 570: bad2:
1.59 christos 571: sbrelease(&so->so_snd, so);
1.37 lukem 572: bad:
1.1 cgd 573: return (ENOBUFS);
574: }
575:
576: /*
577: * Allot mbufs to a sockbuf.
578: * Attempt to scale mbmax so that mbcnt doesn't become limiting
579: * if buffering efficiency is near the normal case.
580: */
1.7 mycroft 581: int
1.59 christos 582: sbreserve(struct sockbuf *sb, u_long cc, struct socket *so)
1.1 cgd 583: {
1.75 ad 584: struct lwp *l = curlwp; /* XXX */
1.62 christos 585: rlim_t maxcc;
1.67 christos 586: struct uidinfo *uidinfo;
1.1 cgd 587:
1.91 ad 588: KASSERT(so->so_lock == NULL || solocked(so));
589: KASSERT(sb->sb_so == so);
590: KASSERT(sb_max_adj != 0);
591:
1.58 thorpej 592: if (cc == 0 || cc > sb_max_adj)
1.1 cgd 593: return (0);
1.93 christos 594:
1.105 elad 595: maxcc = l->l_proc->p_rlimit[RLIMIT_SBSIZE].rlim_cur;
1.93 christos 596:
597: uidinfo = so->so_uidinfo;
1.67 christos 598: if (!chgsbsize(uidinfo, &sb->sb_hiwat, cc, maxcc))
1.62 christos 599: return 0;
1.1 cgd 600: sb->sb_mbmax = min(cc * 2, sb_max);
601: if (sb->sb_lowat > sb->sb_hiwat)
602: sb->sb_lowat = sb->sb_hiwat;
603: return (1);
604: }
605:
606: /*
1.91 ad 607: * Free mbufs held by a socket, and reserved mbuf space. We do not assert
608: * that the socket is held locked here: see sorflush().
1.1 cgd 609: */
1.7 mycroft 610: void
1.59 christos 611: sbrelease(struct sockbuf *sb, struct socket *so)
1.1 cgd 612: {
613:
1.91 ad 614: KASSERT(sb->sb_so == so);
615:
1.1 cgd 616: sbflush(sb);
1.87 yamt 617: (void)chgsbsize(so->so_uidinfo, &sb->sb_hiwat, 0, RLIM_INFINITY);
1.59 christos 618: sb->sb_mbmax = 0;
1.1 cgd 619: }
620:
621: /*
622: * Routines to add and remove
623: * data from an mbuf queue.
624: *
625: * The routines sbappend() or sbappendrecord() are normally called to
626: * append new mbufs to a socket buffer, after checking that adequate
627: * space is available, comparing the function sbspace() with the amount
628: * of data to be added. sbappendrecord() differs from sbappend() in
629: * that data supplied is treated as the beginning of a new record.
630: * To place a sender's address, optional access rights, and data in a
631: * socket receive buffer, sbappendaddr() should be used. To place
632: * access rights and data in a socket receive buffer, sbappendrights()
633: * should be used. In either case, the new data begins a new record.
634: * Note that unlike sbappend() and sbappendrecord(), these routines check
635: * for the caller that there will be enough space to store the data.
636: * Each fails if there is not enough space, or if it cannot find mbufs
637: * to store additional information in.
638: *
639: * Reliable protocols may use the socket send buffer to hold data
640: * awaiting acknowledgement. Data is normally copied from a socket
641: * send buffer in a protocol with m_copy for output to a peer,
642: * and then removing the data from the socket buffer with sbdrop()
643: * or sbdroprecord() when the data is acknowledged by the peer.
644: */
645:
1.43 thorpej 646: #ifdef SOCKBUF_DEBUG
647: void
648: sblastrecordchk(struct sockbuf *sb, const char *where)
649: {
650: struct mbuf *m = sb->sb_mb;
651:
1.91 ad 652: KASSERT(solocked(sb->sb_so));
653:
1.43 thorpej 654: while (m && m->m_nextpkt)
655: m = m->m_nextpkt;
656:
657: if (m != sb->sb_lastrecord) {
658: printf("sblastrecordchk: sb_mb %p sb_lastrecord %p last %p\n",
659: sb->sb_mb, sb->sb_lastrecord, m);
660: printf("packet chain:\n");
661: for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
662: printf("\t%p\n", m);
1.47 provos 663: panic("sblastrecordchk from %s", where);
1.43 thorpej 664: }
665: }
666:
667: void
668: sblastmbufchk(struct sockbuf *sb, const char *where)
669: {
670: struct mbuf *m = sb->sb_mb;
671: struct mbuf *n;
672:
1.91 ad 673: KASSERT(solocked(sb->sb_so));
674:
1.43 thorpej 675: while (m && m->m_nextpkt)
676: m = m->m_nextpkt;
677:
678: while (m && m->m_next)
679: m = m->m_next;
680:
681: if (m != sb->sb_mbtail) {
682: printf("sblastmbufchk: sb_mb %p sb_mbtail %p last %p\n",
683: sb->sb_mb, sb->sb_mbtail, m);
684: printf("packet tree:\n");
685: for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
686: printf("\t");
687: for (n = m; n != NULL; n = n->m_next)
688: printf("%p ", n);
689: printf("\n");
690: }
691: panic("sblastmbufchk from %s", where);
692: }
693: }
694: #endif /* SOCKBUF_DEBUG */
695:
1.63 jonathan 696: /*
697: * Link a chain of records onto a socket buffer
698: */
699: #define SBLINKRECORDCHAIN(sb, m0, mlast) \
1.43 thorpej 700: do { \
701: if ((sb)->sb_lastrecord != NULL) \
702: (sb)->sb_lastrecord->m_nextpkt = (m0); \
703: else \
704: (sb)->sb_mb = (m0); \
1.63 jonathan 705: (sb)->sb_lastrecord = (mlast); \
1.43 thorpej 706: } while (/*CONSTCOND*/0)
707:
1.63 jonathan 708:
/*
 * Link the single record m0 onto a socket buffer: a chain whose first
 * and last record are both m0.
 */
709: #define SBLINKRECORD(sb, m0) \
710: SBLINKRECORDCHAIN(sb, m0, m0)
711:
1.1 cgd 712: /*
713: * Append mbuf chain m to the last record in the
714: * socket buffer sb. The additional space associated
715: * the mbuf chain is recorded in sb. Empty mbufs are
716: * discarded and mbufs are compacted where possible.
717: */
1.7 mycroft 718: void
1.37 lukem 719: sbappend(struct sockbuf *sb, struct mbuf *m)
1.1 cgd 720: {
1.37 lukem 721: struct mbuf *n;
1.1 cgd 722:
1.91 ad 723: KASSERT(solocked(sb->sb_so));
724:
1.109.2.2! yamt 725: if (m == NULL)
1.1 cgd 726: return;
1.43 thorpej 727:
1.49 matt 728: #ifdef MBUFTRACE
1.65 jonathan 729: m_claimm(m, sb->sb_mowner);
1.49 matt 730: #endif
731:
1.43 thorpej 732: SBLASTRECORDCHK(sb, "sbappend 1");
733:
734: if ((n = sb->sb_lastrecord) != NULL) {
735: /*
736: * XXX Would like to simply use sb_mbtail here, but
737: * XXX I need to verify that I won't miss an EOR that
738: * XXX way.
739: */
1.1 cgd 740: do {
741: if (n->m_flags & M_EOR) {
742: sbappendrecord(sb, m); /* XXXXXX!!!! */
743: return;
744: }
745: } while (n->m_next && (n = n->m_next));
1.43 thorpej 746: } else {
747: /*
748: * If this is the first record in the socket buffer, it's
749: * also the last record.
750: */
751: sb->sb_lastrecord = m;
1.1 cgd 752: }
753: sbcompress(sb, m, n);
1.43 thorpej 754: SBLASTRECORDCHK(sb, "sbappend 2");
755: }
756:
757: /*
758: * This version of sbappend() should only be used when the caller
759: * absolutely knows that there will never be more than one record
760: * in the socket buffer, that is, a stream protocol (such as TCP).
761: */
762: void
1.44 thorpej 763: sbappendstream(struct sockbuf *sb, struct mbuf *m)
1.43 thorpej 764: {
765:
1.91 ad 766: KASSERT(solocked(sb->sb_so));
1.43 thorpej 767: KDASSERT(m->m_nextpkt == NULL);
768: KASSERT(sb->sb_mb == sb->sb_lastrecord);
769:
770: SBLASTMBUFCHK(sb, __func__);
771:
1.49 matt 772: #ifdef MBUFTRACE
1.65 jonathan 773: m_claimm(m, sb->sb_mowner);
1.49 matt 774: #endif
775:
1.43 thorpej 776: sbcompress(sb, m, sb->sb_mbtail);
777:
778: sb->sb_lastrecord = sb->sb_mb;
779: SBLASTRECORDCHK(sb, __func__);
1.1 cgd 780: }
781:
782: #ifdef SOCKBUF_DEBUG
1.7 mycroft 783: void
1.37 lukem 784: sbcheck(struct sockbuf *sb)
1.1 cgd 785: {
1.91 ad 786: struct mbuf *m, *m2;
1.43 thorpej 787: u_long len, mbcnt;
1.1 cgd 788:
1.91 ad 789: KASSERT(solocked(sb->sb_so));
790:
1.37 lukem 791: len = 0;
792: mbcnt = 0;
1.91 ad 793: for (m = sb->sb_mb; m; m = m->m_nextpkt) {
794: for (m2 = m; m2 != NULL; m2 = m2->m_next) {
795: len += m2->m_len;
796: mbcnt += MSIZE;
797: if (m2->m_flags & M_EXT)
798: mbcnt += m2->m_ext.ext_size;
799: if (m2->m_nextpkt != NULL)
800: panic("sbcheck nextpkt");
801: }
1.1 cgd 802: }
803: if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
1.43 thorpej 804: printf("cc %lu != %lu || mbcnt %lu != %lu\n", len, sb->sb_cc,
1.1 cgd 805: mbcnt, sb->sb_mbcnt);
806: panic("sbcheck");
807: }
808: }
809: #endif
810:
811: /*
812: * As above, except the mbuf chain
813: * begins a new record.
814: */
1.7 mycroft 815: void
1.37 lukem 816: sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
1.1 cgd 817: {
1.37 lukem 818: struct mbuf *m;
1.1 cgd 819:
1.91 ad 820: KASSERT(solocked(sb->sb_so));
821:
1.109.2.2! yamt 822: if (m0 == NULL)
1.1 cgd 823: return;
1.43 thorpej 824:
1.49 matt 825: #ifdef MBUFTRACE
1.65 jonathan 826: m_claimm(m0, sb->sb_mowner);
1.49 matt 827: #endif
1.1 cgd 828: /*
829: * Put the first mbuf on the queue.
830: * Note this permits zero length records.
831: */
832: sballoc(sb, m0);
1.43 thorpej 833: SBLASTRECORDCHK(sb, "sbappendrecord 1");
834: SBLINKRECORD(sb, m0);
1.1 cgd 835: m = m0->m_next;
836: m0->m_next = 0;
837: if (m && (m0->m_flags & M_EOR)) {
838: m0->m_flags &= ~M_EOR;
839: m->m_flags |= M_EOR;
840: }
841: sbcompress(sb, m, m0);
1.43 thorpej 842: SBLASTRECORDCHK(sb, "sbappendrecord 2");
1.1 cgd 843: }
844:
845: /*
846: * As above except that OOB data
847: * is inserted at the beginning of the sockbuf,
848: * but after any other OOB data.
849: */
1.7 mycroft 850: void
1.37 lukem 851: sbinsertoob(struct sockbuf *sb, struct mbuf *m0)
1.1 cgd 852: {
1.37 lukem 853: struct mbuf *m, **mp;
1.1 cgd 854:
1.91 ad 855: KASSERT(solocked(sb->sb_so));
856:
1.109.2.2! yamt 857: if (m0 == NULL)
1.1 cgd 858: return;
1.43 thorpej 859:
860: SBLASTRECORDCHK(sb, "sbinsertoob 1");
861:
1.11 christos 862: for (mp = &sb->sb_mb; (m = *mp) != NULL; mp = &((*mp)->m_nextpkt)) {
1.1 cgd 863: again:
864: switch (m->m_type) {
865:
866: case MT_OOBDATA:
867: continue; /* WANT next train */
868:
869: case MT_CONTROL:
1.11 christos 870: if ((m = m->m_next) != NULL)
1.1 cgd 871: goto again; /* inspect THIS train further */
872: }
873: break;
874: }
875: /*
876: * Put the first mbuf on the queue.
877: * Note this permits zero length records.
878: */
879: sballoc(sb, m0);
880: m0->m_nextpkt = *mp;
1.43 thorpej 881: if (*mp == NULL) {
882: /* m0 is actually the new tail */
883: sb->sb_lastrecord = m0;
884: }
1.1 cgd 885: *mp = m0;
886: m = m0->m_next;
887: m0->m_next = 0;
888: if (m && (m0->m_flags & M_EOR)) {
889: m0->m_flags &= ~M_EOR;
890: m->m_flags |= M_EOR;
891: }
892: sbcompress(sb, m, m0);
1.43 thorpej 893: SBLASTRECORDCHK(sb, "sbinsertoob 2");
1.1 cgd 894: }
895:
896: /*
897: * Append address and data, and optionally, control (ancillary) data
898: * to the receive queue of a socket. If present,
899: * m0 must include a packet header with total length.
900: * Returns 0 if no space in sockbuf or insufficient mbufs.
901: */
1.7 mycroft 902: int
1.61 matt 903: sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0,
1.37 lukem 904: struct mbuf *control)
1.1 cgd 905: {
1.43 thorpej 906: struct mbuf *m, *n, *nlast;
1.50 fvdl 907: int space, len;
1.1 cgd 908:
1.91 ad 909: KASSERT(solocked(sb->sb_so));
910:
1.37 lukem 911: space = asa->sa_len;
912:
1.49 matt 913: if (m0 != NULL) {
914: if ((m0->m_flags & M_PKTHDR) == 0)
915: panic("sbappendaddr");
1.1 cgd 916: space += m0->m_pkthdr.len;
1.49 matt 917: #ifdef MBUFTRACE
1.65 jonathan 918: m_claimm(m0, sb->sb_mowner);
1.49 matt 919: #endif
920: }
1.1 cgd 921: for (n = control; n; n = n->m_next) {
922: space += n->m_len;
1.49 matt 923: MCLAIM(n, sb->sb_mowner);
1.109.2.2! yamt 924: if (n->m_next == NULL) /* keep pointer to last control buf */
1.1 cgd 925: break;
926: }
927: if (space > sbspace(sb))
928: return (0);
1.109.2.2! yamt 929: m = m_get(M_DONTWAIT, MT_SONAME);
! 930: if (m == NULL)
1.1 cgd 931: return (0);
1.49 matt 932: MCLAIM(m, sb->sb_mowner);
1.50 fvdl 933: /*
934: * XXX avoid 'comparison always true' warning which isn't easily
935: * avoided.
936: */
937: len = asa->sa_len;
938: if (len > MLEN) {
1.20 thorpej 939: MEXTMALLOC(m, asa->sa_len, M_NOWAIT);
940: if ((m->m_flags & M_EXT) == 0) {
941: m_free(m);
942: return (0);
943: }
944: }
1.1 cgd 945: m->m_len = asa->sa_len;
1.82 christos 946: memcpy(mtod(m, void *), asa, asa->sa_len);
1.1 cgd 947: if (n)
948: n->m_next = m0; /* concatenate data to control */
949: else
950: control = m0;
951: m->m_next = control;
1.43 thorpej 952:
953: SBLASTRECORDCHK(sb, "sbappendaddr 1");
954:
955: for (n = m; n->m_next != NULL; n = n->m_next)
1.1 cgd 956: sballoc(sb, n);
1.43 thorpej 957: sballoc(sb, n);
958: nlast = n;
959: SBLINKRECORD(sb, m);
960:
961: sb->sb_mbtail = nlast;
962: SBLASTMBUFCHK(sb, "sbappendaddr");
963: SBLASTRECORDCHK(sb, "sbappendaddr 2");
964:
1.1 cgd 965: return (1);
966: }
967:
1.63 jonathan 968: /*
969: * Helper for sbappendchainaddr: prepend a struct sockaddr* to
970: * an mbuf chain.
971: */
1.70 perry 972: static inline struct mbuf *
1.81 yamt 973: m_prepend_sockaddr(struct sockbuf *sb, struct mbuf *m0,
1.64 jonathan 974: const struct sockaddr *asa)
1.63 jonathan 975: {
976: struct mbuf *m;
1.64 jonathan 977: const int salen = asa->sa_len;
1.63 jonathan 978:
1.91 ad 979: KASSERT(solocked(sb->sb_so));
980:
1.63 jonathan 981: /* only the first in each chain need be a pkthdr */
1.109.2.2! yamt 982: m = m_gethdr(M_DONTWAIT, MT_SONAME);
! 983: if (m == NULL)
! 984: return NULL;
1.63 jonathan 985: MCLAIM(m, sb->sb_mowner);
1.64 jonathan 986: #ifdef notyet
987: if (salen > MHLEN) {
988: MEXTMALLOC(m, salen, M_NOWAIT);
989: if ((m->m_flags & M_EXT) == 0) {
990: m_free(m);
1.109.2.2! yamt 991: return NULL;
1.64 jonathan 992: }
993: }
994: #else
995: KASSERT(salen <= MHLEN);
996: #endif
997: m->m_len = salen;
1.82 christos 998: memcpy(mtod(m, void *), asa, salen);
1.63 jonathan 999: m->m_next = m0;
1.64 jonathan 1000: m->m_pkthdr.len = salen + m0->m_pkthdr.len;
1.63 jonathan 1001:
1002: return m;
1003: }
1004:
1005: int
1006: sbappendaddrchain(struct sockbuf *sb, const struct sockaddr *asa,
1007: struct mbuf *m0, int sbprio)
1008: {
1009: struct mbuf *m, *n, *n0, *nlast;
1010: int error;
1011:
1.91 ad 1012: KASSERT(solocked(sb->sb_so));
1013:
1.63 jonathan 1014: /*
1015: * XXX sbprio reserved for encoding priority of this* request:
1016: * SB_PRIO_NONE --> honour normal sb limits
1017: * SB_PRIO_ONESHOT_OVERFLOW --> if socket has any space,
1018: * take whole chain. Intended for large requests
1019: * that should be delivered atomically (all, or none).
1020: * SB_PRIO_OVERDRAFT -- allow a small (2*MLEN) overflow
1021: * over normal socket limits, for messages indicating
1022: * buffer overflow in earlier normal/lower-priority messages
1023: * SB_PRIO_BESTEFFORT --> ignore limits entirely.
1024: * Intended for kernel-generated messages only.
1025: * Up to generator to avoid total mbuf resource exhaustion.
1026: */
1027: (void)sbprio;
1028:
1029: if (m0 && (m0->m_flags & M_PKTHDR) == 0)
1030: panic("sbappendaddrchain");
1031:
1.109.2.2! yamt 1032: #ifdef notyet
1.63 jonathan 1033: space = sbspace(sb);
1.66 perry 1034:
1035: /*
1.63 jonathan 1036: * Enforce SB_PRIO_* limits as described above.
1037: */
1038: #endif
1039:
1040: n0 = NULL;
1041: nlast = NULL;
1042: for (m = m0; m; m = m->m_nextpkt) {
1043: struct mbuf *np;
1044:
1.64 jonathan 1045: #ifdef MBUFTRACE
1.65 jonathan 1046: m_claimm(m, sb->sb_mowner);
1.64 jonathan 1047: #endif
1048:
1.63 jonathan 1049: /* Prepend sockaddr to this record (m) of input chain m0 */
1.64 jonathan 1050: n = m_prepend_sockaddr(sb, m, asa);
1.63 jonathan 1051: if (n == NULL) {
1052: error = ENOBUFS;
1053: goto bad;
1054: }
1055:
1056: /* Append record (asa+m) to end of new chain n0 */
1057: if (n0 == NULL) {
1058: n0 = n;
1059: } else {
1060: nlast->m_nextpkt = n;
1061: }
1062: /* Keep track of last record on new chain */
1063: nlast = n;
1064:
1065: for (np = n; np; np = np->m_next)
1066: sballoc(sb, np);
1067: }
1068:
1.64 jonathan 1069: SBLASTRECORDCHK(sb, "sbappendaddrchain 1");
1070:
1.63 jonathan 1071: /* Drop the entire chain of (asa+m) records onto the socket */
1072: SBLINKRECORDCHAIN(sb, n0, nlast);
1.64 jonathan 1073:
1074: SBLASTRECORDCHK(sb, "sbappendaddrchain 2");
1075:
1.63 jonathan 1076: for (m = nlast; m->m_next; m = m->m_next)
1077: ;
1078: sb->sb_mbtail = m;
1.64 jonathan 1079: SBLASTMBUFCHK(sb, "sbappendaddrchain");
1080:
1.63 jonathan 1081: return (1);
1082:
1083: bad:
1.64 jonathan 1084: /*
1085: * On error, free the prepended addreseses. For consistency
1086: * with sbappendaddr(), leave it to our caller to free
1087: * the input record chain passed to us as m0.
1088: */
1089: while ((n = n0) != NULL) {
1090: struct mbuf *np;
1091:
1092: /* Undo the sballoc() of this record */
1093: for (np = n; np; np = np->m_next)
1094: sbfree(sb, np);
1095:
1096: n0 = n->m_nextpkt; /* iterate at next prepended address */
1097: MFREE(n, np); /* free prepended address (not data) */
1098: }
1.109.2.2! yamt 1099: return error;
1.63 jonathan 1100: }
1101:
1102:
1.7 mycroft 1103: int
1.37 lukem 1104: sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
1.1 cgd 1105: {
1.43 thorpej 1106: struct mbuf *m, *mlast, *n;
1.37 lukem 1107: int space;
1.1 cgd 1108:
1.91 ad 1109: KASSERT(solocked(sb->sb_so));
1110:
1.37 lukem 1111: space = 0;
1.109.2.2! yamt 1112: if (control == NULL)
1.1 cgd 1113: panic("sbappendcontrol");
1114: for (m = control; ; m = m->m_next) {
1115: space += m->m_len;
1.49 matt 1116: MCLAIM(m, sb->sb_mowner);
1.109.2.2! yamt 1117: if (m->m_next == NULL)
1.1 cgd 1118: break;
1119: }
1120: n = m; /* save pointer to last control buffer */
1.49 matt 1121: for (m = m0; m; m = m->m_next) {
1122: MCLAIM(m, sb->sb_mowner);
1.1 cgd 1123: space += m->m_len;
1.49 matt 1124: }
1.1 cgd 1125: if (space > sbspace(sb))
1126: return (0);
1127: n->m_next = m0; /* concatenate data to control */
1.43 thorpej 1128:
1129: SBLASTRECORDCHK(sb, "sbappendcontrol 1");
1130:
1131: for (m = control; m->m_next != NULL; m = m->m_next)
1.1 cgd 1132: sballoc(sb, m);
1.43 thorpej 1133: sballoc(sb, m);
1134: mlast = m;
1135: SBLINKRECORD(sb, control);
1136:
1137: sb->sb_mbtail = mlast;
1138: SBLASTMBUFCHK(sb, "sbappendcontrol");
1139: SBLASTRECORDCHK(sb, "sbappendcontrol 2");
1140:
1.1 cgd 1141: return (1);
1142: }
1143:
1144: /*
1145: * Compress mbuf chain m into the socket
1146: * buffer sb following mbuf n. If n
1147: * is null, the buffer is presumed empty.
1148: */
1.7 mycroft 1149: void
1.37 lukem 1150: sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
1.1 cgd 1151: {
1.37 lukem 1152: int eor;
1153: struct mbuf *o;
1.1 cgd 1154:
1.91 ad 1155: KASSERT(solocked(sb->sb_so));
1156:
1.37 lukem 1157: eor = 0;
1.1 cgd 1158: while (m) {
1159: eor |= m->m_flags & M_EOR;
1160: if (m->m_len == 0 &&
1161: (eor == 0 ||
1162: (((o = m->m_next) || (o = n)) &&
1163: o->m_type == m->m_type))) {
1.46 thorpej 1164: if (sb->sb_lastrecord == m)
1165: sb->sb_lastrecord = m->m_next;
1.1 cgd 1166: m = m_free(m);
1167: continue;
1168: }
1.40 thorpej 1169: if (n && (n->m_flags & M_EOR) == 0 &&
1170: /* M_TRAILINGSPACE() checks buffer writeability */
1171: m->m_len <= MCLBYTES / 4 && /* XXX Don't copy too much */
1172: m->m_len <= M_TRAILINGSPACE(n) &&
1173: n->m_type == m->m_type) {
1.82 christos 1174: memcpy(mtod(n, char *) + n->m_len, mtod(m, void *),
1.1 cgd 1175: (unsigned)m->m_len);
1176: n->m_len += m->m_len;
1177: sb->sb_cc += m->m_len;
1178: m = m_free(m);
1179: continue;
1180: }
1181: if (n)
1182: n->m_next = m;
1183: else
1184: sb->sb_mb = m;
1.43 thorpej 1185: sb->sb_mbtail = m;
1.1 cgd 1186: sballoc(sb, m);
1187: n = m;
1188: m->m_flags &= ~M_EOR;
1189: m = m->m_next;
1190: n->m_next = 0;
1191: }
1192: if (eor) {
1193: if (n)
1194: n->m_flags |= eor;
1195: else
1.15 christos 1196: printf("semi-panic: sbcompress\n");
1.1 cgd 1197: }
1.43 thorpej 1198: SBLASTMBUFCHK(sb, __func__);
1.1 cgd 1199: }
1200:
1201: /*
1202: * Free all mbufs in a sockbuf.
1203: * Check that all resources are reclaimed.
1204: */
1.7 mycroft 1205: void
1.37 lukem 1206: sbflush(struct sockbuf *sb)
1.1 cgd 1207: {
1208:
1.91 ad 1209: KASSERT(solocked(sb->sb_so));
1.43 thorpej 1210: KASSERT((sb->sb_flags & SB_LOCK) == 0);
1211:
1.1 cgd 1212: while (sb->sb_mbcnt)
1213: sbdrop(sb, (int)sb->sb_cc);
1.43 thorpej 1214:
1215: KASSERT(sb->sb_cc == 0);
1216: KASSERT(sb->sb_mb == NULL);
1217: KASSERT(sb->sb_mbtail == NULL);
1218: KASSERT(sb->sb_lastrecord == NULL);
1.1 cgd 1219: }
1220:
1221: /*
1222: * Drop data from (the front of) a sockbuf.
1223: */
1.7 mycroft 1224: void
1.37 lukem 1225: sbdrop(struct sockbuf *sb, int len)
1.1 cgd 1226: {
1.37 lukem 1227: struct mbuf *m, *mn, *next;
1.1 cgd 1228:
1.91 ad 1229: KASSERT(solocked(sb->sb_so));
1230:
1.109.2.2! yamt 1231: next = (m = sb->sb_mb) ? m->m_nextpkt : NULL;
1.1 cgd 1232: while (len > 0) {
1.109.2.2! yamt 1233: if (m == NULL) {
! 1234: if (next == NULL)
! 1235: panic("sbdrop(%p,%d): cc=%lu",
! 1236: sb, len, sb->sb_cc);
1.1 cgd 1237: m = next;
1238: next = m->m_nextpkt;
1239: continue;
1240: }
1241: if (m->m_len > len) {
1242: m->m_len -= len;
1243: m->m_data += len;
1244: sb->sb_cc -= len;
1245: break;
1246: }
1247: len -= m->m_len;
1248: sbfree(sb, m);
1249: MFREE(m, mn);
1250: m = mn;
1251: }
1252: while (m && m->m_len == 0) {
1253: sbfree(sb, m);
1254: MFREE(m, mn);
1255: m = mn;
1256: }
1257: if (m) {
1258: sb->sb_mb = m;
1259: m->m_nextpkt = next;
1260: } else
1261: sb->sb_mb = next;
1.43 thorpej 1262: /*
1.45 thorpej 1263: * First part is an inline SB_EMPTY_FIXUP(). Second part
1.43 thorpej 1264: * makes sure sb_lastrecord is up-to-date if we dropped
1265: * part of the last record.
1266: */
1267: m = sb->sb_mb;
1268: if (m == NULL) {
1269: sb->sb_mbtail = NULL;
1270: sb->sb_lastrecord = NULL;
1271: } else if (m->m_nextpkt == NULL)
1272: sb->sb_lastrecord = m;
1.1 cgd 1273: }
1274:
1275: /*
1276: * Drop a record off the front of a sockbuf
1277: * and move the next record to the front.
1278: */
1.7 mycroft 1279: void
1.37 lukem 1280: sbdroprecord(struct sockbuf *sb)
1.1 cgd 1281: {
1.37 lukem 1282: struct mbuf *m, *mn;
1.1 cgd 1283:
1.91 ad 1284: KASSERT(solocked(sb->sb_so));
1285:
1.1 cgd 1286: m = sb->sb_mb;
1287: if (m) {
1288: sb->sb_mb = m->m_nextpkt;
1289: do {
1290: sbfree(sb, m);
1291: MFREE(m, mn);
1.11 christos 1292: } while ((m = mn) != NULL);
1.1 cgd 1293: }
1.45 thorpej 1294: SB_EMPTY_FIXUP(sb);
1.19 thorpej 1295: }
1296:
1297: /*
1298: * Create a "control" mbuf containing the specified data
1299: * with the specified type for presentation on a socket buffer.
1300: */
1301: struct mbuf *
1.109.2.2! yamt 1302: sbcreatecontrol1(void **p, int size, int type, int level, int flags)
1.19 thorpej 1303: {
1.37 lukem 1304: struct cmsghdr *cp;
1305: struct mbuf *m;
1.109.2.2! yamt 1306: int space = CMSG_SPACE(size);
1.19 thorpej 1307:
1.109.2.2! yamt 1308: if ((flags & M_DONTWAIT) && space > MCLBYTES) {
! 1309: printf("%s: message too large %d\n", __func__, space);
1.30 itojun 1310: return NULL;
1311: }
1312:
1.109.2.2! yamt 1313: if ((m = m_get(flags, MT_CONTROL)) == NULL)
! 1314: return NULL;
! 1315: if (space > MLEN) {
! 1316: if (space > MCLBYTES)
! 1317: MEXTMALLOC(m, space, M_WAITOK);
! 1318: else
! 1319: MCLGET(m, flags);
1.30 itojun 1320: if ((m->m_flags & M_EXT) == 0) {
1321: m_free(m);
1322: return NULL;
1323: }
1324: }
1.19 thorpej 1325: cp = mtod(m, struct cmsghdr *);
1.109.2.2! yamt 1326: *p = CMSG_DATA(cp);
! 1327: m->m_len = space;
1.35 itojun 1328: cp->cmsg_len = CMSG_LEN(size);
1.19 thorpej 1329: cp->cmsg_level = level;
1330: cp->cmsg_type = type;
1.109.2.2! yamt 1331: return m;
! 1332: }
! 1333:
! 1334: struct mbuf *
! 1335: sbcreatecontrol(void *p, int size, int type, int level)
! 1336: {
! 1337: struct mbuf *m;
! 1338: void *v;
! 1339:
! 1340: m = sbcreatecontrol1(&v, size, type, level, M_DONTWAIT);
! 1341: if (m == NULL)
! 1342: return NULL;
! 1343: memcpy(v, p, size);
! 1344: return m;
1.1 cgd 1345: }
1.91 ad 1346:
1347: void
1348: solockretry(struct socket *so, kmutex_t *lock)
1349: {
1350:
1351: while (lock != so->so_lock) {
1352: mutex_exit(lock);
1353: lock = so->so_lock;
1354: mutex_enter(lock);
1355: }
1356: }
1357:
1358: bool
1359: solocked(struct socket *so)
1360: {
1361:
1362: return mutex_owned(so->so_lock);
1363: }
1364:
1365: bool
1366: solocked2(struct socket *so1, struct socket *so2)
1367: {
1368: kmutex_t *lock;
1369:
1370: lock = so1->so_lock;
1371: if (lock != so2->so_lock)
1372: return false;
1373: return mutex_owned(lock);
1374: }
1375:
1376: /*
1377: * Assign a default lock to a new socket. For PRU_ATTACH, and done by
1378: * protocols that do not have special locking requirements.
1379: */
1380: void
1381: sosetlock(struct socket *so)
1382: {
1383: kmutex_t *lock;
1384:
1385: if (so->so_lock == NULL) {
1386: lock = softnet_lock;
1387: so->so_lock = lock;
1388: mutex_obj_hold(lock);
1389: mutex_enter(lock);
1390: }
1391:
1392: /* In all cases, lock must be held on return from PRU_ATTACH. */
1393: KASSERT(solocked(so));
1394: }
1395:
1396: /*
1397: * Set lock on sockbuf sb; sleep if lock is already held.
1398: * Unless SB_NOINTR is set on sockbuf, sleep is interruptible.
1399: * Returns error without lock if sleep is interrupted.
1400: */
1401: int
1402: sblock(struct sockbuf *sb, int wf)
1403: {
1404: struct socket *so;
1405: kmutex_t *lock;
1406: int error;
1407:
1408: KASSERT(solocked(sb->sb_so));
1409:
1410: for (;;) {
1411: if (__predict_true((sb->sb_flags & SB_LOCK) == 0)) {
1412: sb->sb_flags |= SB_LOCK;
1413: return 0;
1414: }
1415: if (wf != M_WAITOK)
1416: return EWOULDBLOCK;
1417: so = sb->sb_so;
1418: lock = so->so_lock;
1419: if ((sb->sb_flags & SB_NOINTR) != 0) {
1420: cv_wait(&so->so_cv, lock);
1421: error = 0;
1422: } else
1423: error = cv_wait_sig(&so->so_cv, lock);
1424: if (__predict_false(lock != so->so_lock))
1425: solockretry(so, lock);
1426: if (error != 0)
1427: return error;
1428: }
1429: }
1430:
1431: void
1432: sbunlock(struct sockbuf *sb)
1433: {
1434: struct socket *so;
1435:
1436: so = sb->sb_so;
1437:
1438: KASSERT(solocked(so));
1439: KASSERT((sb->sb_flags & SB_LOCK) != 0);
1440:
1441: sb->sb_flags &= ~SB_LOCK;
1442: cv_broadcast(&so->so_cv);
1443: }
1444:
1445: int
1.101 yamt 1446: sowait(struct socket *so, bool catch, int timo)
1.91 ad 1447: {
1448: kmutex_t *lock;
1449: int error;
1450:
1451: KASSERT(solocked(so));
1.101 yamt 1452: KASSERT(catch || timo != 0);
1.91 ad 1453:
1454: lock = so->so_lock;
1.101 yamt 1455: if (catch)
1456: error = cv_timedwait_sig(&so->so_cv, lock, timo);
1457: else
1458: error = cv_timedwait(&so->so_cv, lock, timo);
1.91 ad 1459: if (__predict_false(lock != so->so_lock))
1460: solockretry(so, lock);
1461: return error;
1462: }
CVSweb <webmaster@jp.NetBSD.org>