Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.164.2.3 retrieving revision 1.254 diff -u -p -r1.164.2.3 -r1.254 --- src/sys/kern/uipc_socket.c 2008/10/10 22:34:14 1.164.2.3 +++ src/sys/kern/uipc_socket.c 2017/05/25 20:42:36 1.254 @@ -1,11 +1,11 @@ -/* $NetBSD: uipc_socket.c,v 1.164.2.3 2008/10/10 22:34:14 skrll Exp $ */ +/* $NetBSD: uipc_socket.c,v 1.254 2017/05/25 20:42:36 christos Exp $ */ /*- - * Copyright (c) 2002, 2007, 2008 The NetBSD Foundation, Inc. + * Copyright (c) 2002, 2007, 2008, 2009 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation - * by Jason R. Thorpe of Wasabi Systems, Inc. + * by Jason R. Thorpe of Wasabi Systems, Inc, and by Andrew Doran. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -62,15 +62,26 @@ * @(#)uipc_socket.c 8.6 (Berkeley) 5/2/95 */ +/* + * Socket operation routines. + * + * These routines are called by the routines in sys_socket.c or from a + * system process, and implement the semantics of socket operations by + * switching out to the protocol specific routines. + */ + #include -__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.164.2.3 2008/10/10 22:34:14 skrll Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.254 2017/05/25 20:42:36 christos Exp $"); -#include "opt_inet.h" +#ifdef _KERNEL_OPT +#include "opt_compat_netbsd.h" #include "opt_sock_counters.h" #include "opt_sosend_loan.h" #include "opt_mbuftrace.h" #include "opt_somaxkva.h" #include "opt_multiprocessor.h" /* XXX */ +#include "opt_sctp.h" +#endif #include #include @@ -86,15 +97,23 @@ __KERNEL_RCSID(0, "$NetBSD: uipc_socket. #include #include #include +#include #include #include #include #include #include +#include + +#ifdef COMPAT_50 +#include +#include +#endif -#include +#include +#include +#include -MALLOC_DEFINE(M_SOOPTS, "soopts", "socket options"); MALLOC_DEFINE(M_SONAME, "soname", "socket name"); extern const struct fileops socketops; @@ -127,8 +146,6 @@ EVCNT_ATTACH_STATIC(sosend_kvalimit); #endif /* SOSEND_COUNTERS */ -static struct callback_entry sokva_reclaimerentry; - #if defined(SOSEND_NO_LOAN) || defined(MULTIPROCESSOR) int sock_loan_thresh = -1; #else @@ -136,7 +153,7 @@ int sock_loan_thresh = 4096; #endif static kmutex_t so_pendfree_lock; -static struct mbuf *so_pendfree; +static struct mbuf *so_pendfree = NULL; #ifndef SOMAXKVA #define SOMAXKVA (16 * 1024 * 1024) @@ -145,10 +162,16 @@ int somaxkva = SOMAXKVA; static int socurkva; static kcondvar_t socurkva_cv; +static kauth_listener_t socket_listener; + #define SOCK_LOAN_CHUNK 65536 -static size_t sodopendfree(void); -static size_t sodopendfreel(void); +static void sopendfree_thread(void *); +static kcondvar_t pendfree_thread_cv; +static lwp_t *sopendfree_lwp; + +static void sysctl_kern_socket_setup(void); +static struct sysctllog *socket_sysctllog; static vsize_t sokvareserve(struct socket *so, vsize_t len) @@ -157,21 +180,6 @@ sokvareserve(struct socket *so, vsize_t mutex_enter(&so_pendfree_lock); while (socurkva + len > somaxkva) { - size_t freed; - - /* - * try to do pendfree. - */ - - freed = sodopendfreel(); - - /* - * if some kva was freed, try again. - */ - - if (freed) - continue; - SOSEND_COUNTER_INCR(&sosend_kvalimit); error = cv_wait_sig(&socurkva_cv, &so_pendfree_lock); if (error) { @@ -199,7 +207,7 @@ sokvaunreserve(vsize_t len) */ vaddr_t -sokvaalloc(vsize_t len, struct socket *so) +sokvaalloc(vaddr_t sva, vsize_t len, struct socket *so) { vaddr_t lva; @@ -214,7 +222,8 @@ sokvaalloc(vsize_t len, struct socket *s * allocate kva. */ - lva = uvm_km_alloc(kernel_map, len, 0, UVM_KMF_VAONLY | UVM_KMF_WAITVA); + lva = uvm_km_alloc(kernel_map, len, atop(sva) & uvmexp.colormask, + UVM_KMF_COLORMATCH | UVM_KMF_VAONLY | UVM_KMF_WAITVA); if (lva == 0) { sokvaunreserve(len); return (0); @@ -264,56 +273,46 @@ sodoloanfree(struct vm_page **pgs, void sokvafree(sva, len); } -static size_t -sodopendfree(void) -{ - size_t rv; - - if (__predict_true(so_pendfree == NULL)) - return 0; - - mutex_enter(&so_pendfree_lock); - rv = sodopendfreel(); - mutex_exit(&so_pendfree_lock); - - return rv; -} - /* - * sodopendfreel: free mbufs on "pendfree" list. + * sopendfree_thread: free mbufs on "pendfree" list. * unlock and relock so_pendfree_lock when freeing mbufs. - * - * => called with so_pendfree_lock held. */ -static size_t -sodopendfreel(void) +static void +sopendfree_thread(void *v) { struct mbuf *m, *next; - size_t rv = 0; - - KASSERT(mutex_owned(&so_pendfree_lock)); + size_t rv; - while (so_pendfree != NULL) { - m = so_pendfree; - so_pendfree = NULL; - mutex_exit(&so_pendfree_lock); + mutex_enter(&so_pendfree_lock); - for (; m != NULL; m = next) { - next = m->m_next; - KASSERT((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0); - KASSERT(m->m_ext.ext_refcnt == 0); + for (;;) { + rv = 0; + while (so_pendfree != NULL) { + m = so_pendfree; + so_pendfree = NULL; + mutex_exit(&so_pendfree_lock); + + for (; m != NULL; m = next) { + next = m->m_next; + KASSERT((~m->m_flags & (M_EXT|M_EXT_PAGES)) == + 0); + KASSERT(m->m_ext.ext_refcnt == 0); + + rv += m->m_ext.ext_size; + sodoloanfree(m->m_ext.ext_pgs, m->m_ext.ext_buf, + m->m_ext.ext_size); + pool_cache_put(mb_cache, m); + } - rv += m->m_ext.ext_size; - sodoloanfree(m->m_ext.ext_pgs, m->m_ext.ext_buf, - m->m_ext.ext_size); - pool_cache_put(mb_cache, m); + mutex_enter(&so_pendfree_lock); } - - mutex_enter(&so_pendfree_lock); + if (rv) + cv_broadcast(&socurkva_cv); + cv_wait(&pendfree_thread_cv, &so_pendfree_lock); } - - return (rv); + panic("sopendfree_thread"); + /* NOTREACHED */ } void @@ -332,7 +331,7 @@ soloanfree(struct mbuf *m, void *buf, si mutex_enter(&so_pendfree_lock); m->m_next = so_pendfree; so_pendfree = m; - cv_broadcast(&socurkva_cv); + cv_signal(&pendfree_thread_cv); mutex_exit(&so_pendfree_lock); } @@ -362,7 +361,7 @@ sosend_loan(struct socket *so, struct ui KASSERT(npgs <= M_EXT_MAXPAGES); - lva = sokvaalloc(len, so); + lva = sokvaalloc(sva, len, so); if (lva == 0) return 0; @@ -375,7 +374,7 @@ sosend_loan(struct socket *so, struct ui for (i = 0, va = lva; i < npgs; i++, va += PAGE_SIZE) pmap_kenter_pa(va, VM_PAGE_TO_PHYS(m->m_ext.ext_pgs[i]), - VM_PROT_READ); + VM_PROT_READ, 0); pmap_update(pmap_kernel()); lva += (vaddr_t) iov->iov_base & PAGE_MASK; @@ -395,20 +394,6 @@ sosend_loan(struct socket *so, struct ui return (space); } -static int -sokva_reclaim_callback(struct callback_entry *ce, void *obj, void *arg) -{ - - KASSERT(ce == &sokva_reclaimerentry); - KASSERT(obj == NULL); - - sodopendfree(); - if (!vm_map_starved_p(kernel_map)) { - return CALLBACK_CHAIN_ABORT; - } - return CALLBACK_CHAIN_CONTINUE; -} - struct mbuf * getsombuf(struct socket *so, int type) { @@ -419,31 +404,99 @@ getsombuf(struct socket *so, int type) return m; } +static int +socket_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie, + void *arg0, void *arg1, void *arg2, void *arg3) +{ + int result; + enum kauth_network_req req; + + result = KAUTH_RESULT_DEFER; + req = (enum kauth_network_req)arg0; + + if ((action != KAUTH_NETWORK_SOCKET) && + (action != KAUTH_NETWORK_BIND)) + return result; + + switch (req) { + case KAUTH_REQ_NETWORK_BIND_PORT: + result = KAUTH_RESULT_ALLOW; + break; + + case KAUTH_REQ_NETWORK_SOCKET_DROP: { + /* Normal users can only drop their own connections. */ + struct socket *so = (struct socket *)arg1; + + if (so->so_cred && proc_uidmatch(cred, so->so_cred) == 0) + result = KAUTH_RESULT_ALLOW; + + break; + } + + case KAUTH_REQ_NETWORK_SOCKET_OPEN: + /* We allow "raw" routing/bluetooth sockets to anyone. */ + switch ((u_long)arg1) { + case PF_ROUTE: + case PF_OROUTE: + case PF_BLUETOOTH: + result = KAUTH_RESULT_ALLOW; + break; + default: + /* Privileged, let secmodel handle this. */ + if ((u_long)arg2 == SOCK_RAW) + break; + result = KAUTH_RESULT_ALLOW; + break; + } + break; + + case KAUTH_REQ_NETWORK_SOCKET_CANSEE: + result = KAUTH_RESULT_ALLOW; + + break; + + default: + break; + } + + return result; +} + void soinit(void) { + sysctl_kern_socket_setup(); + mutex_init(&so_pendfree_lock, MUTEX_DEFAULT, IPL_VM); softnet_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); cv_init(&socurkva_cv, "sokva"); + cv_init(&pendfree_thread_cv, "sopendfr"); soinit2(); /* Set the initial adjusted socket buffer size. */ if (sb_max_set(sb_max)) panic("bad initial sb_max value: %lu", sb_max); - callback_register(&vm_map_to_kernel(kernel_map)->vmk_reclaim_callback, - &sokva_reclaimerentry, NULL, sokva_reclaim_callback); + socket_listener = kauth_listen_scope(KAUTH_SCOPE_NETWORK, + socket_listener_cb, NULL); +} + +void +soinit1(void) +{ + int error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, + sopendfree_thread, NULL, &sopendfree_lwp, "sopendfree"); + if (error) + panic("soinit1 %d", error); } /* - * Socket operation routines. - * These routines are called by the routines in - * sys_socket.c or from a system process, and - * implement the semantics of socket operations by - * switching out to the protocol specific routines. + * socreate: create a new socket of the specified type and the protocol. + * + * => Caller may specify another socket for lock sharing (must not be held). + * => Returns the new socket without lock held. */ -/*ARGSUSED*/ int socreate(int dom, struct socket **aso, int type, int proto, struct lwp *l, struct socket *lockso) @@ -473,7 +526,7 @@ socreate(int dom, struct socket **aso, i return EPROTOTYPE; return EPROTONOSUPPORT; } - if (prp->pr_usrreq == NULL) + if (prp->pr_usrreqs == NULL) return EPROTONOSUPPORT; if (prp->pr_type != type) return EPROTOTYPE; @@ -490,66 +543,103 @@ socreate(int dom, struct socket **aso, i #endif uid = kauth_cred_geteuid(l->l_cred); so->so_uidinfo = uid_find(uid); - so->so_egid = kauth_cred_getegid(l->l_cred); so->so_cpid = l->l_proc->p_pid; - if (lockso != NULL) { - /* Caller wants us to share a lock. */ + + /* + * Lock assigned and taken during PCB attach, unless we share + * the lock with another socket, e.g. socketpair(2) case. + */ + if (lockso) { lock = lockso->so_lock; so->so_lock = lock; mutex_obj_hold(lock); mutex_enter(lock); - } else { - /* Lock assigned and taken during PRU_ATTACH. */ } - error = (*prp->pr_usrreq)(so, PRU_ATTACH, NULL, - (struct mbuf *)(long)proto, NULL, l); + + /* Attach the PCB (returns with the socket lock held). */ + error = (*prp->pr_usrreqs->pr_attach)(so, proto); KASSERT(solocked(so)); - if (error != 0) { + + if (error) { + KASSERT(so->so_pcb == NULL); so->so_state |= SS_NOFDREF; sofree(so); return error; } + so->so_cred = kauth_cred_dup(l->l_cred); sounlock(so); + *aso = so; return 0; } -/* On success, write file descriptor to fdout and return zero. On - * failure, return non-zero; *fdout will be undefined. +/* + * fsocreate: create a socket and a file descriptor associated with it. + * + * => On success, write file descriptor to fdout and return zero. + * => On failure, return non-zero; *fdout will be undefined. */ int -fsocreate(int domain, struct socket **sop, int type, int protocol, - struct lwp *l, int *fdout) +fsocreate(int domain, struct socket **sop, int type, int proto, int *fdout) { - struct socket *so; - struct file *fp; - int fd, error; + lwp_t *l = curlwp; + int error, fd, flags; + struct socket *so; + struct file *fp; - if ((error = fd_allocfile(&fp, &fd)) != 0) - return (error); - fp->f_flag = FREAD|FWRITE; + if ((error = fd_allocfile(&fp, &fd)) != 0) { + return error; + } + flags = type & SOCK_FLAGS_MASK; + fd_set_exclose(l, fd, (flags & SOCK_CLOEXEC) != 0); + fp->f_flag = FREAD|FWRITE|((flags & SOCK_NONBLOCK) ? FNONBLOCK : 0)| + ((flags & SOCK_NOSIGPIPE) ? FNOSIGPIPE : 0); fp->f_type = DTYPE_SOCKET; fp->f_ops = &socketops; - error = socreate(domain, &so, type, protocol, l, NULL); - if (error != 0) { + + type &= ~SOCK_FLAGS_MASK; + error = socreate(domain, &so, type, proto, l, NULL); + if (error) { fd_abort(curproc, fp, fd); - } else { - if (sop != NULL) - *sop = so; - fp->f_data = so; - fd_affix(curproc, fp, fd); - *fdout = fd; + return error; + } + if (flags & SOCK_NONBLOCK) { + so->so_state |= SS_NBIO; } + fp->f_socket = so; + fd_affix(curproc, fp, fd); + + if (sop != NULL) { + *sop = so; + } + *fdout = fd; return error; } int -sobind(struct socket *so, struct mbuf *nam, struct lwp *l) +sofamily(const struct socket *so) +{ + const struct protosw *pr; + const struct domain *dom; + + if ((pr = so->so_proto) == NULL) + return AF_UNSPEC; + if ((dom = pr->pr_domain) == NULL) + return AF_UNSPEC; + return dom->dom_family; +} + +int +sobind(struct socket *so, struct sockaddr *nam, struct lwp *l) { int error; solock(so); - error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, l); + if (nam->sa_family != so->so_proto->pr_domain->dom_family) { + sounlock(so); + return EAFNOSUPPORT; + } + error = (*so->so_proto->pr_usrreqs->pr_bind)(so, nam, l); sounlock(so); return error; } @@ -558,24 +648,29 @@ int solisten(struct socket *so, int backlog, struct lwp *l) { int error; + short oldopt, oldqlimit; solock(so); - if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | + if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) != 0) { - sounlock(so); - return (EOPNOTSUPP); - } - error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, - NULL, NULL, l); - if (error != 0) { sounlock(so); - return error; + return EINVAL; } + oldopt = so->so_options; + oldqlimit = so->so_qlimit; if (TAILQ_EMPTY(&so->so_q)) so->so_options |= SO_ACCEPTCONN; if (backlog < 0) backlog = 0; so->so_qlimit = min(backlog, somaxconn); + + error = (*so->so_proto->pr_usrreqs->pr_listen)(so, l); + if (error != 0) { + so->so_options = oldopt; + so->so_qlimit = oldqlimit; + sounlock(so); + return error; + } sounlock(so); return 0; } @@ -614,29 +709,24 @@ sofree(struct socket *so) KASSERT(!cv_has_waiters(&so->so_snd.sb_cv)); sorflush(so); refs = so->so_aborting; /* XXX */ -#ifdef INET - /* remove acccept filter if one is present. */ + /* Remove acccept filter if one is present. */ if (so->so_accf != NULL) - do_setopt_accept_filter(so, NULL); -#endif + (void)accept_filt_clear(so); sounlock(so); if (refs == 0) /* XXX */ soput(so); } /* - * Close a socket on last file table reference removal. - * Initiate disconnect if connected. - * Free socket when disconnect complete. + * soclose: close a socket on last file table reference removal. + * Initiate disconnect if connected. Free socket when disconnect complete. */ int soclose(struct socket *so) { - struct socket *so2; - int error; - int error2; + struct socket *so2; + int error = 0; - error = 0; solock(so); if (so->so_options & SO_ACCEPTCONN) { for (;;) { @@ -659,7 +749,7 @@ soclose(struct socket *so) break; } } - if (so->so_pcb == 0) + if (so->so_pcb == NULL) goto discard; if (so->so_state & SS_ISCONNECTED) { if ((so->so_state & SS_ISDISCONNECTING) == 0) { @@ -668,10 +758,11 @@ soclose(struct socket *so) goto drop; } if (so->so_options & SO_LINGER) { - if ((so->so_state & SS_ISDISCONNECTING) && so->so_nbio) + if ((so->so_state & (SS_ISDISCONNECTING|SS_NBIO)) == + (SS_ISDISCONNECTING|SS_NBIO)) goto drop; while (so->so_state & SS_ISCONNECTED) { - error = sowait(so, so->so_linger * hz); + error = sowait(so, true, so->so_linger * hz); if (error) break; } @@ -679,17 +770,15 @@ soclose(struct socket *so) } drop: if (so->so_pcb) { - error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH, - NULL, NULL, NULL, NULL); - if (error == 0) - error = error2; + KASSERT(solocked(so)); + (*so->so_proto->pr_usrreqs->pr_detach)(so); } discard: - if (so->so_state & SS_NOFDREF) - panic("soclose: NOFDREF"); + KASSERT((so->so_state & SS_NOFDREF) == 0); + kauth_cred_free(so->so_cred); so->so_state |= SS_NOFDREF; sofree(so); - return (error); + return error; } /* @@ -700,13 +789,12 @@ soabort(struct socket *so) { u_int refs; int error; - + KASSERT(solocked(so)); KASSERT(so->so_head == NULL); so->so_aborting++; /* XXX */ - error = (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, - NULL, NULL, NULL); + error = (*so->so_proto->pr_usrreqs->pr_abort)(so); refs = --so->so_aborting; /* XXX */ if (error || (refs == 0)) { sofree(so); @@ -717,35 +805,32 @@ soabort(struct socket *so) } int -soaccept(struct socket *so, struct mbuf *nam) +soaccept(struct socket *so, struct sockaddr *nam) { - int error; + int error; KASSERT(solocked(so)); + KASSERT((so->so_state & SS_NOFDREF) != 0); - error = 0; - if ((so->so_state & SS_NOFDREF) == 0) - panic("soaccept: !NOFDREF"); so->so_state &= ~SS_NOFDREF; if ((so->so_state & SS_ISDISCONNECTED) == 0 || (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0) - error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, - NULL, nam, NULL, NULL); + error = (*so->so_proto->pr_usrreqs->pr_accept)(so, nam); else error = ECONNABORTED; - return (error); + return error; } int -soconnect(struct socket *so, struct mbuf *nam, struct lwp *l) +soconnect(struct socket *so, struct sockaddr *nam, struct lwp *l) { - int error; + int error; KASSERT(solocked(so)); if (so->so_options & SO_ACCEPTCONN) - return (EOPNOTSUPP); + return EOPNOTSUPP; /* * If protocol is connection-based, can only connect once. * Otherwise, if connected, try to disconnect first. @@ -754,24 +839,24 @@ soconnect(struct socket *so, struct mbuf */ if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && ((so->so_proto->pr_flags & PR_CONNREQUIRED) || - (error = sodisconnect(so)))) + (error = sodisconnect(so)))) { error = EISCONN; - else - error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT, - NULL, nam, NULL, l); - return (error); + } else { + if (nam->sa_family != so->so_proto->pr_domain->dom_family) { + return EAFNOSUPPORT; + } + error = (*so->so_proto->pr_usrreqs->pr_connect)(so, nam, l); + } + + return error; } int soconnect2(struct socket *so1, struct socket *so2) { - int error; - KASSERT(solocked2(so1, so2)); - error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, - NULL, (struct mbuf *)so2, NULL, NULL); - return (error); + return (*so1->so_proto->pr_usrreqs->pr_connect2)(so1, so2); } int @@ -786,10 +871,8 @@ sodisconnect(struct socket *so) } else if (so->so_state & SS_ISDISCONNECTING) { error = EALREADY; } else { - error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, - NULL, NULL, NULL, NULL); + error = (*so->so_proto->pr_usrreqs->pr_disconnect)(so); } - sodopendfree(); return (error); } @@ -812,16 +895,14 @@ sodisconnect(struct socket *so) * Data and control buffers are freed on return. */ int -sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top, - struct mbuf *control, int flags, struct lwp *l) +sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, + struct mbuf *top, struct mbuf *control, int flags, struct lwp *l) { struct mbuf **mp, *m; - struct proc *p; long space, len, resid, clen, mlen; int error, s, dontroute, atomic; + short wakeup_state = 0; - p = l->l_proc; - sodopendfree(); clen = 0; /* @@ -868,12 +949,11 @@ sosend(struct socket *so, struct mbuf *a } if ((so->so_state & SS_ISCONNECTED) == 0) { if (so->so_proto->pr_flags & PR_CONNREQUIRED) { - if ((so->so_state & SS_ISCONFIRMING) == 0 && - !(resid == 0 && clen != 0)) { + if (resid || clen == 0) { error = ENOTCONN; goto release; } - } else if (addr == 0) { + } else if (addr == NULL) { error = EDESTADDRREQ; goto release; } @@ -888,16 +968,22 @@ sosend(struct socket *so, struct mbuf *a } if (space < resid + clen && (atomic || space < so->so_snd.sb_lowat || space < clen)) { - if (so->so_nbio) { + if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO)) { error = EWOULDBLOCK; goto release; } sbunlock(&so->so_snd); + if (wakeup_state & SS_RESTARTSYS) { + error = ERESTART; + goto out; + } error = sbwait(&so->so_snd); if (error) goto out; + wakeup_state = so->so_state; goto restart; } + wakeup_state = 0; mp = ⊤ space -= clen; do { @@ -915,7 +1001,7 @@ sosend(struct socket *so, struct mbuf *a m = m_gethdr(M_WAIT, MT_DATA); mlen = MHLEN; m->m_pkthdr.len = 0; - m->m_pkthdr.rcvif = NULL; + m_reset_rcvif(m); } else { m = m_get(M_WAIT, MT_DATA); mlen = MLEN; @@ -932,7 +1018,7 @@ sosend(struct socket *so, struct mbuf *a } if (resid >= MINCLSIZE && space >= MCLBYTES) { SOSEND_COUNTER_INCR(&sosend_copy_big); - m_clget(m, M_WAIT); + m_clget(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) goto nopages; mlen = MCLBYTES; @@ -981,9 +1067,13 @@ sosend(struct socket *so, struct mbuf *a so->so_options |= SO_DONTROUTE; if (resid > 0) so->so_state |= SS_MORETOCOME; - error = (*so->so_proto->pr_usrreq)(so, - (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND, - top, addr, control, curlwp); + if (flags & MSG_OOB) { + error = (*so->so_proto->pr_usrreqs->pr_sendoob)( + so, top, control); + } else { + error = (*so->so_proto->pr_usrreqs->pr_send)(so, + top, addr, control, l); + } if (dontroute) so->so_options &= ~SO_DONTROUTE; if (resid > 0) @@ -1068,11 +1158,13 @@ soreceive(struct socket *so, struct mbuf { struct lwp *l = curlwp; struct mbuf *m, **mp, *mt; - int atomic, flags, len, error, s, offset, moff, type, orig_resid; + size_t len, offset, moff, orig_resid; + int atomic, flags, error, s, type; const struct protosw *pr; struct mbuf *nextrecord; int mbuf_removed = 0; const struct domain *dom; + short wakeup_state = 0; pr = so->so_proto; atomic = pr->pr_flags & PR_ATOMIC; @@ -1090,20 +1182,16 @@ soreceive(struct socket *so, struct mbuf else flags = 0; - if ((flags & MSG_DONTWAIT) == 0) - sodopendfree(); - if (flags & MSG_OOB) { m = m_get(M_WAIT, MT_DATA); solock(so); - error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m, - (struct mbuf *)(long)(flags & MSG_PEEK), NULL, l); + error = (*pr->pr_usrreqs->pr_recvoob)(so, m, flags & MSG_PEEK); sounlock(so); if (error) goto bad; do { error = uiomove(mtod(m, void *), - (int) min(uio->uio_resid, m->m_len), uio); + MIN(uio->uio_resid, m->m_len), uio); m = m_free(m); } while (uio->uio_resid > 0 && error == 0 && m); bad: @@ -1121,9 +1209,6 @@ soreceive(struct socket *so, struct mbuf */ s = splsoftnet(); solock(so); - if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) - (*pr->pr_usrreq)(so, PRU_RCVD, NULL, NULL, NULL, l); - restart: if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0) { sounlock(so); @@ -1180,19 +1265,24 @@ soreceive(struct socket *so, struct mbuf } if (uio->uio_resid == 0) goto release; - if (so->so_nbio || (flags & MSG_DONTWAIT)) { + if ((so->so_state & SS_NBIO) || + (flags & (MSG_DONTWAIT|MSG_NBIO))) { error = EWOULDBLOCK; goto release; } SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1"); SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1"); sbunlock(&so->so_rcv); - error = sbwait(&so->so_rcv); + if (wakeup_state & SS_RESTARTSYS) + error = ERESTART; + else + error = sbwait(&so->so_rcv); if (error != 0) { sounlock(so); splx(s); return error; } + wakeup_state = so->so_state; goto restart; } dontblock: @@ -1237,12 +1327,35 @@ soreceive(struct socket *so, struct mbuf m->m_next = NULL; m = so->so_rcv.sb_mb; } else { - MFREE(m, so->so_rcv.sb_mb); - m = so->so_rcv.sb_mb; + m = so->so_rcv.sb_mb = m_free(m); } sbsync(&so->so_rcv, nextrecord); } } + if (pr->pr_flags & PR_ADDR_OPT) { + /* + * For SCTP we may be getting a + * whole message OR a partial delivery. + */ + if (m->m_type == MT_SONAME) { + orig_resid = 0; + if (flags & MSG_PEEK) { + if (paddr) + *paddr = m_copy(m, 0, m->m_len); + m = m->m_next; + } else { + sbfree(&so->so_rcv, m); + if (paddr) { + *paddr = m; + so->so_rcv.sb_mb = m->m_next; + m->m_next = 0; + m = so->so_rcv.sb_mb; + } else { + m = so->so_rcv.sb_mb = m_free(m); + } + } + } + } /* * Process one or more MT_CONTROL mbufs present before any data mbufs @@ -1281,7 +1394,9 @@ soreceive(struct socket *so, struct mbuf type == SCM_RIGHTS) { sounlock(so); splx(s); - error = (*dom->dom_externalize)(cm, l); + error = (*dom->dom_externalize)(cm, l, + (flags & MSG_CMSG_CLOEXEC) ? + O_CLOEXEC : 0); s = splsoftnet(); solock(so); } @@ -1295,7 +1410,7 @@ soreceive(struct socket *so, struct mbuf */ if (dom->dom_dispose != NULL && type == SCM_RIGHTS) { - sounlock(so); + sounlock(so); (*dom->dom_dispose)(cm); solock(so); } @@ -1331,6 +1446,7 @@ soreceive(struct socket *so, struct mbuf panic("receive 3"); #endif so->so_state &= ~SS_RCVATMARK; + wakeup_state = 0; len = uio->uio_resid; if (so->so_oobmark && len > so->so_oobmark - offset) len = so->so_oobmark - offset; @@ -1349,7 +1465,7 @@ soreceive(struct socket *so, struct mbuf SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove"); sounlock(so); splx(s); - error = uiomove(mtod(m, char *) + moff, (int)len, uio); + error = uiomove(mtod(m, char *) + moff, len, uio); s = splsoftnet(); solock(so); if (error != 0) { @@ -1374,6 +1490,10 @@ soreceive(struct socket *so, struct mbuf if (len == m->m_len - moff) { if (m->m_flags & M_EOR) flags |= MSG_EOR; +#ifdef SCTP + if (m->m_flags & M_NOTIFICATION) + flags |= MSG_NOTIFICATION; +#endif /* SCTP */ if (flags & MSG_PEEK) { m = m->m_next; moff = 0; @@ -1386,8 +1506,7 @@ soreceive(struct socket *so, struct mbuf so->so_rcv.sb_mb = m = m->m_next; *mp = NULL; } else { - MFREE(m, so->so_rcv.sb_mb); - m = so->so_rcv.sb_mb; + m = so->so_rcv.sb_mb = m_free(m); } /* * If m != NULL, we also know that @@ -1459,11 +1578,13 @@ soreceive(struct socket *so, struct mbuf * get it filled again. */ if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb) - (*pr->pr_usrreq)(so, PRU_RCVD, - NULL, (struct mbuf *)(long)flags, NULL, l); + (*pr->pr_usrreqs->pr_rcvd)(so, flags, l); SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2"); SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2"); - error = sbwait(&so->so_rcv); + if (wakeup_state & SS_RESTARTSYS) + error = ERESTART; + else + error = sbwait(&so->so_rcv); if (error != 0) { sbunlock(&so->so_rcv); sounlock(so); @@ -1472,6 +1593,7 @@ soreceive(struct socket *so, struct mbuf } if ((m = so->so_rcv.sb_mb) != NULL) nextrecord = m->m_nextpkt; + wakeup_state = so->so_state; } } @@ -1497,8 +1619,7 @@ soreceive(struct socket *so, struct mbuf SBLASTRECORDCHK(&so->so_rcv, "soreceive 4"); SBLASTMBUFCHK(&so->so_rcv, "soreceive 4"); if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) - (*pr->pr_usrreq)(so, PRU_RCVD, NULL, - (struct mbuf *)(long)flags, NULL, l); + (*pr->pr_usrreqs->pr_rcvd)(so, flags, l); } if (orig_resid == uio->uio_resid && orig_resid && (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { @@ -1532,13 +1653,31 @@ soshutdown(struct socket *so, int how) error = 0; } if (how == SHUT_WR || how == SHUT_RDWR) - error = (*pr->pr_usrreq)(so, PRU_SHUTDOWN, NULL, - NULL, NULL, NULL); + error = (*pr->pr_usrreqs->pr_shutdown)(so); return error; } void +sorestart(struct socket *so) +{ + /* + * An application has called close() on an fd on which another + * of its threads has called a socket system call. + * Mark this and wake everyone up, and code that would block again + * instead returns ERESTART. + * On system call re-entry the fd is validated and EBADF returned. + * Any other fd will block again on the 2nd syscall. + */ + solock(so); + so->so_state |= SS_RESTARTSYS; + cv_broadcast(&so->so_cv); + cv_broadcast(&so->so_snd.sb_cv); + cv_broadcast(&so->so_rcv.sb_cv); + sounlock(so); +} + +void sorflush(struct socket *so) { struct sockbuf *sb, asb; @@ -1573,35 +1712,34 @@ sorflush(struct socket *so) static int sosetopt1(struct socket *so, const struct sockopt *sopt) { - int error, optval; + int error = EINVAL, opt; + int optval = 0; /* XXX: gcc */ struct linger l; struct timeval tv; - switch (sopt->sopt_name) { + switch ((opt = sopt->sopt_name)) { -#ifdef INET case SO_ACCEPTFILTER: - error = do_setopt_accept_filter(so, sopt); - if (error) - return error; + error = accept_filt_setopt(so, sopt); + KASSERT(solocked(so)); break; -#endif - case SO_LINGER: - error = sockopt_get(sopt, &l, sizeof(l)); - if (error) - return (error); - - if (l.l_linger < 0 || l.l_linger > USHRT_MAX || - l.l_linger > (INT_MAX / hz)) - return EDOM; - so->so_linger = l.l_linger; - if (l.l_onoff) - so->so_options |= SO_LINGER; - else - so->so_options &= ~SO_LINGER; - - break; + case SO_LINGER: + error = sockopt_get(sopt, &l, sizeof(l)); + solock(so); + if (error) + break; + if (l.l_linger < 0 || l.l_linger > USHRT_MAX || + l.l_linger > (INT_MAX / hz)) { + error = EDOM; + break; + } + so->so_linger = l.l_linger; + if (l.l_onoff) + so->so_options |= SO_LINGER; + else + so->so_options &= ~SO_LINGER; + break; case SO_DEBUG: case SO_KEEPALIVE: @@ -1612,14 +1750,18 @@ sosetopt1(struct socket *so, const struc case SO_REUSEPORT: case SO_OOBINLINE: case SO_TIMESTAMP: + case SO_NOSIGPIPE: +#ifdef SO_OTIMESTAMP + case SO_OTIMESTAMP: +#endif error = sockopt_getint(sopt, &optval); + solock(so); if (error) - return (error); - + break; if (optval) - so->so_options |= sopt->sopt_name; + so->so_options |= opt; else - so->so_options &= ~sopt->sopt_name; + so->so_options &= ~opt; break; case SO_SNDBUF: @@ -1627,28 +1769,33 @@ sosetopt1(struct socket *so, const struc case SO_SNDLOWAT: case SO_RCVLOWAT: error = sockopt_getint(sopt, &optval); + solock(so); if (error) - return (error); + break; /* * Values < 1 make no sense for any of these * options, so disallow them. */ - if (optval < 1) - return EINVAL; + if (optval < 1) { + error = EINVAL; + break; + } - switch (sopt->sopt_name) { + switch (opt) { case SO_SNDBUF: - if (sbreserve(&so->so_snd, (u_long)optval, so) == 0) - return ENOBUFS; - + if (sbreserve(&so->so_snd, (u_long)optval, so) == 0) { + error = ENOBUFS; + break; + } so->so_snd.sb_flags &= ~SB_AUTOSIZE; break; case SO_RCVBUF: - if (sbreserve(&so->so_rcv, (u_long)optval, so) == 0) - return ENOBUFS; - + if (sbreserve(&so->so_rcv, (u_long)optval, so) == 0) { + error = ENOBUFS; + break; + } so->so_rcv.sb_flags &= ~SB_AUTOSIZE; break; @@ -1672,20 +1819,40 @@ sosetopt1(struct socket *so, const struc } break; +#ifdef COMPAT_50 + case SO_OSNDTIMEO: + case SO_ORCVTIMEO: { + struct timeval50 otv; + error = sockopt_get(sopt, &otv, sizeof(otv)); + if (error) { + solock(so); + break; + } + timeval50_to_timeval(&otv, &tv); + opt = opt == SO_OSNDTIMEO ? SO_SNDTIMEO : SO_RCVTIMEO; + error = 0; + /*FALLTHROUGH*/ + } +#endif /* COMPAT_50 */ + case SO_SNDTIMEO: case SO_RCVTIMEO: - error = sockopt_get(sopt, &tv, sizeof(tv)); if (error) - return (error); + error = sockopt_get(sopt, &tv, sizeof(tv)); + solock(so); + if (error) + break; - if (tv.tv_sec > (INT_MAX - tv.tv_usec / tick) / hz) - return EDOM; + if (tv.tv_sec > (INT_MAX - tv.tv_usec / tick) / hz) { + error = EDOM; + break; + } optval = tv.tv_sec * hz + tv.tv_usec / tick; if (optval == 0 && tv.tv_usec != 0) optval = 1; - switch (sopt->sopt_name) { + switch (opt) { case SO_SNDTIMEO: so->so_snd.sb_timeo = optval; break; @@ -1696,9 +1863,12 @@ sosetopt1(struct socket *so, const struc break; default: - return ENOPROTOOPT; + solock(so); + error = ENOPROTOOPT; + break; } - return 0; + KASSERT(solocked(so)); + return error; } int @@ -1706,11 +1876,13 @@ sosetopt(struct socket *so, struct socko { int error, prerr; - solock(so); - if (sopt->sopt_level == SOL_SOCKET) + if (sopt->sopt_level == SOL_SOCKET) { error = sosetopt1(so, sopt); - else + KASSERT(solocked(so)); + } else { error = ENOPROTOOPT; + solock(so); + } if ((error == 0 || error == ENOPROTOOPT) && so->so_proto != NULL && so->so_proto->pr_ctloutput != NULL) { @@ -1746,24 +1918,22 @@ so_setsockopt(struct lwp *l, struct sock return error; } - + /* * internal get SOL_SOCKET options */ static int sogetopt1(struct socket *so, struct sockopt *sopt) { - int error, optval; + int error, optval, opt; struct linger l; struct timeval tv; - switch (sopt->sopt_name) { + switch ((opt = sopt->sopt_name)) { -#ifdef INET case SO_ACCEPTFILTER: - error = do_getopt_accept_filter(so, sopt); + error = accept_filt_getopt(so, sopt); break; -#endif case SO_LINGER: l.l_onoff = (so->so_options & SO_LINGER) ? 1 : 0; @@ -1781,8 +1951,12 @@ sogetopt1(struct socket *so, struct sock case SO_BROADCAST: case SO_OOBINLINE: case SO_TIMESTAMP: - error = sockopt_setint(sopt, - (so->so_options & sopt->sopt_name) ? 1 : 0); + case SO_NOSIGPIPE: +#ifdef SO_OTIMESTAMP + case SO_OTIMESTAMP: +#endif + case SO_ACCEPTCONN: + error = sockopt_setint(sopt, (so->so_options & opt) ? 1 : 0); break; case SO_TYPE: @@ -1810,9 +1984,25 @@ sogetopt1(struct socket *so, struct sock error = sockopt_setint(sopt, so->so_rcv.sb_lowat); break; +#ifdef COMPAT_50 + case SO_OSNDTIMEO: + case SO_ORCVTIMEO: { + struct timeval50 otv; + + optval = (opt == SO_OSNDTIMEO ? + so->so_snd.sb_timeo : so->so_rcv.sb_timeo); + + otv.tv_sec = optval / hz; + otv.tv_usec = (optval % hz) * tick; + + error = sockopt_set(sopt, &otv, sizeof(otv)); + break; + } +#endif /* COMPAT_50 */ + case SO_SNDTIMEO: case SO_RCVTIMEO: - optval = (sopt->sopt_name == SO_SNDTIMEO ? + optval = (opt == SO_SNDTIMEO ? so->so_snd.sb_timeo : so->so_rcv.sb_timeo); tv.tv_sec = optval / hz; @@ -1856,22 +2046,26 @@ sogetopt(struct socket *so, struct socko * alloc sockopt data buffer buffer * - will be released at destroy */ -static void -sockopt_alloc(struct sockopt *sopt, size_t len) +static int +sockopt_alloc(struct sockopt *sopt, size_t len, km_flag_t kmflag) { KASSERT(sopt->sopt_size == 0); - if (len > sizeof(sopt->sopt_buf)) - sopt->sopt_data = kmem_zalloc(len, KM_SLEEP); - else + if (len > sizeof(sopt->sopt_buf)) { + sopt->sopt_data = kmem_zalloc(len, kmflag); + if (sopt->sopt_data == NULL) + return ENOMEM; + } else sopt->sopt_data = sopt->sopt_buf; sopt->sopt_size = len; + return 0; } /* * initialise sockopt storage + * - MAY sleep during allocation */ void sockopt_init(struct sockopt *sopt, int level, int name, size_t size) @@ -1881,7 +2075,7 @@ sockopt_init(struct sockopt *sopt, int l sopt->sopt_level = level; sopt->sopt_name = name; - sockopt_alloc(sopt, size); + (void)sockopt_alloc(sopt, size, KM_SLEEP); } /* @@ -1901,14 +2095,18 @@ sockopt_destroy(struct sockopt *sopt) /* * set sockopt value * - value is copied into sockopt - * - memory is allocated when necessary + * - memory is allocated when necessary, will not sleep */ int sockopt_set(struct sockopt *sopt, const void *buf, size_t len) { + int error; - if (sopt->sopt_size == 0) - sockopt_alloc(sopt, len); + if (sopt->sopt_size == 0) { + error = sockopt_alloc(sopt, len, KM_NOSLEEP); + if (error) + return error; + } KASSERT(sopt->sopt_size == len); memcpy(sopt->sopt_data, buf, len); @@ -1954,16 +2152,21 @@ sockopt_getint(const struct sockopt *sop * set sockopt value from mbuf * - ONLY for legacy code * - mbuf is released by sockopt + * - will not sleep */ int sockopt_setmbuf(struct sockopt *sopt, struct mbuf *m) { size_t len; + int error; len = m_length(m); - if (sopt->sopt_size == 0) - sockopt_alloc(sopt, len); + if (sopt->sopt_size == 0) { + error = sockopt_alloc(sopt, len, KM_NOSLEEP); + if (error) + return error; + } KASSERT(sopt->sopt_size == len); m_copydata(m, 0, len, sopt->sopt_data); @@ -1976,23 +2179,30 @@ sockopt_setmbuf(struct sockopt *sopt, st * get sockopt value into mbuf * - ONLY for legacy code * - mbuf to be released by the caller + * - will not sleep */ struct mbuf * sockopt_getmbuf(const struct sockopt *sopt) { struct mbuf *m; - m = m_get(M_WAIT, MT_SOOPTS); - if (m == NULL) + if (sopt->sopt_size > MCLBYTES) return NULL; - m->m_len = MLEN; - m_copyback(m, 0, sopt->sopt_size, sopt->sopt_data); - if (m_length(m) != max(sopt->sopt_size, MLEN)) { - m_freem(m); + m = m_get(M_DONTWAIT, MT_SOOPTS); + if (m == NULL) return NULL; + + if (sopt->sopt_size > MLEN) { + MCLGET(m, M_DONTWAIT); + if ((m->m_flags & M_EXT) == 0) { + m_free(m); + return NULL; + } } - m->m_len = min(sopt->sopt_size, MLEN); + + memcpy(mtod(m, void *), sopt->sopt_data, sopt->sopt_size); + m->m_len = sopt->sopt_size; return m; } @@ -2002,7 +2212,7 @@ sohasoutofband(struct socket *so) { fownsignal(so->so_pgid, SIGURG, POLL_PRI, POLLPRI|POLLRDBAND, so); - selnotify(&so->so_rcv.sb_sel, POLLPRI | POLLRDBAND, 0); + selnotify(&so->so_rcv.sb_sel, POLLPRI | POLLRDBAND, NOTE_SUBMIT); } static void @@ -2010,7 +2220,7 @@ filt_sordetach(struct knote *kn) { struct socket *so; - so = ((file_t *)kn->kn_obj)->f_data; + so = ((file_t *)kn->kn_obj)->f_socket; solock(so); SLIST_REMOVE(&so->so_rcv.sb_sel.sel_klist, kn, knote, kn_selnext); if (SLIST_EMPTY(&so->so_rcv.sb_sel.sel_klist)) @@ -2025,7 +2235,7 @@ filt_soread(struct knote *kn, long hint) struct socket *so; int rv; - so = ((file_t *)kn->kn_obj)->f_data; + so = ((file_t *)kn->kn_obj)->f_socket; if (hint != NOTE_SUBMIT) solock(so); kn->kn_data = so->so_rcv.sb_cc; @@ -2037,7 +2247,7 @@ filt_soread(struct knote *kn, long hint) rv = 1; else if (kn->kn_sfflags & NOTE_LOWAT) rv = (kn->kn_data >= kn->kn_sdata); - else + else rv = (kn->kn_data >= so->so_rcv.sb_lowat); if (hint != NOTE_SUBMIT) sounlock(so); @@ -2049,7 +2259,7 @@ filt_sowdetach(struct knote *kn) { struct socket *so; - so = ((file_t *)kn->kn_obj)->f_data; + so = ((file_t *)kn->kn_obj)->f_socket; solock(so); SLIST_REMOVE(&so->so_snd.sb_sel.sel_klist, kn, knote, kn_selnext); if (SLIST_EMPTY(&so->so_snd.sb_sel.sel_klist)) @@ -2064,7 +2274,7 @@ filt_sowrite(struct knote *kn, long hint struct socket *so; int rv; - so = ((file_t *)kn->kn_obj)->f_data; + so = ((file_t *)kn->kn_obj)->f_socket; if (hint != NOTE_SUBMIT) solock(so); kn->kn_data = sbspace(&so->so_snd); @@ -2093,7 +2303,7 @@ filt_solisten(struct knote *kn, long hin struct socket *so; int rv; - so = ((file_t *)kn->kn_obj)->f_data; + so = ((file_t *)kn->kn_obj)->f_socket; /* * Set kn_data to number of incoming connections, not @@ -2121,7 +2331,7 @@ soo_kqfilter(struct file *fp, struct kno struct socket *so; struct sockbuf *sb; - so = ((file_t *)kn->kn_obj)->f_data; + so = ((file_t *)kn->kn_obj)->f_socket; solock(so); switch (kn->kn_filter) { case EVFILT_READ: @@ -2203,6 +2413,7 @@ sopoll(struct socket *so, int events) #include static int sysctl_kern_somaxkva(SYSCTLFN_PROTO); +static int sysctl_kern_sbmax(SYSCTLFN_PROTO); /* * sysctl helper routine for kern.somaxkva. ensures that the given @@ -2233,20 +2444,48 @@ sysctl_kern_somaxkva(SYSCTLFN_ARGS) return (error); } -SYSCTL_SETUP(sysctl_kern_somaxkva_setup, "sysctl kern.somaxkva setup") +/* + * sysctl helper routine for kern.sbmax. Basically just ensures that + * any new value is not too small. + */ +static int +sysctl_kern_sbmax(SYSCTLFN_ARGS) +{ + int error, new_sbmax; + struct sysctlnode node; + + new_sbmax = sb_max; + node = *rnode; + node.sysctl_data = &new_sbmax; + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + if (error || newp == NULL) + return (error); + + KERNEL_LOCK(1, NULL); + error = sb_max_set(new_sbmax); + KERNEL_UNLOCK_ONE(NULL); + + return (error); +} + +static void +sysctl_kern_socket_setup(void) { - sysctl_createv(clog, 0, NULL, NULL, - CTLFLAG_PERMANENT, - CTLTYPE_NODE, "kern", NULL, - NULL, 0, NULL, 0, - CTL_KERN, CTL_EOL); + KASSERT(socket_sysctllog == NULL); - sysctl_createv(clog, 0, NULL, NULL, + sysctl_createv(&socket_sysctllog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "somaxkva", SYSCTL_DESCR("Maximum amount of kernel memory to be " "used for socket buffers"), sysctl_kern_somaxkva, 0, NULL, 0, CTL_KERN, KERN_SOMAXKVA, CTL_EOL); + + sysctl_createv(&socket_sysctllog, 0, NULL, NULL, + CTLFLAG_PERMANENT|CTLFLAG_READWRITE, + CTLTYPE_INT, "sbmax", + SYSCTL_DESCR("Maximum socket buffer size"), + sysctl_kern_sbmax, 0, NULL, 0, + CTL_KERN, KERN_SBMAX, CTL_EOL); }