Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.111.2.9 retrieving revision 1.118 diff -u -p -r1.111.2.9 -r1.118 --- src/sys/kern/uipc_socket.c 2007/11/15 11:44:56 1.111.2.9 +++ src/sys/kern/uipc_socket.c 2006/05/14 21:15:12 1.118 @@ -1,7 +1,7 @@ -/* $NetBSD: uipc_socket.c,v 1.111.2.9 2007/11/15 11:44:56 yamt Exp $ */ +/* $NetBSD: uipc_socket.c,v 1.118 2006/05/14 21:15:12 elad Exp $ */ /*- - * Copyright (c) 2002, 2007 The NetBSD Foundation, Inc. + * Copyright (c) 2002 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -68,7 +68,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.111.2.9 2007/11/15 11:44:56 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.118 2006/05/14 21:15:12 elad Exp $"); #include "opt_sock_counters.h" #include "opt_sosend_loan.h" @@ -79,7 +79,6 @@ __KERNEL_RCSID(0, "$NetBSD: uipc_socket. #include #include #include -#include #include #include #include @@ -93,19 +92,14 @@ __KERNEL_RCSID(0, "$NetBSD: uipc_socket. 
#include #include #include -#include -#include #include -POOL_INIT(socket_pool, sizeof(struct socket), 0, 0, 0, "sockpl", NULL, - IPL_SOFTNET); +POOL_INIT(socket_pool, sizeof(struct socket), 0, 0, 0, "sockpl", NULL); MALLOC_DEFINE(M_SOOPTS, "soopts", "socket options"); MALLOC_DEFINE(M_SONAME, "soname", "socket name"); -extern const struct fileops socketops; - extern int somaxconn; /* patchable (XXX sysctl) */ int somaxconn = SOMAXCONN; @@ -133,15 +127,23 @@ EVCNT_ATTACH_STATIC(sosend_kvalimit); #endif /* SOSEND_COUNTERS */ -static struct callback_entry sokva_reclaimerentry; +void +soinit(void) +{ + + /* Set the initial adjusted socket buffer size. */ + if (sb_max_set(sb_max)) + panic("bad initial sb_max value: %lu", sb_max); + +} #ifdef SOSEND_NO_LOAN -int sock_loan_thresh = -1; +int use_sosend_loan = 0; #else -int sock_loan_thresh = 4096; +int use_sosend_loan = 1; #endif -static kmutex_t so_pendfree_lock; +static struct simplelock so_pendfree_slock = SIMPLELOCK_INITIALIZER; static struct mbuf *so_pendfree; #ifndef SOMAXKVA @@ -149,8 +151,9 @@ static struct mbuf *so_pendfree; #endif int somaxkva = SOMAXKVA; static int socurkva; -static kcondvar_t socurkva_cv; +static int sokvawaiters; +#define SOCK_LOAN_THRESH 4096 #define SOCK_LOAN_CHUNK 65536 static size_t sodopendfree(void); @@ -159,9 +162,11 @@ static size_t sodopendfreel(void); static vsize_t sokvareserve(struct socket *so, vsize_t len) { + int s; int error; - mutex_enter(&so_pendfree_lock); + s = splvm(); + simple_lock(&so_pendfree_slock); while (socurkva + len > somaxkva) { size_t freed; @@ -179,25 +184,33 @@ sokvareserve(struct socket *so, vsize_t continue; SOSEND_COUNTER_INCR(&sosend_kvalimit); - error = cv_wait_sig(&socurkva_cv, &so_pendfree_lock); + sokvawaiters++; + error = ltsleep(&socurkva, PVM | PCATCH, "sokva", 0, + &so_pendfree_slock); + sokvawaiters--; if (error) { len = 0; break; } } socurkva += len; - mutex_exit(&so_pendfree_lock); + simple_unlock(&so_pendfree_slock); + splx(s); return len; } 
static void sokvaunreserve(vsize_t len) { + int s; - mutex_enter(&so_pendfree_lock); + s = splvm(); + simple_lock(&so_pendfree_slock); socurkva -= len; - cv_broadcast(&socurkva_cv); - mutex_exit(&so_pendfree_lock); + if (sokvawaiters) + wakeup(&socurkva); + simple_unlock(&so_pendfree_slock); + splx(s); } /* @@ -251,23 +264,30 @@ sokvafree(vaddr_t sva, vsize_t len) } static void -sodoloanfree(struct vm_page **pgs, void *buf, size_t size, bool mapped) +sodoloanfree(struct vm_page **pgs, caddr_t buf, size_t size) { - vaddr_t sva, eva; + vaddr_t va, sva, eva; vsize_t len; - int npgs; - - KASSERT(pgs != NULL); + paddr_t pa; + int i, npgs; eva = round_page((vaddr_t) buf + size); sva = trunc_page((vaddr_t) buf); len = eva - sva; npgs = len >> PAGE_SHIFT; - if (mapped) { - pmap_kremove(sva, len); - pmap_update(pmap_kernel()); + if (__predict_false(pgs == NULL)) { + pgs = alloca(npgs * sizeof(*pgs)); + + for (i = 0, va = sva; va < eva; i++, va += PAGE_SIZE) { + if (pmap_extract(pmap_kernel(), va, &pa) == FALSE) + panic("sodoloanfree: va 0x%lx not mapped", va); + pgs[i] = PHYS_TO_VM_PAGE(pa); + } } + + pmap_kremove(sva, len); + pmap_update(pmap_kernel()); uvm_unloan(pgs, npgs, UVM_LOAN_TOPAGE); sokvafree(sva, len); } @@ -275,58 +295,75 @@ sodoloanfree(struct vm_page **pgs, void static size_t sodopendfree() { + int s; size_t rv; - mutex_enter(&so_pendfree_lock); + s = splvm(); + simple_lock(&so_pendfree_slock); rv = sodopendfreel(); - mutex_exit(&so_pendfree_lock); + simple_unlock(&so_pendfree_slock); + splx(s); return rv; } /* * sodopendfreel: free mbufs on "pendfree" list. - * unlock and relock so_pendfree_lock when freeing mbufs. + * unlock and relock so_pendfree_slock when freeing mbufs. * - * => called with so_pendfree_lock held. + * => called with so_pendfree_slock held. + * => called at splvm. 
*/ static size_t sodopendfreel() { - struct mbuf *m, *next; size_t rv = 0; - KASSERT(mutex_owned(&so_pendfree_lock)); + LOCK_ASSERT(simple_lock_held(&so_pendfree_slock)); + + for (;;) { + struct mbuf *m; + struct mbuf *next; - while (so_pendfree != NULL) { m = so_pendfree; + if (m == NULL) + break; so_pendfree = NULL; - mutex_exit(&so_pendfree_lock); + simple_unlock(&so_pendfree_slock); + /* XXX splx */ for (; m != NULL; m = next) { next = m->m_next; - KASSERT((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0); - KASSERT(m->m_ext.ext_refcnt == 0); rv += m->m_ext.ext_size; - sodoloanfree(m->m_ext.ext_pgs, m->m_ext.ext_buf, - m->m_ext.ext_size, - (m->m_ext.ext_flags & M_EXT_LAZY) == 0); - pool_cache_put(mb_cache, m); + sodoloanfree((m->m_flags & M_EXT_PAGES) ? + m->m_ext.ext_pgs : NULL, m->m_ext.ext_buf, + m->m_ext.ext_size); + pool_cache_put(&mbpool_cache, m); } - mutex_enter(&so_pendfree_lock); + /* XXX splvm */ + simple_lock(&so_pendfree_slock); } return (rv); } void -soloanfree(struct mbuf *m, void *buf, size_t size, void *arg) +soloanfree(struct mbuf *m, caddr_t buf, size_t size, void *arg) { + int s; - KASSERT(m != NULL); + if (m == NULL) { + + /* + * called from MEXTREMOVE. + */ + + sodoloanfree(NULL, buf, size); + return; + } /* * postpone freeing mbuf. @@ -335,11 +372,14 @@ soloanfree(struct mbuf *m, void *buf, si * because we need to put kva back to kernel_map. 
*/ - mutex_enter(&so_pendfree_lock); + s = splvm(); + simple_lock(&so_pendfree_slock); m->m_next = so_pendfree; so_pendfree = m; - cv_broadcast(&socurkva_cv); - mutex_exit(&so_pendfree_lock); + if (sokvawaiters) + wakeup(&socurkva); + simple_unlock(&so_pendfree_slock); + splx(s); } static long @@ -348,12 +388,8 @@ sosend_loan(struct socket *so, struct ui struct iovec *iov = uio->uio_iov; vaddr_t sva, eva; vsize_t len; - vaddr_t lva; - int npgs, error; -#if !defined(__HAVE_LAZY_MBUF) - vaddr_t va; - int i; -#endif /* !defined(__HAVE_LAZY_MBUF) */ + vaddr_t lva, va; + int npgs, i, error; if (VMSPACE_IS_KERNEL_P(uio->uio_vmspace)) return (0); @@ -382,26 +418,19 @@ sosend_loan(struct socket *so, struct ui return (0); } -#if !defined(__HAVE_LAZY_MBUF) for (i = 0, va = lva; i < npgs; i++, va += PAGE_SIZE) pmap_kenter_pa(va, VM_PAGE_TO_PHYS(m->m_ext.ext_pgs[i]), VM_PROT_READ); pmap_update(pmap_kernel()); -#endif /* !defined(__HAVE_LAZY_MBUF) */ lva += (vaddr_t) iov->iov_base & PAGE_MASK; - MEXTADD(m, (void *) lva, space, M_MBUF, soloanfree, so); + MEXTADD(m, (caddr_t) lva, space, M_MBUF, soloanfree, so); m->m_flags |= M_EXT_PAGES | M_EXT_ROMAP; -#if defined(__HAVE_LAZY_MBUF) - m->m_flags |= M_EXT_LAZY; - m->m_ext.ext_flags |= M_EXT_LAZY; -#endif /* defined(__HAVE_LAZY_MBUF) */ - uio->uio_resid -= space; /* uio_offset not updated, not set/used for write(2) */ - uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + space; + uio->uio_iov->iov_base = (caddr_t) uio->uio_iov->iov_base + space; uio->uio_iov->iov_len -= space; if (uio->uio_iov->iov_len == 0) { uio->uio_iov++; @@ -411,56 +440,6 @@ sosend_loan(struct socket *so, struct ui return (space); } -static int -sokva_reclaim_callback(struct callback_entry *ce, void *obj, void *arg) -{ - - KASSERT(ce == &sokva_reclaimerentry); - KASSERT(obj == NULL); - - sodopendfree(); - if (!vm_map_starved_p(kernel_map)) { - return CALLBACK_CHAIN_ABORT; - } - return CALLBACK_CHAIN_CONTINUE; -} - -struct mbuf * -getsombuf(struct socket 
*so) -{ - struct mbuf *m; - - m = m_get(M_WAIT, MT_SONAME); - MCLAIM(m, so->so_mowner); - return m; -} - -struct mbuf * -m_intopt(struct socket *so, int val) -{ - struct mbuf *m; - - m = getsombuf(so); - m->m_len = sizeof(int); - *mtod(m, int *) = val; - return m; -} - -void -soinit(void) -{ - - mutex_init(&so_pendfree_lock, MUTEX_DRIVER, IPL_VM); - cv_init(&socurkva_cv, "sokva"); - - /* Set the initial adjusted socket buffer size. */ - if (sb_max_set(sb_max)) - panic("bad initial sb_max value: %lu", sb_max); - - callback_register(&vm_map_to_kernel(kernel_map)->vmk_reclaim_callback, - &sokva_reclaimerentry, NULL, sokva_reclaim_callback); -} - /* * Socket operation routines. * These routines are called by the routines in @@ -477,32 +456,17 @@ socreate(int dom, struct socket **aso, i uid_t uid; int error, s; - error = kauth_authorize_network(l->l_cred, KAUTH_NETWORK_SOCKET, - KAUTH_REQ_NETWORK_SOCKET_OPEN, KAUTH_ARG(dom), KAUTH_ARG(type), - KAUTH_ARG(proto)); - if (error != 0) - return error; - if (proto) prp = pffindproto(dom, proto, type); else prp = pffindtype(dom, type); - if (prp == NULL) { - /* no support for domain */ - if (pffinddomain(dom) == 0) - return EAFNOSUPPORT; - /* no support for socket type */ - if (proto == 0 && type != 0) - return EPROTOTYPE; - return EPROTONOSUPPORT; - } - if (prp->pr_usrreq == NULL) - return EPROTONOSUPPORT; + if (prp == 0 || prp->pr_usrreq == 0) + return (EPROTONOSUPPORT); if (prp->pr_type != type) - return EPROTOTYPE; + return (EPROTOTYPE); s = splsoftnet(); so = pool_get(&socket_pool, PR_WAITOK); - memset(so, 0, sizeof(*so)); + memset((caddr_t)so, 0, sizeof(*so)); TAILQ_INIT(&so->so_q0); TAILQ_INIT(&so->so_q); so->so_type = type; @@ -514,56 +478,23 @@ socreate(int dom, struct socket **aso, i so->so_snd.sb_mowner = &prp->pr_domain->dom_mowner; so->so_mowner = &prp->pr_domain->dom_mowner; #endif - selinit(&so->so_rcv.sb_sel); - selinit(&so->so_snd.sb_sel); - uid = kauth_cred_geteuid(l->l_cred); + if (l != NULL) { + uid = 
kauth_cred_geteuid(l->l_proc->p_cred); + } else { + uid = 0; + } so->so_uidinfo = uid_find(uid); - error = (*prp->pr_usrreq)(so, PRU_ATTACH, NULL, - (struct mbuf *)(long)proto, NULL, l); - if (error != 0) { + error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0, + (struct mbuf *)(long)proto, (struct mbuf *)0, l); + if (error) { so->so_state |= SS_NOFDREF; sofree(so); splx(s); - return error; + return (error); } splx(s); *aso = so; - return 0; -} - -/* On success, write file descriptor to fdout and return zero. On - * failure, return non-zero; *fdout will be undefined. - */ -int -fsocreate(int domain, struct socket **sop, int type, int protocol, - struct lwp *l, int *fdout) -{ - struct filedesc *fdp; - struct socket *so; - struct file *fp; - int fd, error; - - fdp = l->l_proc->p_fd; - /* falloc() will use the desciptor for us */ - if ((error = falloc(l, &fp, &fd)) != 0) - return (error); - fp->f_flag = FREAD|FWRITE; - fp->f_type = DTYPE_SOCKET; - fp->f_ops = &socketops; - error = socreate(domain, &so, type, protocol, l); - if (error != 0) { - FILE_UNUSE(fp, l); - fdremove(fdp, fd); - ffree(fp); - } else { - if (sop != NULL) - *sop = so; - fp->f_data = so; - FILE_SET_MATURE(fp); - FILE_UNUSE(fp, l); - *fdout = fd; - } - return error; + return (0); } int @@ -572,9 +503,10 @@ sobind(struct socket *so, struct mbuf *n int s, error; s = splsoftnet(); - error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, l); + error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, (struct mbuf *)0, + nam, (struct mbuf *)0, l); splx(s); - return error; + return (error); } int @@ -583,11 +515,11 @@ solisten(struct socket *so, int backlog) int s, error; s = splsoftnet(); - error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, - NULL, NULL, NULL); - if (error != 0) { + error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, (struct mbuf *)0, + (struct mbuf *)0, (struct mbuf *)0, (struct lwp *)0); + if (error) { splx(s); - return error; + return (error); } if 
(TAILQ_EMPTY(&so->so_q)) so->so_options |= SO_ACCEPTCONN; @@ -595,7 +527,7 @@ solisten(struct socket *so, int backlog) backlog = 0; so->so_qlimit = min(backlog, somaxconn); splx(s); - return 0; + return (0); } void @@ -621,8 +553,6 @@ sofree(struct socket *so) RLIM_INFINITY); sbrelease(&so->so_snd, so); sorflush(so); - seldestroy(&so->so_rcv.sb_sel); - seldestroy(&so->so_snd.sb_sel); pool_put(&socket_pool, so); } @@ -662,7 +592,7 @@ soclose(struct socket *so) (so->so_state & SS_NBIO)) goto drop; while (so->so_state & SS_ISCONNECTED) { - error = tsleep((void *)&so->so_timeo, + error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, netcls, so->so_linger * hz); if (error) @@ -673,7 +603,8 @@ soclose(struct socket *so) drop: if (so->so_pcb) { int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH, - NULL, NULL, NULL, NULL); + (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0, + (struct lwp *)0); if (error == 0) error = error2; } @@ -692,15 +623,9 @@ soclose(struct socket *so) int soabort(struct socket *so) { - int error; - KASSERT(so->so_head == NULL); - error = (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, - NULL, NULL, NULL); - if (error) { - sofree(so); - } - return error; + return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, (struct mbuf *)0, + (struct mbuf *)0, (struct mbuf *)0, (struct lwp *)0); } int @@ -716,7 +641,7 @@ soaccept(struct socket *so, struct mbuf if ((so->so_state & SS_ISDISCONNECTED) == 0 || (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0) error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, - NULL, nam, NULL, NULL); + (struct mbuf *)0, nam, (struct mbuf *)0, (struct lwp *)0); else error = ECONNABORTED; @@ -744,7 +669,7 @@ soconnect(struct socket *so, struct mbuf error = EISCONN; else error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT, - NULL, nam, NULL, l); + (struct mbuf *)0, nam, (struct mbuf *)0, l); splx(s); return (error); } @@ -756,7 +681,8 @@ soconnect2(struct socket *so1, struct so s = splsoftnet(); error = (*so1->so_proto->pr_usrreq)(so1, 
PRU_CONNECT2, - NULL, (struct mbuf *)so2, NULL, NULL); + (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0, + (struct lwp *)0); splx(s); return (error); } @@ -776,7 +702,8 @@ sodisconnect(struct socket *so) goto bad; } error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, - NULL, NULL, NULL, NULL); + (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0, + (struct lwp *)0); bad: splx(s); sodopendfree(); @@ -889,19 +816,19 @@ sosend(struct socket *so, struct mbuf *a if (flags & MSG_EOR) top->m_flags |= M_EOR; } else do { - if (top == NULL) { + if (top == 0) { m = m_gethdr(M_WAIT, MT_DATA); mlen = MHLEN; m->m_pkthdr.len = 0; - m->m_pkthdr.rcvif = NULL; + m->m_pkthdr.rcvif = (struct ifnet *)0; } else { m = m_get(M_WAIT, MT_DATA); mlen = MLEN; } MCLAIM(m, so->so_snd.sb_mowner); - if (sock_loan_thresh >= 0 && - uio->uio_iov->iov_len >= sock_loan_thresh && - space >= sock_loan_thresh && + if (use_sosend_loan && + uio->uio_iov->iov_len >= SOCK_LOAN_THRESH && + space >= SOCK_LOAN_THRESH && (len = sosend_loan(so, uio, m, space)) != 0) { SOSEND_COUNTER_INCR(&sosend_loan_big); @@ -933,13 +860,14 @@ sosend(struct socket *so, struct mbuf *a if (atomic && top == 0 && len < mlen) MH_ALIGN(m, len); } - error = uiomove(mtod(m, void *), (int)len, uio); + error = uiomove(mtod(m, caddr_t), (int)len, + uio); have_data: resid = uio->uio_resid; m->m_len = len; *mp = m; top->m_pkthdr.len += len; - if (error != 0) + if (error) goto release; mp = &m->m_next; if (resid <= 0) { @@ -968,10 +896,10 @@ sosend(struct socket *so, struct mbuf *a splx(s); clen = 0; - control = NULL; - top = NULL; + control = 0; + top = 0; mp = &top; - if (error != 0) + if (error) goto release; } while (resid && space > 0); } while (resid); @@ -1018,11 +946,11 @@ soreceive(struct socket *so, struct mbuf type = 0; orig_resid = uio->uio_resid; - if (paddr != NULL) - *paddr = NULL; - if (controlp != NULL) - *controlp = NULL; - if (flagsp != NULL) + if (paddr) + *paddr = 0; + if (controlp) + *controlp = 0; + if (flagsp) 
flags = *flagsp &~ MSG_EOR; else flags = 0; @@ -1033,27 +961,29 @@ soreceive(struct socket *so, struct mbuf if (flags & MSG_OOB) { m = m_get(M_WAIT, MT_DATA); error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m, - (struct mbuf *)(long)(flags & MSG_PEEK), NULL, l); + (struct mbuf *)(long)(flags & MSG_PEEK), + (struct mbuf *)0, l); if (error) goto bad; do { - error = uiomove(mtod(m, void *), + error = uiomove(mtod(m, caddr_t), (int) min(uio->uio_resid, m->m_len), uio); m = m_free(m); - } while (uio->uio_resid > 0 && error == 0 && m); + } while (uio->uio_resid && error == 0 && m); bad: - if (m != NULL) + if (m) m_freem(m); - return error; + return (error); } - if (mp != NULL) - *mp = NULL; + if (mp) + *mp = (struct mbuf *)0; if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) - (*pr->pr_usrreq)(so, PRU_RCVD, NULL, NULL, NULL, l); + (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0, + (struct mbuf *)0, (struct mbuf *)0, l); restart: if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0) - return error; + return (error); s = splsoftnet(); m = so->so_rcv.sb_mb; @@ -1068,20 +998,17 @@ soreceive(struct socket *so, struct mbuf * we have to do the receive in sections, and thus risk returning * a short count if a timeout or signal occurs after we start. 
*/ - if (m == NULL || - ((flags & MSG_DONTWAIT) == 0 && - so->so_rcv.sb_cc < uio->uio_resid && - (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || - ((flags & MSG_WAITALL) && - uio->uio_resid <= so->so_rcv.sb_hiwat)) && - m->m_nextpkt == NULL && - (pr->pr_flags & PR_ATOMIC) == 0)) { + if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && + so->so_rcv.sb_cc < uio->uio_resid) && + (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || + ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && + m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { #ifdef DIAGNOSTIC - if (m == NULL && so->so_rcv.sb_cc) + if (m == 0 && so->so_rcv.sb_cc) panic("receive 1"); #endif if (so->so_error) { - if (m != NULL) + if (m) goto dontblock; error = so->so_error; if ((flags & MSG_PEEK) == 0) @@ -1089,12 +1016,12 @@ soreceive(struct socket *so, struct mbuf goto release; } if (so->so_state & SS_CANTRCVMORE) { - if (m != NULL) + if (m) goto dontblock; else goto release; } - for (; m != NULL; m = m->m_next) + for (; m; m = m->m_next) if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { m = so->so_rcv.sb_mb; goto dontblock; @@ -1115,8 +1042,8 @@ soreceive(struct socket *so, struct mbuf sbunlock(&so->so_rcv); error = sbwait(&so->so_rcv); splx(s); - if (error != 0) - return error; + if (error) + return (error); goto restart; } dontblock: @@ -1125,7 +1052,7 @@ soreceive(struct socket *so, struct mbuf * While we process the initial mbufs containing address and control * info, we save a copy of m->m_nextpkt into nextrecord. 
*/ - if (l != NULL) + if (l) l->l_proc->p_stats->p_ru.ru_msgrcv++; KASSERT(m == so->so_rcv.sb_mb); SBLASTRECORDCHK(&so->so_rcv, "soreceive 1"); @@ -1144,10 +1071,10 @@ soreceive(struct socket *so, struct mbuf } else { sbfree(&so->so_rcv, m); mbuf_removed = 1; - if (paddr != NULL) { + if (paddr) { *paddr = m; so->so_rcv.sb_mb = m->m_next; - m->m_next = NULL; + m->m_next = 0; m = so->so_rcv.sb_mb; } else { MFREE(m, so->so_rcv.sb_mb); @@ -1155,15 +1082,15 @@ soreceive(struct socket *so, struct mbuf } } } - while (m != NULL && m->m_type == MT_CONTROL && error == 0) { + while (m && m->m_type == MT_CONTROL && error == 0) { if (flags & MSG_PEEK) { - if (controlp != NULL) + if (controlp) *controlp = m_copy(m, 0, m->m_len); m = m->m_next; } else { sbfree(&so->so_rcv, m); mbuf_removed = 1; - if (controlp != NULL) { + if (controlp) { struct domain *dom = pr->pr_domain; if (dom->dom_externalize && l && mtod(m, struct cmsghdr *)->cmsg_type == @@ -1171,7 +1098,7 @@ soreceive(struct socket *so, struct mbuf error = (*dom->dom_externalize)(m, l); *controlp = m; so->so_rcv.sb_mb = m->m_next; - m->m_next = NULL; + m->m_next = 0; m = so->so_rcv.sb_mb; } else { /* @@ -1185,7 +1112,7 @@ soreceive(struct socket *so, struct mbuf m = so->so_rcv.sb_mb; } } - if (controlp != NULL) { + if (controlp) { orig_resid = 0; controlp = &(*controlp)->m_next; } @@ -1197,7 +1124,7 @@ soreceive(struct socket *so, struct mbuf * the last packet on the chain (nextrecord == NULL) and we * change m->m_nextpkt. */ - if (m != NULL) { + if (m) { if ((flags & MSG_PEEK) == 0) { m->m_nextpkt = nextrecord; /* @@ -1225,7 +1152,7 @@ soreceive(struct socket *so, struct mbuf moff = 0; offset = 0; - while (m != NULL && uio->uio_resid > 0 && error == 0) { + while (m && uio->uio_resid > 0 && error == 0) { if (m->m_type == MT_OOBDATA) { if (type != MT_OOBDATA) break; @@ -1249,13 +1176,13 @@ soreceive(struct socket *so, struct mbuf * we must note any additions to the sockbuf when we * block interrupts again. 
*/ - if (mp == NULL) { + if (mp == 0) { SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove"); SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove"); splx(s); - error = uiomove(mtod(m, char *) + moff, (int)len, uio); + error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); s = splsoftnet(); - if (error != 0) { + if (error) { /* * If any part of the record has been removed * (such as the MT_SONAME mbuf, which will @@ -1288,7 +1215,7 @@ soreceive(struct socket *so, struct mbuf *mp = m; mp = &m->m_next; so->so_rcv.sb_mb = m = m->m_next; - *mp = NULL; + *mp = (struct mbuf *)0; } else { MFREE(m, so->so_rcv.sb_mb); m = so->so_rcv.sb_mb; @@ -1309,14 +1236,16 @@ soreceive(struct socket *so, struct mbuf SBLASTRECORDCHK(&so->so_rcv, "soreceive 3"); SBLASTMBUFCHK(&so->so_rcv, "soreceive 3"); } - } else if (flags & MSG_PEEK) - moff += len; - else { - if (mp != NULL) - *mp = m_copym(m, 0, len, M_WAIT); - m->m_data += len; - m->m_len -= len; - so->so_rcv.sb_cc -= len; + } else { + if (flags & MSG_PEEK) + moff += len; + else { + if (mp) + *mp = m_copym(m, 0, len, M_WAIT); + m->m_data += len; + m->m_len -= len; + so->so_rcv.sb_cc -= len; + } } if (so->so_oobmark) { if ((flags & MSG_PEEK) == 0) { @@ -1340,7 +1269,7 @@ soreceive(struct socket *so, struct mbuf * with a short count but without error. * Keep sockbuf locked against other readers. 
*/ - while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 && + while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 && !sosendallatonce(so) && !nextrecord) { if (so->so_error || so->so_state & SS_CANTRCVMORE) break; @@ -1357,14 +1286,16 @@ soreceive(struct socket *so, struct mbuf */ if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb) (*pr->pr_usrreq)(so, PRU_RCVD, - NULL, (struct mbuf *)(long)flags, NULL, l); + (struct mbuf *)0, + (struct mbuf *)(long)flags, + (struct mbuf *)0, l); SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2"); SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2"); error = sbwait(&so->so_rcv); - if (error != 0) { + if (error) { sbunlock(&so->so_rcv); splx(s); - return 0; + return (0); } if ((m = so->so_rcv.sb_mb) != NULL) nextrecord = m->m_nextpkt; @@ -1377,7 +1308,7 @@ soreceive(struct socket *so, struct mbuf (void) sbdroprecord(&so->so_rcv); } if ((flags & MSG_PEEK) == 0) { - if (m == NULL) { + if (m == 0) { /* * First part is an inline SB_EMPTY_FIXUP(). Second * part makes sure sb_lastrecord is up-to-date if @@ -1393,8 +1324,8 @@ soreceive(struct socket *so, struct mbuf SBLASTRECORDCHK(&so->so_rcv, "soreceive 4"); SBLASTMBUFCHK(&so->so_rcv, "soreceive 4"); if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) - (*pr->pr_usrreq)(so, PRU_RCVD, NULL, - (struct mbuf *)(long)flags, NULL, l); + (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0, + (struct mbuf *)(long)flags, (struct mbuf *)0, l); } if (orig_resid == uio->uio_resid && orig_resid && (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { @@ -1403,12 +1334,12 @@ soreceive(struct socket *so, struct mbuf goto restart; } - if (flagsp != NULL) + if (flagsp) *flagsp |= flags; release: sbunlock(&so->so_rcv); splx(s); - return error; + return (error); } int @@ -1423,9 +1354,9 @@ soshutdown(struct socket *so, int how) if (how == SHUT_RD || how == SHUT_RDWR) sorflush(so); if (how == SHUT_WR || how == SHUT_RDWR) - return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, NULL, - NULL, NULL, NULL); - 
return 0; + return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, (struct mbuf *)0, + (struct mbuf *)0, (struct mbuf *)0, (struct lwp *)0); + return (0); } void @@ -1455,140 +1386,151 @@ sorflush(struct socket *so) sbrelease(&asb, so); } -static int -sosetopt1(struct socket *so, int level, int optname, struct mbuf *m) +int +sosetopt(struct socket *so, int level, int optname, struct mbuf *m0) { - int optval, val; - struct linger *l; - struct sockbuf *sb; - struct timeval *tv; - - switch (optname) { - - case SO_LINGER: - if (m == NULL || m->m_len != sizeof(struct linger)) - return EINVAL; - l = mtod(m, struct linger *); - if (l->l_linger < 0 || l->l_linger > USHRT_MAX || - l->l_linger > (INT_MAX / hz)) - return EDOM; - so->so_linger = l->l_linger; - if (l->l_onoff) - so->so_options |= SO_LINGER; - else - so->so_options &= ~SO_LINGER; - break; - - case SO_DEBUG: - case SO_KEEPALIVE: - case SO_DONTROUTE: - case SO_USELOOPBACK: - case SO_BROADCAST: - case SO_REUSEADDR: - case SO_REUSEPORT: - case SO_OOBINLINE: - case SO_TIMESTAMP: - if (m == NULL || m->m_len < sizeof(int)) - return EINVAL; - if (*mtod(m, int *)) - so->so_options |= optname; - else - so->so_options &= ~optname; - break; + int error; + struct mbuf *m; - case SO_SNDBUF: - case SO_RCVBUF: - case SO_SNDLOWAT: - case SO_RCVLOWAT: - if (m == NULL || m->m_len < sizeof(int)) - return EINVAL; + error = 0; + m = m0; + if (level != SOL_SOCKET) { + if (so->so_proto && so->so_proto->pr_ctloutput) + return ((*so->so_proto->pr_ctloutput) + (PRCO_SETOPT, so, level, optname, &m0)); + error = ENOPROTOOPT; + } else { + switch (optname) { - /* - * Values < 1 make no sense for any of these - * options, so disallow them. 
- */ - optval = *mtod(m, int *); - if (optval < 1) - return EINVAL; + case SO_LINGER: + if (m == NULL || m->m_len != sizeof(struct linger)) { + error = EINVAL; + goto bad; + } + if (mtod(m, struct linger *)->l_linger < 0 || + mtod(m, struct linger *)->l_linger > (INT_MAX / hz)) { + error = EDOM; + goto bad; + } + so->so_linger = mtod(m, struct linger *)->l_linger; + /* fall thru... */ - switch (optname) { + case SO_DEBUG: + case SO_KEEPALIVE: + case SO_DONTROUTE: + case SO_USELOOPBACK: + case SO_BROADCAST: + case SO_REUSEADDR: + case SO_REUSEPORT: + case SO_OOBINLINE: + case SO_TIMESTAMP: + if (m == NULL || m->m_len < sizeof(int)) { + error = EINVAL; + goto bad; + } + if (*mtod(m, int *)) + so->so_options |= optname; + else + so->so_options &= ~optname; + break; case SO_SNDBUF: case SO_RCVBUF: - sb = (optname == SO_SNDBUF) ? - &so->so_snd : &so->so_rcv; - if (sbreserve(sb, (u_long)optval, so) == 0) - return ENOBUFS; - sb->sb_flags &= ~SB_AUTOSIZE; - break; - - /* - * Make sure the low-water is never greater than - * the high-water. - */ case SO_SNDLOWAT: - so->so_snd.sb_lowat = - (optval > so->so_snd.sb_hiwat) ? - so->so_snd.sb_hiwat : optval; - break; case SO_RCVLOWAT: - so->so_rcv.sb_lowat = - (optval > so->so_rcv.sb_hiwat) ? - so->so_rcv.sb_hiwat : optval; - break; - } - break; + { + int optval; - case SO_SNDTIMEO: - case SO_RCVTIMEO: - if (m == NULL || m->m_len < sizeof(*tv)) - return EINVAL; - tv = mtod(m, struct timeval *); - if (tv->tv_sec > (INT_MAX - tv->tv_usec / tick) / hz) - return EDOM; - val = tv->tv_sec * hz + tv->tv_usec / tick; - if (val == 0 && tv->tv_usec != 0) - val = 1; + if (m == NULL || m->m_len < sizeof(int)) { + error = EINVAL; + goto bad; + } - switch (optname) { + /* + * Values < 1 make no sense for any of these + * options, so disallow them. 
+ */ + optval = *mtod(m, int *); + if (optval < 1) { + error = EINVAL; + goto bad; + } - case SO_SNDTIMEO: - so->so_snd.sb_timeo = val; + switch (optname) { + + case SO_SNDBUF: + case SO_RCVBUF: + if (sbreserve(optname == SO_SNDBUF ? + &so->so_snd : &so->so_rcv, + (u_long) optval, so) == 0) { + error = ENOBUFS; + goto bad; + } + break; + + /* + * Make sure the low-water is never greater than + * the high-water. + */ + case SO_SNDLOWAT: + so->so_snd.sb_lowat = + (optval > so->so_snd.sb_hiwat) ? + so->so_snd.sb_hiwat : optval; + break; + case SO_RCVLOWAT: + so->so_rcv.sb_lowat = + (optval > so->so_rcv.sb_hiwat) ? + so->so_rcv.sb_hiwat : optval; + break; + } break; + } + + case SO_SNDTIMEO: case SO_RCVTIMEO: - so->so_rcv.sb_timeo = val; - break; - } - break; + { + struct timeval *tv; + int val; - default: - return ENOPROTOOPT; - } - return 0; -} + if (m == NULL || m->m_len < sizeof(*tv)) { + error = EINVAL; + goto bad; + } + tv = mtod(m, struct timeval *); + if (tv->tv_sec > (INT_MAX - tv->tv_usec / tick) / hz) { + error = EDOM; + goto bad; + } + val = tv->tv_sec * hz + tv->tv_usec / tick; + if (val == 0 && tv->tv_usec != 0) + val = 1; -int -sosetopt(struct socket *so, int level, int optname, struct mbuf *m) -{ - int error, prerr; + switch (optname) { - if (level == SOL_SOCKET) - error = sosetopt1(so, level, optname, m); - else - error = ENOPROTOOPT; + case SO_SNDTIMEO: + so->so_snd.sb_timeo = val; + break; + case SO_RCVTIMEO: + so->so_rcv.sb_timeo = val; + break; + } + break; + } - if ((error == 0 || error == ENOPROTOOPT) && - so->so_proto != NULL && so->so_proto->pr_ctloutput != NULL) { - /* give the protocol stack a shot */ - prerr = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so, level, - optname, &m); - if (prerr == 0) - error = 0; - else if (prerr != ENOPROTOOPT) - error = prerr; - } else if (m != NULL) - (void)m_free(m); - return error; + default: + error = ENOPROTOOPT; + break; + } + if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { + (void) 
((*so->so_proto->pr_ctloutput) + (PRCO_SETOPT, so, level, optname, &m0)); + m = NULL; /* freed by protocol */ + } + } + bad: + if (m) + (void) m_free(m); + return (error); } int @@ -1611,7 +1553,7 @@ sogetopt(struct socket *so, int level, i case SO_LINGER: m->m_len = sizeof(struct linger); mtod(m, struct linger *)->l_onoff = - (so->so_options & SO_LINGER) ? 1 : 0; + so->so_options & SO_LINGER; mtod(m, struct linger *)->l_linger = so->so_linger; break; @@ -1624,7 +1566,7 @@ sogetopt(struct socket *so, int level, i case SO_BROADCAST: case SO_OOBINLINE: case SO_TIMESTAMP: - *mtod(m, int *) = (so->so_options & optname) ? 1 : 0; + *mtod(m, int *) = so->so_options & optname; break; case SO_TYPE: @@ -1814,6 +1756,7 @@ sysctl_kern_somaxkva(SYSCTLFN_ARGS) { int error, new_somaxkva; struct sysctlnode node; + int s; new_somaxkva = somaxkva; node = *rnode; @@ -1825,10 +1768,12 @@ sysctl_kern_somaxkva(SYSCTLFN_ARGS) if (new_somaxkva < (16 * 1024 * 1024)) /* sanity */ return (EINVAL); - mutex_enter(&so_pendfree_lock); + s = splvm(); + simple_lock(&so_pendfree_slock); somaxkva = new_somaxkva; - cv_broadcast(&socurkva_cv); - mutex_exit(&so_pendfree_lock); + wakeup(&socurkva); + simple_unlock(&so_pendfree_slock); + splx(s); return (error); }