Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.81 retrieving revision 1.99 diff -u -p -r1.81 -r1.99 --- src/sys/kern/uipc_socket.c 2003/06/23 11:02:07 1.81 +++ src/sys/kern/uipc_socket.c 2004/04/22 01:01:40 1.99 @@ -1,4 +1,4 @@ -/* $NetBSD: uipc_socket.c,v 1.81 2003/06/23 11:02:07 martin Exp $ */ +/* $NetBSD: uipc_socket.c,v 1.99 2004/04/22 01:01:40 matt Exp $ */ /*- * Copyright (c) 2002 The NetBSD Foundation, Inc. @@ -48,11 +48,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors + * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -72,11 +68,12 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.81 2003/06/23 11:02:07 martin Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.99 2004/04/22 01:01:40 matt Exp $"); #include "opt_sock_counters.h" #include "opt_sosend_loan.h" #include "opt_mbuftrace.h" +#include "opt_somaxkva.h" #include #include @@ -93,6 +90,7 @@ __KERNEL_RCSID(0, "$NetBSD: uipc_socket. #include #include #include +#include #include @@ -128,6 +126,10 @@ void soinit(void) { + /* Set the initial adjusted socket buffer size. */ + if (sb_max_set(sb_max)) + panic("bad initial sb_max value: %lu\n", sb_max); + pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0, "sockpl", NULL); @@ -145,9 +147,13 @@ int use_sosend_loan = 0; int use_sosend_loan = 1; #endif +struct simplelock so_pendfree_slock = SIMPLELOCK_INITIALIZER; struct mbuf *so_pendfree; -int somaxkva = 16 * 1024 * 1024; +#ifndef SOMAXKVA +#define SOMAXKVA (16 * 1024 * 1024) +#endif +int somaxkva = SOMAXKVA; int socurkva; int sokvawaiters; @@ -155,40 +161,112 @@ int sokvawaiters; #define SOCK_LOAN_CHUNK 65536 static size_t sodopendfree(struct socket *); +static size_t sodopendfreel(struct socket *); +static __inline vsize_t sokvareserve(struct socket *, vsize_t); +static __inline void sokvaunreserve(vsize_t); -vaddr_t -sokvaalloc(vsize_t len, struct socket *so) +static __inline vsize_t +sokvareserve(struct socket *so, vsize_t len) { - vaddr_t lva; int s; + int error; + s = splvm(); + simple_lock(&so_pendfree_slock); while (socurkva + len > somaxkva) { - if (sodopendfree(so)) + size_t freed; + + /* + * try to do pendfree. + */ + + freed = sodopendfreel(so); + + /* + * if some kva was freed, try again. + */ + + if (freed) continue; + SOSEND_COUNTER_INCR(&sosend_kvalimit); - s = splvm(); sokvawaiters++; - (void) tsleep(&socurkva, PVM, "sokva", 0); + error = ltsleep(&socurkva, PVM | PCATCH, "sokva", 0, + &so_pendfree_slock); sokvawaiters--; - splx(s); + if (error) { + len = 0; + break; + } } + socurkva += len; + simple_unlock(&so_pendfree_slock); + splx(s); + return len; +} + +static __inline void +sokvaunreserve(vsize_t len) +{ + int s; + + s = splvm(); + simple_lock(&so_pendfree_slock); + socurkva -= len; + if (sokvawaiters) + wakeup(&socurkva); + simple_unlock(&so_pendfree_slock); + splx(s); +} + +/* + * sokvaalloc: allocate kva for loan. + */ + +vaddr_t +sokvaalloc(vsize_t len, struct socket *so) +{ + vaddr_t lva; + + /* + * reserve kva. + */ + + if (sokvareserve(so, len) == 0) + return 0; + + /* + * allocate kva. + */ lva = uvm_km_valloc_wait(kernel_map, len); - if (lva == 0) + if (lva == 0) { + sokvaunreserve(len); return (0); - socurkva += len; + } return lva; } +/* + * sokvafree: free kva for loan. + */ + void sokvafree(vaddr_t sva, vsize_t len) { + /* + * free kva. + */ + uvm_km_free(kernel_map, sva, len); - socurkva -= len; - if (sokvawaiters) - wakeup(&socurkva); + + /* + * unreserve kva. + */ + + sokvaunreserve(len); } static void @@ -223,63 +301,91 @@ sodoloanfree(struct vm_page **pgs, caddr static size_t sodopendfree(struct socket *so) { - struct mbuf *m; - size_t rv = 0; int s; + size_t rv; s = splvm(); + simple_lock(&so_pendfree_slock); + rv = sodopendfreel(so); + simple_unlock(&so_pendfree_slock); + splx(s); - for (;;) { - m = so_pendfree; - if (m == NULL) - break; - so_pendfree = m->m_next; - splx(s); + return rv; +} - rv += m->m_ext.ext_size; - sodoloanfree((m->m_flags & M_EXT_PAGES) ? - m->m_ext.ext_pgs : NULL, m->m_ext.ext_buf, - m->m_ext.ext_size); - s = splvm(); - pool_cache_put(&mbpool_cache, m); - } +/* + * sodopendfreel: free mbufs on "pendfree" list. + * unlock and relock so_pendfree_slock when freeing mbufs. + * + * => called with so_pendfree_slock held. + * => called at splvm. + */ + +static size_t +sodopendfreel(struct socket *so) +{ + size_t rv = 0; + + LOCK_ASSERT(simple_lock_held(&so_pendfree_slock)); for (;;) { - m = so->so_pendfree; + struct mbuf *m; + struct mbuf *next; + + m = so_pendfree; if (m == NULL) break; - so->so_pendfree = m->m_next; - splx(s); + so_pendfree = NULL; + simple_unlock(&so_pendfree_slock); + /* XXX splx */ + + for (; m != NULL; m = next) { + next = m->m_next; + + rv += m->m_ext.ext_size; + sodoloanfree((m->m_flags & M_EXT_PAGES) ? + m->m_ext.ext_pgs : NULL, m->m_ext.ext_buf, + m->m_ext.ext_size); + pool_cache_put(&mbpool_cache, m); + } - rv += m->m_ext.ext_size; - sodoloanfree((m->m_flags & M_EXT_PAGES) ? - m->m_ext.ext_pgs : NULL, m->m_ext.ext_buf, - m->m_ext.ext_size); - s = splvm(); - pool_cache_put(&mbpool_cache, m); + /* XXX splvm */ + simple_lock(&so_pendfree_slock); } - splx(s); return (rv); } void soloanfree(struct mbuf *m, caddr_t buf, size_t size, void *arg) { - struct socket *so = arg; int s; if (m == NULL) { + + /* + * called from MEXTREMOVE. + */ + sodoloanfree(NULL, buf, size); return; } + /* + * postpone freeing mbuf. + * + * we can't do it in interrupt context + * because we need to put kva back to kernel_map. + */ + s = splvm(); - m->m_next = so->so_pendfree; - so->so_pendfree = m; - splx(s); + simple_lock(&so_pendfree_slock); + m->m_next = so_pendfree; + so_pendfree = m; if (sokvawaiters) wakeup(&socurkva); + simple_unlock(&so_pendfree_slock); + splx(s); } static long @@ -352,7 +458,7 @@ int socreate(int dom, struct socket **aso, int type, int proto) { struct proc *p; - struct protosw *prp; + const struct protosw *prp; struct socket *so; int error, s; @@ -381,6 +487,8 @@ socreate(int dom, struct socket **aso, i #endif if (p != 0) so->so_uid = p->p_ucred->cr_uid; + else + so->so_uid = UID_MAX; error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0, (struct mbuf *)(long)proto, (struct mbuf *)0, p); if (error) { @@ -430,7 +538,6 @@ solisten(struct socket *so, int backlog) void sofree(struct socket *so) { - struct mbuf *m; if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) return; @@ -443,13 +550,14 @@ sofree(struct socket *so) if (!soqremque(so, 0)) return; } - sbrelease(&so->so_snd); + if (so->so_rcv.sb_hiwat) + (void)chgsbsize(so->so_uid, &so->so_rcv.sb_hiwat, 0, + RLIM_INFINITY); + if (so->so_snd.sb_hiwat) + (void)chgsbsize(so->so_uid, &so->so_snd.sb_hiwat, 0, + RLIM_INFINITY); + sbrelease(&so->so_snd, so); sorflush(so); - while ((m = so->so_pendfree) != NULL) { - so->so_pendfree = m->m_next; - m->m_next = so_pendfree; - so_pendfree = m; - } pool_put(&socket_pool, so); } @@ -691,7 +799,7 @@ sosend(struct socket *so, struct mbuf *a if ((atomic && resid > so->so_snd.sb_hiwat) || clen > so->so_snd.sb_hiwat) snderr(EMSGSIZE); - if (space < resid + clen && uio && + if (space < resid + clen && (atomic || space < so->so_snd.sb_lowat || space < clen)) { if (so->so_state & SS_NBIO) snderr(EWOULDBLOCK); @@ -834,7 +942,7 @@ soreceive(struct socket *so, struct mbuf { struct mbuf *m, **mp; int flags, len, error, s, offset, moff, type, orig_resid; - struct protosw *pr; + const struct protosw *pr; struct mbuf *nextrecord; int mbuf_removed = 0; @@ -1237,7 +1345,7 @@ soreceive(struct socket *so, struct mbuf int soshutdown(struct socket *so, int how) { - struct protosw *pr; + const struct protosw *pr; pr = so->so_proto; if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR)) @@ -1255,7 +1363,7 @@ void sorflush(struct socket *so) { struct sockbuf *sb, asb; - struct protosw *pr; + const struct protosw *pr; int s; sb = &so->so_rcv; @@ -1266,11 +1374,16 @@ sorflush(struct socket *so) socantrcvmore(so); sbunlock(sb); asb = *sb; - memset((caddr_t)sb, 0, sizeof(*sb)); + /* + * Clear most of the sockbuf structure, but leave some of the + * fields valid. + */ + memset(&sb->sb_startzero, 0, + sizeof(*sb) - offsetof(struct sockbuf, sb_startzero)); splx(s); if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) (*pr->pr_domain->dom_dispose)(asb.sb_mb); - sbrelease(&asb); + sbrelease(&asb, so); } int @@ -1344,7 +1457,7 @@ sosetopt(struct socket *so, int level, i case SO_RCVBUF: if (sbreserve(optname == SO_SNDBUF ? &so->so_snd : &so->so_rcv, - (u_long) optval) == 0) { + (u_long) optval, so) == 0) { error = ENOBUFS; goto bad; } @@ -1501,12 +1614,7 @@ sogetopt(struct socket *so, int level, i void sohasoutofband(struct socket *so) { - struct proc *p; - - if (so->so_pgid < 0) - gsignal(-so->so_pgid, SIGURG); - else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) - psignal(p, SIGURG); + fownsignal(so->so_pgid, SIGURG, POLL_PRI, POLLPRI|POLLRDBAND, so); selwakeup(&so->so_rcv.sb_sel); } @@ -1625,3 +1733,54 @@ soo_kqfilter(struct file *fp, struct kno return (0); } +#include + +static int sysctl_kern_somaxkva(SYSCTLFN_PROTO); + +/* + * sysctl helper routine for kern.somaxkva. ensures that the given + * value is not too small. + * (XXX should we maybe make sure it's not too large as well?) + */ +static int +sysctl_kern_somaxkva(SYSCTLFN_ARGS) +{ + int error, new_somaxkva; + struct sysctlnode node; + int s; + + new_somaxkva = somaxkva; + node = *rnode; + node.sysctl_data = &new_somaxkva; + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + if (error || newp == NULL) + return (error); + + if (new_somaxkva < (16 * 1024 * 1024)) /* sanity */ + return (EINVAL); + + s = splvm(); + simple_lock(&so_pendfree_slock); + somaxkva = new_somaxkva; + wakeup(&socurkva); + simple_unlock(&so_pendfree_slock); + splx(s); + + return (error); +} + +SYSCTL_SETUP(sysctl_kern_somaxkva_setup, "sysctl kern.somaxkva setup") +{ + + sysctl_createv(clog, 0, NULL, NULL, + CTLFLAG_PERMANENT, + CTLTYPE_NODE, "kern", NULL, + NULL, 0, NULL, 0, + CTL_KERN, CTL_EOL); + + sysctl_createv(clog, 0, NULL, NULL, + CTLFLAG_PERMANENT|CTLFLAG_READWRITE, + CTLTYPE_INT, "somaxkva", NULL, + sysctl_kern_somaxkva, 0, NULL, 0, + CTL_KERN, KERN_SOMAXKVA, CTL_EOL); +}