Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.93 retrieving revision 1.107 diff -u -p -r1.93 -r1.107 --- src/sys/kern/uipc_socket.c 2004/03/17 10:03:26 1.93 +++ src/sys/kern/uipc_socket.c 2004/09/03 18:14:09 1.107 @@ -1,4 +1,4 @@ -/* $NetBSD: uipc_socket.c,v 1.93 2004/03/17 10:03:26 yamt Exp $ */ +/* $NetBSD: uipc_socket.c,v 1.107 2004/09/03 18:14:09 darrenr Exp $ */ /*- * Copyright (c) 2002 The NetBSD Foundation, Inc. @@ -68,7 +68,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.93 2004/03/17 10:03:26 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.107 2004/09/03 18:14:09 darrenr Exp $"); #include "opt_sock_counters.h" #include "opt_sosend_loan.h" @@ -94,7 +94,7 @@ __KERNEL_RCSID(0, "$NetBSD: uipc_socket. 
#include <uvm/uvm.h> -struct pool socket_pool; +POOL_INIT(socket_pool, sizeof(struct socket), 0, 0, 0, "sockpl", NULL); MALLOC_DEFINE(M_SOOPTS, "soopts", "socket options"); MALLOC_DEFINE(M_SONAME, "soname", "socket name"); @@ -116,6 +116,10 @@ struct evcnt sosend_kvalimit = EVCNT_INI #define SOSEND_COUNTER_INCR(ev) (ev)->ev_count++ +EVCNT_ATTACH_STATIC(sosend_loan_big); +EVCNT_ATTACH_STATIC(sosend_copy_big); +EVCNT_ATTACH_STATIC(sosend_copy_small); +EVCNT_ATTACH_STATIC(sosend_kvalimit); #else #define SOSEND_COUNTER_INCR(ev) /* nothing */ @@ -130,15 +134,6 @@ soinit(void) if (sb_max_set(sb_max)) panic("bad initial sb_max value: %lu\n", sb_max); - pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0, - "sockpl", NULL); - -#ifdef SOSEND_COUNTERS - evcnt_attach_static(&sosend_loan_big); - evcnt_attach_static(&sosend_copy_big); - evcnt_attach_static(&sosend_copy_small); - evcnt_attach_static(&sosend_kvalimit); -#endif /* SOSEND_COUNTERS */ } #ifdef SOSEND_NO_LOAN @@ -162,20 +157,14 @@ int sokvawaiters; static size_t sodopendfree(struct socket *); static size_t sodopendfreel(struct socket *); +static __inline vsize_t sokvareserve(struct socket *, vsize_t); +static __inline void sokvaunreserve(vsize_t); -/* - * sokvaalloc: allocate kva for loan. - */ - -vaddr_t -sokvaalloc(vsize_t len, struct socket *so) +static __inline vsize_t +sokvareserve(struct socket *so, vsize_t len) { - vaddr_t lva; int s; - - /* - * reserve kva. 
- */ + int error; s = splvm(); simple_lock(&so_pendfree_slock); @@ -197,20 +186,59 @@ sokvaalloc(vsize_t len, struct socket *s SOSEND_COUNTER_INCR(&sosend_kvalimit); sokvawaiters++; - (void) ltsleep(&socurkva, PVM, "sokva", 0, &so_pendfree_slock); + error = ltsleep(&socurkva, PVM | PCATCH, "sokva", 0, + &so_pendfree_slock); sokvawaiters--; + if (error) { + len = 0; + break; + } } socurkva += len; simple_unlock(&so_pendfree_slock); splx(s); + return len; +} + +static __inline void +sokvaunreserve(vsize_t len) +{ + int s; + + s = splvm(); + simple_lock(&so_pendfree_slock); + socurkva -= len; + if (sokvawaiters) + wakeup(&socurkva); + simple_unlock(&so_pendfree_slock); + splx(s); +} + +/* + * sokvaalloc: allocate kva for loan. + */ + +vaddr_t +sokvaalloc(vsize_t len, struct socket *so) +{ + vaddr_t lva; + + /* + * reserve kva. + */ + + if (sokvareserve(so, len) == 0) + return 0; /* * allocate kva. */ lva = uvm_km_valloc_wait(kernel_map, len); - if (lva == 0) + if (lva == 0) { + sokvaunreserve(len); return (0); + } return lva; } @@ -222,7 +250,6 @@ sokvaalloc(vsize_t len, struct socket *s void sokvafree(vaddr_t sva, vsize_t len) { - int s; /* * free kva. @@ -234,13 +261,7 @@ sokvafree(vaddr_t sva, vsize_t len) * unreserve kva. 
*/ - s = splvm(); - simple_lock(&so_pendfree_slock); - socurkva -= len; - if (sokvawaiters) - wakeup(&socurkva); - simple_unlock(&so_pendfree_slock); - splx(s); + sokvaunreserve(len); } static void @@ -386,6 +407,7 @@ sosend_loan(struct socket *so, struct ui /* XXX KDASSERT */ KASSERT(npgs <= M_EXT_MAXPAGES); + KASSERT(uio->uio_procp != NULL); lva = sokvaalloc(len, so); if (lva == 0) @@ -429,14 +451,12 @@ sosend_loan(struct socket *so, struct ui */ /*ARGSUSED*/ int -socreate(int dom, struct socket **aso, int type, int proto) +socreate(int dom, struct socket **aso, int type, int proto, struct proc *p) { - struct proc *p; - struct protosw *prp; + const struct protosw *prp; struct socket *so; int error, s; - p = curproc; /* XXX */ if (proto) prp = pffindproto(dom, proto, type); else @@ -461,6 +481,8 @@ socreate(int dom, struct socket **aso, i #endif if (p != 0) so->so_uid = p->p_ucred->cr_uid; + else + so->so_uid = UID_MAX; error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0, (struct mbuf *)(long)proto, (struct mbuf *)0, p); if (error) { @@ -522,7 +544,13 @@ sofree(struct socket *so) if (!soqremque(so, 0)) return; } - sbrelease(&so->so_snd); + if (so->so_rcv.sb_hiwat) + (void)chgsbsize(so->so_uid, &so->so_rcv.sb_hiwat, 0, + RLIM_INFINITY); + if (so->so_snd.sb_hiwat) + (void)chgsbsize(so->so_uid, &so->so_snd.sb_hiwat, 0, + RLIM_INFINITY); + sbrelease(&so->so_snd, so); sorflush(so); pool_put(&socket_pool, so); } @@ -621,12 +649,10 @@ soaccept(struct socket *so, struct mbuf } int -soconnect(struct socket *so, struct mbuf *nam) +soconnect(struct socket *so, struct mbuf *nam, struct proc *p) { - struct proc *p; int s, error; - p = curproc; /* XXX */ if (so->so_options & SO_ACCEPTCONN) return (EOPNOTSUPP); s = splsoftnet(); @@ -703,16 +729,14 @@ sodisconnect(struct socket *so) */ int sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top, - struct mbuf *control, int flags) + struct mbuf *control, int flags, struct proc *p) { - struct proc *p; 
struct mbuf **mp, *m; long space, len, resid, clen, mlen; int error, s, dontroute, atomic; sodopendfree(so); - p = curproc; /* XXX */ clen = 0; atomic = sosendallatonce(so) || top; if (uio) @@ -733,7 +757,8 @@ sosend(struct socket *so, struct mbuf *a dontroute = (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && (so->so_proto->pr_flags & PR_ATOMIC); - p->p_stats->p_ru.ru_msgsnd++; + if (p) + p->p_stats->p_ru.ru_msgsnd++; if (control) clen = control->m_len; #define snderr(errno) { error = errno; splx(s); goto release; } @@ -765,7 +790,7 @@ sosend(struct socket *so, struct mbuf *a if ((atomic && resid > so->so_snd.sb_hiwat) || clen > so->so_snd.sb_hiwat) snderr(EMSGSIZE); - if (space < resid + clen && uio && + if (space < resid + clen && (atomic || space < so->so_snd.sb_lowat || space < clen)) { if (so->so_state & SS_NBIO) snderr(EWOULDBLOCK); @@ -906,9 +931,10 @@ int soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { + struct proc * p; struct mbuf *m, **mp; int flags, len, error, s, offset, moff, type, orig_resid; - struct protosw *pr; + const struct protosw *pr; struct mbuf *nextrecord; int mbuf_removed = 0; @@ -916,6 +942,8 @@ soreceive(struct socket *so, struct mbuf mp = mp0; type = 0; orig_resid = uio->uio_resid; + p = uio->uio_procp; + if (paddr) *paddr = 0; if (controlp) @@ -931,8 +959,8 @@ soreceive(struct socket *so, struct mbuf if (flags & MSG_OOB) { m = m_get(M_WAIT, MT_DATA); error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m, - (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0, - (struct proc *)0); + (struct mbuf *)(long)(flags & MSG_PEEK), + (struct mbuf *)0, p); if (error) goto bad; do { @@ -949,7 +977,7 @@ soreceive(struct socket *so, struct mbuf *mp = (struct mbuf *)0; if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0, - (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); + (struct mbuf *)0, (struct mbuf 
*)0, p); restart: if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0) @@ -1022,10 +1050,8 @@ soreceive(struct socket *so, struct mbuf * While we process the initial mbufs containing address and control * info, we save a copy of m->m_nextpkt into nextrecord. */ -#ifdef notyet /* XXXX */ - if (uio->uio_procp) - uio->uio_procp->p_stats->p_ru.ru_msgrcv++; -#endif + if (p) + p->p_stats->p_ru.ru_msgrcv++; KASSERT(m == so->so_rcv.sb_mb); SBLASTRECORDCHK(&so->so_rcv, "soreceive 1"); SBLASTMBUFCHK(&so->so_rcv, "soreceive 1"); @@ -1063,15 +1089,23 @@ soreceive(struct socket *so, struct mbuf sbfree(&so->so_rcv, m); mbuf_removed = 1; if (controlp) { - if (pr->pr_domain->dom_externalize && + struct domain *dom = pr->pr_domain; + if (dom->dom_externalize && p && mtod(m, struct cmsghdr *)->cmsg_type == SCM_RIGHTS) - error = (*pr->pr_domain->dom_externalize)(m); + error = (*dom->dom_externalize)(m, p); *controlp = m; so->so_rcv.sb_mb = m->m_next; m->m_next = 0; m = so->so_rcv.sb_mb; } else { + /* + * Dispose of any SCM_RIGHTS message that went + * through the read path rather than recv. 
+ */ + if (pr->pr_domain->dom_dispose && + mtod(m, struct cmsghdr *)->cmsg_type == SCM_RIGHTS) + (*pr->pr_domain->dom_dispose)(m); MFREE(m, so->so_rcv.sb_mb); m = so->so_rcv.sb_mb; } @@ -1252,8 +1286,7 @@ soreceive(struct socket *so, struct mbuf (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0, (struct mbuf *)(long)flags, - (struct mbuf *)0, - (struct proc *)0); + (struct mbuf *)0, p); SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2"); SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2"); error = sbwait(&so->so_rcv); @@ -1290,8 +1323,7 @@ soreceive(struct socket *so, struct mbuf SBLASTMBUFCHK(&so->so_rcv, "soreceive 4"); if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0, - (struct mbuf *)(long)flags, (struct mbuf *)0, - (struct proc *)0); + (struct mbuf *)(long)flags, (struct mbuf *)0, p); } if (orig_resid == uio->uio_resid && orig_resid && (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { @@ -1311,7 +1343,7 @@ soreceive(struct socket *so, struct mbuf int soshutdown(struct socket *so, int how) { - struct protosw *pr; + const struct protosw *pr; pr = so->so_proto; if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR)) @@ -1329,7 +1361,7 @@ void sorflush(struct socket *so) { struct sockbuf *sb, asb; - struct protosw *pr; + const struct protosw *pr; int s; sb = &so->so_rcv; @@ -1349,7 +1381,7 @@ sorflush(struct socket *so) splx(s); if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) (*pr->pr_domain->dom_dispose)(asb.sb_mb); - sbrelease(&asb); + sbrelease(&asb, so); } int @@ -1423,7 +1455,7 @@ sosetopt(struct socket *so, int level, i case SO_RCVBUF: if (sbreserve(optname == SO_SNDBUF ? 
&so->so_snd : &so->so_rcv, - (u_long) optval) == 0) { + (u_long) optval, so) == 0) { error = ENOBUFS; goto bad; } @@ -1451,14 +1483,14 @@ sosetopt(struct socket *so, int level, i case SO_RCVTIMEO: { struct timeval *tv; - short val; + int val; if (m == NULL || m->m_len < sizeof(*tv)) { error = EINVAL; goto bad; } tv = mtod(m, struct timeval *); - if (tv->tv_sec > (SHRT_MAX - tv->tv_usec / tick) / hz) { + if (tv->tv_sec > (INT_MAX - tv->tv_usec / tick) / hz) { error = EDOM; goto bad; } @@ -1568,6 +1600,10 @@ sogetopt(struct socket *so, int level, i break; } + case SO_OVERFLOWED: + *mtod(m, int *) = so->so_rcv.sb_overflowed; + break; + default: (void)m_free(m); return (ENOPROTOOPT); @@ -1699,3 +1735,56 @@ soo_kqfilter(struct file *fp, struct kno return (0); } +#include <sys/sysctl.h> + +static int sysctl_kern_somaxkva(SYSCTLFN_PROTO); + +/* + * sysctl helper routine for kern.somaxkva. ensures that the given + * value is not too small. + * (XXX should we maybe make sure it's not too large as well?) + */ +static int +sysctl_kern_somaxkva(SYSCTLFN_ARGS) +{ + int error, new_somaxkva; + struct sysctlnode node; + int s; + + new_somaxkva = somaxkva; + node = *rnode; + node.sysctl_data = &new_somaxkva; + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + if (error || newp == NULL) + return (error); + + if (new_somaxkva < (16 * 1024 * 1024)) /* sanity */ + return (EINVAL); + + s = splvm(); + simple_lock(&so_pendfree_slock); + somaxkva = new_somaxkva; + wakeup(&socurkva); + simple_unlock(&so_pendfree_slock); + splx(s); + + return (error); +} + +SYSCTL_SETUP(sysctl_kern_somaxkva_setup, "sysctl kern.somaxkva setup") +{ + + sysctl_createv(clog, 0, NULL, NULL, + CTLFLAG_PERMANENT, + CTLTYPE_NODE, "kern", NULL, + NULL, 0, NULL, 0, + CTL_KERN, CTL_EOL); + + sysctl_createv(clog, 0, NULL, NULL, + CTLFLAG_PERMANENT|CTLFLAG_READWRITE, + CTLTYPE_INT, "somaxkva", + SYSCTL_DESCR("Maximum amount of kernel memory to be " + "used for socket buffers"), + sysctl_kern_somaxkva, 0, NULL, 0, + CTL_KERN, 
KERN_SOMAXKVA, CTL_EOL); +}