| version 1.54.2.10, 2002/04/17 00:06:19 |
version 1.54.2.11, 2002/06/20 03:47:25 |
|
|
| /* $NetBSD$ */ |
/* $NetBSD$ */ |
| |
|
| |
/*- |
| |
* Copyright (c) 2002 The NetBSD Foundation, Inc. |
| |
* All rights reserved. |
| |
* |
| |
* This code is derived from software contributed to The NetBSD Foundation |
| |
* by Jason R. Thorpe of Wasabi Systems, Inc. |
| |
* |
| |
* Redistribution and use in source and binary forms, with or without |
| |
* modification, are permitted provided that the following conditions |
| |
* are met: |
| |
* 1. Redistributions of source code must retain the above copyright |
| |
* notice, this list of conditions and the following disclaimer. |
| |
* 2. Redistributions in binary form must reproduce the above copyright |
| |
* notice, this list of conditions and the following disclaimer in the |
| |
* documentation and/or other materials provided with the distribution. |
| |
* 3. All advertising materials mentioning features or use of this software |
| |
* must display the following acknowledgement: |
| |
* This product includes software developed by the NetBSD |
| |
* Foundation, Inc. and its contributors. |
| |
* 4. Neither the name of The NetBSD Foundation nor the names of its |
| |
* contributors may be used to endorse or promote products derived |
| |
* from this software without specific prior written permission. |
| |
* |
| |
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
| |
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
| |
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| |
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
| |
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| |
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| |
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| |
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| |
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| |
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| |
* POSSIBILITY OF SUCH DAMAGE. |
| |
*/ |
| |
|
| /* |
/* |
| * Copyright (c) 1982, 1986, 1988, 1990, 1993 |
* Copyright (c) 1982, 1986, 1988, 1990, 1993 |
| * The Regents of the University of California. All rights reserved. |
* The Regents of the University of California. All rights reserved. |
|
|
| #include <sys/cdefs.h> |
#include <sys/cdefs.h> |
| __KERNEL_RCSID(0, "$NetBSD$"); |
__KERNEL_RCSID(0, "$NetBSD$"); |
| |
|
| |
#include "opt_sock_counters.h" |
| |
#include "opt_sosend_loan.h" |
| |
|
| #include <sys/param.h> |
#include <sys/param.h> |
| #include <sys/systm.h> |
#include <sys/systm.h> |
| #include <sys/lwp.h> |
#include <sys/lwp.h> |
| Line 54 __KERNEL_RCSID(0, "$NetBSD$"); |
|
| Line 93 __KERNEL_RCSID(0, "$NetBSD$"); |
|
| #include <sys/resourcevar.h> |
#include <sys/resourcevar.h> |
| #include <sys/pool.h> |
#include <sys/pool.h> |
| |
|
| |
#include <uvm/uvm.h> |
| |
|
| struct pool socket_pool; |
struct pool socket_pool; |
| |
|
| extern int somaxconn; /* patchable (XXX sysctl) */ |
extern int somaxconn; /* patchable (XXX sysctl) */ |
| int somaxconn = SOMAXCONN; |
int somaxconn = SOMAXCONN; |
| |
|
| |
#ifdef SOSEND_COUNTERS |
| |
#include <sys/device.h> |
| |
|
| |
struct evcnt sosend_loan_big = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, |
| |
NULL, "sosend", "loan big"); |
| |
struct evcnt sosend_copy_big = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, |
| |
NULL, "sosend", "copy big"); |
| |
struct evcnt sosend_copy_small = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, |
| |
NULL, "sosend", "copy small"); |
| |
struct evcnt sosend_kvalimit = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, |
| |
NULL, "sosend", "kva limit"); |
| |
|
| |
#define SOSEND_COUNTER_INCR(ev) (ev)->ev_count++ |
| |
|
| |
#else |
| |
|
| |
#define SOSEND_COUNTER_INCR(ev) /* nothing */ |
| |
|
| |
#endif /* SOSEND_COUNTERS */ |
| |
|
| void |
void |
| soinit(void) |
soinit(void) |
| { |
{ |
| |
|
| pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0, |
pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0, |
| "sockpl", NULL); |
"sockpl", NULL); |
| |
|
| |
#ifdef SOSEND_COUNTERS |
| |
evcnt_attach_static(&sosend_loan_big); |
| |
evcnt_attach_static(&sosend_copy_big); |
| |
evcnt_attach_static(&sosend_copy_small); |
| |
evcnt_attach_static(&sosend_kvalimit); |
| |
#endif /* SOSEND_COUNTERS */ |
| |
} |
| |
|
| |
#ifdef SOSEND_LOAN |
| |
int use_sosend_loan = 1; |
| |
#else |
| |
int use_sosend_loan = 0; |
| |
#endif |
| |
|
| |
struct mbuf *so_pendfree; |
| |
|
| |
int somaxkva = 16 * 1024 * 1024; |
| |
int socurkva; |
| |
int sokvawaiters; |
| |
|
| |
#define SOCK_LOAN_THRESH 4096 |
| |
#define SOCK_LOAN_CHUNK 65536 |
| |
|
| |
static void |
| |
sodoloanfree(caddr_t buf, u_int size) |
| |
{ |
| |
struct vm_page **pgs; |
| |
vaddr_t va, sva, eva; |
| |
vsize_t len; |
| |
paddr_t pa; |
| |
int i, npgs; |
| |
|
| |
eva = round_page((vaddr_t) buf + size); |
| |
sva = trunc_page((vaddr_t) buf); |
| |
len = eva - sva; |
| |
npgs = len >> PAGE_SHIFT; |
| |
|
| |
pgs = alloca(npgs * sizeof(*pgs)); |
| |
|
| |
for (i = 0, va = sva; va < eva; i++, va += PAGE_SIZE) { |
| |
if (pmap_extract(pmap_kernel(), va, &pa) == FALSE) |
| |
panic("sodoloanfree: va 0x%lx not mapped", va); |
| |
pgs[i] = PHYS_TO_VM_PAGE(pa); |
| |
} |
| |
|
| |
pmap_kremove(sva, len); |
| |
pmap_update(pmap_kernel()); |
| |
uvm_unloan(pgs, npgs, UVM_LOAN_TOPAGE); |
| |
uvm_km_free(kernel_map, sva, len); |
| |
socurkva -= len; |
| |
if (sokvawaiters) |
| |
wakeup(&socurkva); |
| |
} |
| |
|
| |
static size_t |
| |
sodopendfree(struct socket *so) |
| |
{ |
| |
struct mbuf *m; |
| |
size_t rv = 0; |
| |
int s; |
| |
|
| |
s = splvm(); |
| |
|
| |
for (;;) { |
| |
m = so_pendfree; |
| |
if (m == NULL) |
| |
break; |
| |
so_pendfree = m->m_next; |
| |
splx(s); |
| |
|
| |
rv += m->m_ext.ext_size; |
| |
sodoloanfree(m->m_ext.ext_buf, m->m_ext.ext_size); |
| |
s = splvm(); |
| |
pool_cache_put(&mbpool_cache, m); |
| |
} |
| |
|
| |
for (;;) { |
| |
m = so->so_pendfree; |
| |
if (m == NULL) |
| |
break; |
| |
so->so_pendfree = m->m_next; |
| |
splx(s); |
| |
|
| |
rv += m->m_ext.ext_size; |
| |
sodoloanfree(m->m_ext.ext_buf, m->m_ext.ext_size); |
| |
s = splvm(); |
| |
pool_cache_put(&mbpool_cache, m); |
| |
} |
| |
|
| |
splx(s); |
| |
return (rv); |
| |
} |
| |
|
| |
static void |
| |
soloanfree(struct mbuf *m, caddr_t buf, u_int size, void *arg) |
| |
{ |
| |
struct socket *so = arg; |
| |
int s; |
| |
|
| |
if (m == NULL) { |
| |
sodoloanfree(buf, size); |
| |
return; |
| |
} |
| |
|
| |
s = splvm(); |
| |
m->m_next = so->so_pendfree; |
| |
so->so_pendfree = m; |
| |
splx(s); |
| |
if (sokvawaiters) |
| |
wakeup(&socurkva); |
| |
} |
| |
|
| |
static long |
| |
sosend_loan(struct socket *so, struct uio *uio, struct mbuf *m, long space) |
| |
{ |
| |
struct iovec *iov = uio->uio_iov; |
| |
vaddr_t sva, eva; |
| |
vsize_t len; |
| |
struct vm_page **pgs; |
| |
vaddr_t lva, va; |
| |
int npgs, s, i, error; |
| |
|
| |
if (uio->uio_segflg != UIO_USERSPACE) |
| |
return (0); |
| |
|
| |
if (iov->iov_len < (size_t) space) |
| |
space = iov->iov_len; |
| |
if (space > SOCK_LOAN_CHUNK) |
| |
space = SOCK_LOAN_CHUNK; |
| |
|
| |
eva = round_page((vaddr_t) iov->iov_base + space); |
| |
sva = trunc_page((vaddr_t) iov->iov_base); |
| |
len = eva - sva; |
| |
npgs = len >> PAGE_SHIFT; |
| |
|
| |
while (socurkva + len > somaxkva) { |
| |
if (sodopendfree(so)) |
| |
continue; |
| |
SOSEND_COUNTER_INCR(&sosend_kvalimit); |
| |
s = splvm(); |
| |
sokvawaiters++; |
| |
(void) tsleep(&socurkva, PVM, "sokva", 0); |
| |
sokvawaiters--; |
| |
splx(s); |
| |
} |
| |
|
| |
lva = uvm_km_valloc_wait(kernel_map, len); |
| |
if (lva == 0) |
| |
return (0); |
| |
socurkva += len; |
| |
|
| |
pgs = alloca(npgs * sizeof(*pgs)); |
| |
|
| |
error = uvm_loan(&uio->uio_procp->p_vmspace->vm_map, sva, len, |
| |
pgs, UVM_LOAN_TOPAGE); |
| |
if (error) { |
| |
uvm_km_free(kernel_map, lva, len); |
| |
socurkva -= len; |
| |
return (0); |
| |
} |
| |
|
| |
for (i = 0, va = lva; i < npgs; i++, va += PAGE_SIZE) |
| |
pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pgs[i]), VM_PROT_READ); |
| |
pmap_update(pmap_kernel()); |
| |
|
| |
lva += (vaddr_t) iov->iov_base & PAGE_MASK; |
| |
|
| |
MEXTADD(m, (caddr_t) lva, space, M_MBUF, soloanfree, so); |
| |
|
| |
uio->uio_resid -= space; |
| |
/* uio_offset not updated, not set/used for write(2) */ |
| |
uio->uio_iov->iov_base = (caddr_t) uio->uio_iov->iov_base + space; |
| |
uio->uio_iov->iov_len -= space; |
| |
if (uio->uio_iov->iov_len == 0) { |
| |
uio->uio_iov++; |
| |
uio->uio_iovcnt--; |
| |
} |
| |
|
| |
return (space); |
| } |
} |
| |
|
| /* |
/* |
| Line 152 solisten(struct socket *so, int backlog) |
|
| Line 393 solisten(struct socket *so, int backlog) |
|
| void |
void |
| sofree(struct socket *so) |
sofree(struct socket *so) |
| { |
{ |
| |
struct mbuf *m; |
| |
|
| if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) |
if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) |
| return; |
return; |
| Line 166 sofree(struct socket *so) |
|
| Line 408 sofree(struct socket *so) |
|
| } |
} |
| sbrelease(&so->so_snd); |
sbrelease(&so->so_snd); |
| sorflush(so); |
sorflush(so); |
| |
while ((m = so->so_pendfree) != NULL) { |
| |
so->so_pendfree = m->m_next; |
| |
m->m_next = so_pendfree; |
| |
so_pendfree = m; |
| |
} |
| pool_put(&socket_pool, so); |
pool_put(&socket_pool, so); |
| } |
} |
| |
|
| Line 321 sodisconnect(struct socket *so) |
|
| Line 568 sodisconnect(struct socket *so) |
|
| (struct proc *)0); |
(struct proc *)0); |
| bad: |
bad: |
| splx(s); |
splx(s); |
| |
sodopendfree(so); |
| return (error); |
return (error); |
| } |
} |
| |
|
| Line 351 sosend(struct socket *so, struct mbuf *a |
|
| Line 599 sosend(struct socket *so, struct mbuf *a |
|
| long space, len, resid, clen, mlen; |
long space, len, resid, clen, mlen; |
| int error, s, dontroute, atomic; |
int error, s, dontroute, atomic; |
| |
|
| |
sodopendfree(so); |
| |
|
| p = curproc->l_proc; /* XXX */ |
p = curproc->l_proc; /* XXX */ |
| clen = 0; |
clen = 0; |
| atomic = sosendallatonce(so) || top; |
atomic = sosendallatonce(so) || top; |
| Line 436 sosend(struct socket *so, struct mbuf *a |
|
| Line 686 sosend(struct socket *so, struct mbuf *a |
|
| MGET(m, M_WAIT, MT_DATA); |
MGET(m, M_WAIT, MT_DATA); |
| mlen = MLEN; |
mlen = MLEN; |
| } |
} |
| |
if (use_sosend_loan && |
| |
uio->uio_iov->iov_len >= SOCK_LOAN_THRESH && |
| |
space >= SOCK_LOAN_THRESH && |
| |
(len = sosend_loan(so, uio, m, |
| |
space)) != 0) { |
| |
SOSEND_COUNTER_INCR(&sosend_loan_big); |
| |
space -= len; |
| |
goto have_data; |
| |
} |
| if (resid >= MINCLSIZE && space >= MCLBYTES) { |
if (resid >= MINCLSIZE && space >= MCLBYTES) { |
| |
SOSEND_COUNTER_INCR(&sosend_copy_big); |
| MCLGET(m, M_WAIT); |
MCLGET(m, M_WAIT); |
| if ((m->m_flags & M_EXT) == 0) |
if ((m->m_flags & M_EXT) == 0) |
| goto nopages; |
goto nopages; |
| mlen = MCLBYTES; |
mlen = MCLBYTES; |
| #ifdef MAPPED_MBUFS |
|
| len = lmin(MCLBYTES, resid); |
|
| #else |
|
| if (atomic && top == 0) { |
if (atomic && top == 0) { |
| len = lmin(MCLBYTES - max_hdr, |
len = lmin(MCLBYTES - max_hdr, |
| resid); |
resid); |
| m->m_data += max_hdr; |
m->m_data += max_hdr; |
| } else |
} else |
| len = lmin(MCLBYTES, resid); |
len = lmin(MCLBYTES, resid); |
| #endif |
|
| space -= len; |
space -= len; |
| } else { |
} else { |
| nopages: |
nopages: |
| |
SOSEND_COUNTER_INCR(&sosend_copy_small); |
| len = lmin(lmin(mlen, resid), space); |
len = lmin(lmin(mlen, resid), space); |
| space -= len; |
space -= len; |
| /* |
/* |
|
|
| } |
} |
| error = uiomove(mtod(m, caddr_t), (int)len, |
error = uiomove(mtod(m, caddr_t), (int)len, |
| uio); |
uio); |
| |
have_data: |
| resid = uio->uio_resid; |
resid = uio->uio_resid; |
| m->m_len = len; |
m->m_len = len; |
| *mp = m; |
*mp = m; |
| Line 540 soreceive(struct socket *so, struct mbuf |
|
| Line 798 soreceive(struct socket *so, struct mbuf |
|
| int flags, len, error, s, offset, moff, type, orig_resid; |
int flags, len, error, s, offset, moff, type, orig_resid; |
| struct protosw *pr; |
struct protosw *pr; |
| struct mbuf *nextrecord; |
struct mbuf *nextrecord; |
| |
int mbuf_removed = 0; |
| |
|
| pr = so->so_proto; |
pr = so->so_proto; |
| mp = mp0; |
mp = mp0; |
| Line 553 soreceive(struct socket *so, struct mbuf |
|
| Line 812 soreceive(struct socket *so, struct mbuf |
|
| flags = *flagsp &~ MSG_EOR; |
flags = *flagsp &~ MSG_EOR; |
| else |
else |
| flags = 0; |
flags = 0; |
| |
|
| |
if ((flags & MSG_DONTWAIT) == 0) |
| |
sodopendfree(so); |
| |
|
| if (flags & MSG_OOB) { |
if (flags & MSG_OOB) { |
| m = m_get(M_WAIT, MT_DATA); |
m = m_get(M_WAIT, MT_DATA); |
| error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m, |
error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m, |
| Line 657 soreceive(struct socket *so, struct mbuf |
|
| Line 920 soreceive(struct socket *so, struct mbuf |
|
| m = m->m_next; |
m = m->m_next; |
| } else { |
} else { |
| sbfree(&so->so_rcv, m); |
sbfree(&so->so_rcv, m); |
| |
mbuf_removed = 1; |
| if (paddr) { |
if (paddr) { |
| *paddr = m; |
*paddr = m; |
| so->so_rcv.sb_mb = m->m_next; |
so->so_rcv.sb_mb = m->m_next; |
| Line 675 soreceive(struct socket *so, struct mbuf |
|
| Line 939 soreceive(struct socket *so, struct mbuf |
|
| m = m->m_next; |
m = m->m_next; |
| } else { |
} else { |
| sbfree(&so->so_rcv, m); |
sbfree(&so->so_rcv, m); |
| |
mbuf_removed = 1; |
| if (controlp) { |
if (controlp) { |
| if (pr->pr_domain->dom_externalize && |
if (pr->pr_domain->dom_externalize && |
| mtod(m, struct cmsghdr *)->cmsg_type == |
mtod(m, struct cmsghdr *)->cmsg_type == |
| Line 731 soreceive(struct socket *so, struct mbuf |
|
| Line 996 soreceive(struct socket *so, struct mbuf |
|
| splx(s); |
splx(s); |
| error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); |
error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); |
| s = splsoftnet(); |
s = splsoftnet(); |
| if (error) |
if (error) { |
| |
/* |
| |
* If any part of the record has been removed |
| |
* (such as the MT_SONAME mbuf, which will |
| |
* happen when PR_ADDR, and thus also |
| |
* PR_ATOMIC, is set), then drop the entire |
| |
* record to maintain the atomicity of the |
| |
* receive operation. |
| |
* |
| |
* This avoids a later panic("receive 1a") |
| |
* when compiled with DIAGNOSTIC. |
| |
*/ |
| |
if (m && mbuf_removed |
| |
&& (pr->pr_flags & PR_ATOMIC)) |
| |
(void) sbdroprecord(&so->so_rcv); |
| |
|
| goto release; |
goto release; |
| |
} |
| } else |
} else |
| uio->uio_resid -= len; |
uio->uio_resid -= len; |
| if (len == m->m_len - moff) { |
if (len == m->m_len - moff) { |
| Line 793 soreceive(struct socket *so, struct mbuf |
|
| Line 1074 soreceive(struct socket *so, struct mbuf |
|
| !sosendallatonce(so) && !nextrecord) { |
!sosendallatonce(so) && !nextrecord) { |
| if (so->so_error || so->so_state & SS_CANTRCVMORE) |
if (so->so_error || so->so_state & SS_CANTRCVMORE) |
| break; |
break; |
| |
/* |
| |
* If we are peeking and the socket receive buffer is |
| |
* full, stop since we can't get more data to peek at. |
| |
*/ |
| |
if ((flags & MSG_PEEK) && sbspace(&so->so_rcv) <= 0) |
| |
break; |
| |
/* |
| |
* If we've drained the socket buffer, tell the |
| |
* protocol in case it needs to do something to |
| |
* get it filled again. |
| |
*/ |
| |
if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb) |
| |
(*pr->pr_usrreq)(so, PRU_RCVD, |
| |
(struct mbuf *)0, |
| |
(struct mbuf *)(long)flags, |
| |
(struct mbuf *)0, |
| |
(struct proc *)0); |
| error = sbwait(&so->so_rcv); |
error = sbwait(&so->so_rcv); |
| if (error) { |
if (error) { |
| sbunlock(&so->so_rcv); |
sbunlock(&so->so_rcv); |