version 1.71, 2002/08/21 05:13:37 |
version 1.83, 2003/06/29 22:31:30 |
Line 76 __KERNEL_RCSID(0, "$NetBSD$"); |
|
Line 76 __KERNEL_RCSID(0, "$NetBSD$"); |
|
|
|
#include "opt_sock_counters.h" |
#include "opt_sock_counters.h" |
#include "opt_sosend_loan.h" |
#include "opt_sosend_loan.h" |
|
#include "opt_mbuftrace.h" |
|
|
#include <sys/param.h> |
#include <sys/param.h> |
#include <sys/systm.h> |
#include <sys/systm.h> |
Line 91 __KERNEL_RCSID(0, "$NetBSD$"); |
|
Line 92 __KERNEL_RCSID(0, "$NetBSD$"); |
|
#include <sys/signalvar.h> |
#include <sys/signalvar.h> |
#include <sys/resourcevar.h> |
#include <sys/resourcevar.h> |
#include <sys/pool.h> |
#include <sys/pool.h> |
|
#include <sys/event.h> |
|
|
#include <uvm/uvm.h> |
#include <uvm/uvm.h> |
|
|
struct pool socket_pool; |
struct pool socket_pool; |
|
|
|
MALLOC_DEFINE(M_SOOPTS, "soopts", "socket options"); |
|
MALLOC_DEFINE(M_SONAME, "soname", "socket name"); |
|
|
extern int somaxconn; /* patchable (XXX sysctl) */ |
extern int somaxconn; /* patchable (XXX sysctl) */ |
int somaxconn = SOMAXCONN; |
int somaxconn = SOMAXCONN; |
|
|
Line 149 int sokvawaiters; |
|
Line 154 int sokvawaiters; |
|
#define SOCK_LOAN_THRESH 4096 |
#define SOCK_LOAN_THRESH 4096 |
#define SOCK_LOAN_CHUNK 65536 |
#define SOCK_LOAN_CHUNK 65536 |
|
|
|
static size_t sodopendfree(struct socket *); |
|
|
|
vaddr_t |
|
sokvaalloc(vsize_t len, struct socket *so) |
|
{ |
|
vaddr_t lva; |
|
int s; |
|
|
|
while (socurkva + len > somaxkva) { |
|
if (sodopendfree(so)) |
|
continue; |
|
SOSEND_COUNTER_INCR(&sosend_kvalimit); |
|
s = splvm(); |
|
sokvawaiters++; |
|
(void) tsleep(&socurkva, PVM, "sokva", 0); |
|
sokvawaiters--; |
|
splx(s); |
|
} |
|
|
|
lva = uvm_km_valloc_wait(kernel_map, len); |
|
if (lva == 0) |
|
return (0); |
|
socurkva += len; |
|
|
|
return lva; |
|
} |
|
|
|
void |
|
sokvafree(vaddr_t sva, vsize_t len) |
|
{ |
|
|
|
uvm_km_free(kernel_map, sva, len); |
|
socurkva -= len; |
|
if (sokvawaiters) |
|
wakeup(&socurkva); |
|
} |
|
|
static void |
static void |
sodoloanfree(caddr_t buf, u_int size) |
sodoloanfree(struct vm_page **pgs, caddr_t buf, size_t size) |
{ |
{ |
struct vm_page **pgs; |
|
vaddr_t va, sva, eva; |
vaddr_t va, sva, eva; |
vsize_t len; |
vsize_t len; |
paddr_t pa; |
paddr_t pa; |
Line 163 sodoloanfree(caddr_t buf, u_int size) |
|
Line 204 sodoloanfree(caddr_t buf, u_int size) |
|
len = eva - sva; |
len = eva - sva; |
npgs = len >> PAGE_SHIFT; |
npgs = len >> PAGE_SHIFT; |
|
|
pgs = alloca(npgs * sizeof(*pgs)); |
if (__predict_false(pgs == NULL)) { |
|
pgs = alloca(npgs * sizeof(*pgs)); |
|
|
for (i = 0, va = sva; va < eva; i++, va += PAGE_SIZE) { |
for (i = 0, va = sva; va < eva; i++, va += PAGE_SIZE) { |
if (pmap_extract(pmap_kernel(), va, &pa) == FALSE) |
if (pmap_extract(pmap_kernel(), va, &pa) == FALSE) |
panic("sodoloanfree: va 0x%lx not mapped", va); |
panic("sodoloanfree: va 0x%lx not mapped", va); |
pgs[i] = PHYS_TO_VM_PAGE(pa); |
pgs[i] = PHYS_TO_VM_PAGE(pa); |
|
} |
} |
} |
|
|
pmap_kremove(sva, len); |
pmap_kremove(sva, len); |
pmap_update(pmap_kernel()); |
pmap_update(pmap_kernel()); |
uvm_unloan(pgs, npgs, UVM_LOAN_TOPAGE); |
uvm_unloan(pgs, npgs, UVM_LOAN_TOPAGE); |
uvm_km_free(kernel_map, sva, len); |
sokvafree(sva, len); |
socurkva -= len; |
|
if (sokvawaiters) |
|
wakeup(&socurkva); |
|
} |
} |
|
|
static size_t |
static size_t |
Line 197 sodopendfree(struct socket *so) |
|
Line 237 sodopendfree(struct socket *so) |
|
splx(s); |
splx(s); |
|
|
rv += m->m_ext.ext_size; |
rv += m->m_ext.ext_size; |
sodoloanfree(m->m_ext.ext_buf, m->m_ext.ext_size); |
sodoloanfree((m->m_flags & M_EXT_PAGES) ? |
|
m->m_ext.ext_pgs : NULL, m->m_ext.ext_buf, |
|
m->m_ext.ext_size); |
s = splvm(); |
s = splvm(); |
pool_cache_put(&mbpool_cache, m); |
pool_cache_put(&mbpool_cache, m); |
} |
} |
Line 210 sodopendfree(struct socket *so) |
|
Line 252 sodopendfree(struct socket *so) |
|
splx(s); |
splx(s); |
|
|
rv += m->m_ext.ext_size; |
rv += m->m_ext.ext_size; |
sodoloanfree(m->m_ext.ext_buf, m->m_ext.ext_size); |
sodoloanfree((m->m_flags & M_EXT_PAGES) ? |
|
m->m_ext.ext_pgs : NULL, m->m_ext.ext_buf, |
|
m->m_ext.ext_size); |
s = splvm(); |
s = splvm(); |
pool_cache_put(&mbpool_cache, m); |
pool_cache_put(&mbpool_cache, m); |
} |
} |
Line 219 sodopendfree(struct socket *so) |
|
Line 263 sodopendfree(struct socket *so) |
|
return (rv); |
return (rv); |
} |
} |
|
|
static void |
void |
soloanfree(struct mbuf *m, caddr_t buf, u_int size, void *arg) |
soloanfree(struct mbuf *m, caddr_t buf, size_t size, void *arg) |
{ |
{ |
struct socket *so = arg; |
struct socket *so = arg; |
int s; |
int s; |
|
|
if (m == NULL) { |
if (m == NULL) { |
sodoloanfree(buf, size); |
sodoloanfree(NULL, buf, size); |
return; |
return; |
} |
} |
|
|
Line 244 sosend_loan(struct socket *so, struct ui |
|
Line 288 sosend_loan(struct socket *so, struct ui |
|
struct iovec *iov = uio->uio_iov; |
struct iovec *iov = uio->uio_iov; |
vaddr_t sva, eva; |
vaddr_t sva, eva; |
vsize_t len; |
vsize_t len; |
struct vm_page **pgs; |
|
vaddr_t lva, va; |
vaddr_t lva, va; |
int npgs, s, i, error; |
int npgs, i, error; |
|
|
if (uio->uio_segflg != UIO_USERSPACE) |
if (uio->uio_segflg != UIO_USERSPACE) |
return (0); |
return (0); |
Line 261 sosend_loan(struct socket *so, struct ui |
|
Line 304 sosend_loan(struct socket *so, struct ui |
|
len = eva - sva; |
len = eva - sva; |
npgs = len >> PAGE_SHIFT; |
npgs = len >> PAGE_SHIFT; |
|
|
while (socurkva + len > somaxkva) { |
/* XXX KDASSERT */ |
if (sodopendfree(so)) |
KASSERT(npgs <= M_EXT_MAXPAGES); |
continue; |
|
SOSEND_COUNTER_INCR(&sosend_kvalimit); |
|
s = splvm(); |
|
sokvawaiters++; |
|
(void) tsleep(&socurkva, PVM, "sokva", 0); |
|
sokvawaiters--; |
|
splx(s); |
|
} |
|
|
|
lva = uvm_km_valloc_wait(kernel_map, len); |
lva = sokvaalloc(len, so); |
if (lva == 0) |
if (lva == 0) |
return (0); |
return 0; |
socurkva += len; |
|
|
|
pgs = alloca(npgs * sizeof(*pgs)); |
|
|
|
error = uvm_loan(&uio->uio_procp->p_vmspace->vm_map, sva, len, |
error = uvm_loan(&uio->uio_procp->p_vmspace->vm_map, sva, len, |
pgs, UVM_LOAN_TOPAGE); |
m->m_ext.ext_pgs, UVM_LOAN_TOPAGE); |
if (error) { |
if (error) { |
uvm_km_free(kernel_map, lva, len); |
sokvafree(lva, len); |
socurkva -= len; |
|
return (0); |
return (0); |
} |
} |
|
|
for (i = 0, va = lva; i < npgs; i++, va += PAGE_SIZE) |
for (i = 0, va = lva; i < npgs; i++, va += PAGE_SIZE) |
pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pgs[i]), VM_PROT_READ); |
pmap_kenter_pa(va, VM_PAGE_TO_PHYS(m->m_ext.ext_pgs[i]), |
|
VM_PROT_READ); |
pmap_update(pmap_kernel()); |
pmap_update(pmap_kernel()); |
|
|
lva += (vaddr_t) iov->iov_base & PAGE_MASK; |
lva += (vaddr_t) iov->iov_base & PAGE_MASK; |
|
|
MEXTADD(m, (caddr_t) lva, space, M_MBUF, soloanfree, so); |
MEXTADD(m, (caddr_t) lva, space, M_MBUF, soloanfree, so); |
|
m->m_flags |= M_EXT_PAGES | M_EXT_ROMAP; |
|
|
uio->uio_resid -= space; |
uio->uio_resid -= space; |
/* uio_offset not updated, not set/used for write(2) */ |
/* uio_offset not updated, not set/used for write(2) */ |
Line 341 socreate(int dom, struct socket **aso, i |
|
Line 374 socreate(int dom, struct socket **aso, i |
|
so->so_proto = prp; |
so->so_proto = prp; |
so->so_send = sosend; |
so->so_send = sosend; |
so->so_receive = soreceive; |
so->so_receive = soreceive; |
|
#ifdef MBUFTRACE |
|
so->so_rcv.sb_mowner = &prp->pr_domain->dom_mowner; |
|
so->so_snd.sb_mowner = &prp->pr_domain->dom_mowner; |
|
so->so_mowner = &prp->pr_domain->dom_mowner; |
|
#endif |
if (p != 0) |
if (p != 0) |
so->so_uid = p->p_ucred->cr_uid; |
so->so_uid = p->p_ucred->cr_uid; |
error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0, |
error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0, |
Line 677 sosend(struct socket *so, struct mbuf *a |
|
Line 715 sosend(struct socket *so, struct mbuf *a |
|
top->m_flags |= M_EOR; |
top->m_flags |= M_EOR; |
} else do { |
} else do { |
if (top == 0) { |
if (top == 0) { |
MGETHDR(m, M_WAIT, MT_DATA); |
m = m_gethdr(M_WAIT, MT_DATA); |
mlen = MHLEN; |
mlen = MHLEN; |
m->m_pkthdr.len = 0; |
m->m_pkthdr.len = 0; |
m->m_pkthdr.rcvif = (struct ifnet *)0; |
m->m_pkthdr.rcvif = (struct ifnet *)0; |
} else { |
} else { |
MGET(m, M_WAIT, MT_DATA); |
m = m_get(M_WAIT, MT_DATA); |
mlen = MLEN; |
mlen = MLEN; |
} |
} |
|
MCLAIM(m, so->so_snd.sb_mowner); |
if (use_sosend_loan && |
if (use_sosend_loan && |
uio->uio_iov->iov_len >= SOCK_LOAN_THRESH && |
uio->uio_iov->iov_len >= SOCK_LOAN_THRESH && |
space >= SOCK_LOAN_THRESH && |
space >= SOCK_LOAN_THRESH && |
Line 696 sosend(struct socket *so, struct mbuf *a |
|
Line 735 sosend(struct socket *so, struct mbuf *a |
|
} |
} |
if (resid >= MINCLSIZE && space >= MCLBYTES) { |
if (resid >= MINCLSIZE && space >= MCLBYTES) { |
SOSEND_COUNTER_INCR(&sosend_copy_big); |
SOSEND_COUNTER_INCR(&sosend_copy_big); |
MCLGET(m, M_WAIT); |
m_clget(m, M_WAIT); |
if ((m->m_flags & M_EXT) == 0) |
if ((m->m_flags & M_EXT) == 0) |
goto nopages; |
goto nopages; |
mlen = MCLBYTES; |
mlen = MCLBYTES; |
Line 1340 sosetopt(struct socket *so, int level, i |
|
Line 1379 sosetopt(struct socket *so, int level, i |
|
goto bad; |
goto bad; |
} |
} |
tv = mtod(m, struct timeval *); |
tv = mtod(m, struct timeval *); |
if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) { |
if (tv->tv_sec > (SHRT_MAX - tv->tv_usec / tick) / hz) { |
error = EDOM; |
error = EDOM; |
goto bad; |
goto bad; |
} |
} |
val = tv->tv_sec * hz + tv->tv_usec / tick; |
val = tv->tv_sec * hz + tv->tv_usec / tick; |
|
if (val == 0 && tv->tv_usec != 0) |
|
val = 1; |
|
|
switch (optname) { |
switch (optname) { |
|
|
Line 1468 sohasoutofband(struct socket *so) |
|
Line 1509 sohasoutofband(struct socket *so) |
|
psignal(p, SIGURG); |
psignal(p, SIGURG); |
selwakeup(&so->so_rcv.sb_sel); |
selwakeup(&so->so_rcv.sb_sel); |
} |
} |
|
|
|
static void |
|
filt_sordetach(struct knote *kn) |
|
{ |
|
struct socket *so; |
|
|
|
so = (struct socket *)kn->kn_fp->f_data; |
|
SLIST_REMOVE(&so->so_rcv.sb_sel.sel_klist, kn, knote, kn_selnext); |
|
if (SLIST_EMPTY(&so->so_rcv.sb_sel.sel_klist)) |
|
so->so_rcv.sb_flags &= ~SB_KNOTE; |
|
} |
|
|
|
/*ARGSUSED*/ |
|
static int |
|
filt_soread(struct knote *kn, long hint) |
|
{ |
|
struct socket *so; |
|
|
|
so = (struct socket *)kn->kn_fp->f_data; |
|
kn->kn_data = so->so_rcv.sb_cc; |
|
if (so->so_state & SS_CANTRCVMORE) { |
|
kn->kn_flags |= EV_EOF; |
|
kn->kn_fflags = so->so_error; |
|
return (1); |
|
} |
|
if (so->so_error) /* temporary udp error */ |
|
return (1); |
|
if (kn->kn_sfflags & NOTE_LOWAT) |
|
return (kn->kn_data >= kn->kn_sdata); |
|
return (kn->kn_data >= so->so_rcv.sb_lowat); |
|
} |
|
|
|
static void |
|
filt_sowdetach(struct knote *kn) |
|
{ |
|
struct socket *so; |
|
|
|
so = (struct socket *)kn->kn_fp->f_data; |
|
SLIST_REMOVE(&so->so_snd.sb_sel.sel_klist, kn, knote, kn_selnext); |
|
if (SLIST_EMPTY(&so->so_snd.sb_sel.sel_klist)) |
|
so->so_snd.sb_flags &= ~SB_KNOTE; |
|
} |
|
|
|
/*ARGSUSED*/ |
|
static int |
|
filt_sowrite(struct knote *kn, long hint) |
|
{ |
|
struct socket *so; |
|
|
|
so = (struct socket *)kn->kn_fp->f_data; |
|
kn->kn_data = sbspace(&so->so_snd); |
|
if (so->so_state & SS_CANTSENDMORE) { |
|
kn->kn_flags |= EV_EOF; |
|
kn->kn_fflags = so->so_error; |
|
return (1); |
|
} |
|
if (so->so_error) /* temporary udp error */ |
|
return (1); |
|
if (((so->so_state & SS_ISCONNECTED) == 0) && |
|
(so->so_proto->pr_flags & PR_CONNREQUIRED)) |
|
return (0); |
|
if (kn->kn_sfflags & NOTE_LOWAT) |
|
return (kn->kn_data >= kn->kn_sdata); |
|
return (kn->kn_data >= so->so_snd.sb_lowat); |
|
} |
|
|
|
/*ARGSUSED*/ |
|
static int |
|
filt_solisten(struct knote *kn, long hint) |
|
{ |
|
struct socket *so; |
|
|
|
so = (struct socket *)kn->kn_fp->f_data; |
|
|
|
/* |
|
* Set kn_data to number of incoming connections, not |
|
* counting partial (incomplete) connections. |
|
*/ |
|
kn->kn_data = so->so_qlen; |
|
return (kn->kn_data > 0); |
|
} |
|
|
|
static const struct filterops solisten_filtops = |
|
{ 1, NULL, filt_sordetach, filt_solisten }; |
|
static const struct filterops soread_filtops = |
|
{ 1, NULL, filt_sordetach, filt_soread }; |
|
static const struct filterops sowrite_filtops = |
|
{ 1, NULL, filt_sowdetach, filt_sowrite }; |
|
|
|
int |
|
soo_kqfilter(struct file *fp, struct knote *kn) |
|
{ |
|
struct socket *so; |
|
struct sockbuf *sb; |
|
|
|
so = (struct socket *)kn->kn_fp->f_data; |
|
switch (kn->kn_filter) { |
|
case EVFILT_READ: |
|
if (so->so_options & SO_ACCEPTCONN) |
|
kn->kn_fop = &solisten_filtops; |
|
else |
|
kn->kn_fop = &soread_filtops; |
|
sb = &so->so_rcv; |
|
break; |
|
case EVFILT_WRITE: |
|
kn->kn_fop = &sowrite_filtops; |
|
sb = &so->so_snd; |
|
break; |
|
default: |
|
return (1); |
|
} |
|
SLIST_INSERT_HEAD(&sb->sb_sel.sel_klist, kn, kn_selnext); |
|
sb->sb_flags |= SB_KNOTE; |
|
return (0); |
|
} |
|
|