Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.71 retrieving revision 1.83 diff -u -p -r1.71 -r1.83 --- src/sys/kern/uipc_socket.c 2002/08/21 05:13:37 1.71 +++ src/sys/kern/uipc_socket.c 2003/06/29 22:31:30 1.83 @@ -1,4 +1,4 @@ -/* $NetBSD: uipc_socket.c,v 1.71 2002/08/21 05:13:37 thorpej Exp $ */ +/* $NetBSD: uipc_socket.c,v 1.83 2003/06/29 22:31:30 fvdl Exp $ */ /*- * Copyright (c) 2002 The NetBSD Foundation, Inc. @@ -72,10 +72,11 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.71 2002/08/21 05:13:37 thorpej Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.83 2003/06/29 22:31:30 fvdl Exp $"); #include "opt_sock_counters.h" #include "opt_sosend_loan.h" +#include "opt_mbuftrace.h" #include #include @@ -91,11 +92,15 @@ __KERNEL_RCSID(0, "$NetBSD: uipc_socket. #include #include #include +#include #include struct pool socket_pool; +MALLOC_DEFINE(M_SOOPTS, "soopts", "socket options"); +MALLOC_DEFINE(M_SONAME, "soname", "socket name"); + extern int somaxconn; /* patchable (XXX sysctl) */ int somaxconn = SOMAXCONN; @@ -149,10 +154,46 @@ int sokvawaiters; #define SOCK_LOAN_THRESH 4096 #define SOCK_LOAN_CHUNK 65536 +static size_t sodopendfree(struct socket *); + +vaddr_t +sokvaalloc(vsize_t len, struct socket *so) +{ + vaddr_t lva; + int s; + + while (socurkva + len > somaxkva) { + if (sodopendfree(so)) + continue; + SOSEND_COUNTER_INCR(&sosend_kvalimit); + s = splvm(); + sokvawaiters++; + (void) tsleep(&socurkva, PVM, "sokva", 0); + sokvawaiters--; + splx(s); + } + + lva = uvm_km_valloc_wait(kernel_map, len); + if (lva == 0) + return (0); + socurkva += len; + + return lva; +} + +void +sokvafree(vaddr_t sva, vsize_t len) +{ + + uvm_km_free(kernel_map, sva, len); + socurkva -= len; + if (sokvawaiters) + wakeup(&socurkva); +} + static void -sodoloanfree(caddr_t buf, u_int size) +sodoloanfree(struct vm_page **pgs, caddr_t buf, size_t size) { - struct vm_page **pgs; vaddr_t va, sva, eva; vsize_t len; paddr_t pa; @@ -163,21 +204,20 @@ sodoloanfree(caddr_t buf, u_int size) len = eva - sva; npgs = len >> PAGE_SHIFT; - pgs = alloca(npgs * sizeof(*pgs)); + if (__predict_false(pgs == NULL)) { + pgs = alloca(npgs * sizeof(*pgs)); - for (i = 0, va = sva; va < eva; i++, va += PAGE_SIZE) { - if (pmap_extract(pmap_kernel(), va, &pa) == FALSE) - panic("sodoloanfree: va 0x%lx not mapped", va); - pgs[i] = PHYS_TO_VM_PAGE(pa); + for (i = 0, va = sva; va < eva; i++, va += PAGE_SIZE) { + if (pmap_extract(pmap_kernel(), va, &pa) == FALSE) + panic("sodoloanfree: va 0x%lx not mapped", va); + pgs[i] = PHYS_TO_VM_PAGE(pa); + } } pmap_kremove(sva, len); pmap_update(pmap_kernel()); uvm_unloan(pgs, npgs, UVM_LOAN_TOPAGE); - uvm_km_free(kernel_map, sva, len); - socurkva -= len; - if (sokvawaiters) - wakeup(&socurkva); + sokvafree(sva, len); } static size_t @@ -197,7 +237,9 @@ sodopendfree(struct socket *so) splx(s); rv += m->m_ext.ext_size; - sodoloanfree(m->m_ext.ext_buf, m->m_ext.ext_size); + sodoloanfree((m->m_flags & M_EXT_PAGES) ? + m->m_ext.ext_pgs : NULL, m->m_ext.ext_buf, + m->m_ext.ext_size); s = splvm(); pool_cache_put(&mbpool_cache, m); } @@ -210,7 +252,9 @@ sodopendfree(struct socket *so) splx(s); rv += m->m_ext.ext_size; - sodoloanfree(m->m_ext.ext_buf, m->m_ext.ext_size); + sodoloanfree((m->m_flags & M_EXT_PAGES) ? + m->m_ext.ext_pgs : NULL, m->m_ext.ext_buf, + m->m_ext.ext_size); s = splvm(); pool_cache_put(&mbpool_cache, m); } @@ -219,14 +263,14 @@ sodopendfree(struct socket *so) return (rv); } -static void -soloanfree(struct mbuf *m, caddr_t buf, u_int size, void *arg) +void +soloanfree(struct mbuf *m, caddr_t buf, size_t size, void *arg) { struct socket *so = arg; int s; if (m == NULL) { - sodoloanfree(buf, size); + sodoloanfree(NULL, buf, size); return; } @@ -244,9 +288,8 @@ sosend_loan(struct socket *so, struct ui struct iovec *iov = uio->uio_iov; vaddr_t sva, eva; vsize_t len; - struct vm_page **pgs; vaddr_t lva, va; - int npgs, s, i, error; + int npgs, i, error; if (uio->uio_segflg != UIO_USERSPACE) return (0); @@ -261,39 +304,29 @@ sosend_loan(struct socket *so, struct ui len = eva - sva; npgs = len >> PAGE_SHIFT; - while (socurkva + len > somaxkva) { - if (sodopendfree(so)) - continue; - SOSEND_COUNTER_INCR(&sosend_kvalimit); - s = splvm(); - sokvawaiters++; - (void) tsleep(&socurkva, PVM, "sokva", 0); - sokvawaiters--; - splx(s); - } + /* XXX KDASSERT */ + KASSERT(npgs <= M_EXT_MAXPAGES); - lva = uvm_km_valloc_wait(kernel_map, len); + lva = sokvaalloc(len, so); if (lva == 0) - return (0); - socurkva += len; - - pgs = alloca(npgs * sizeof(*pgs)); + return 0; error = uvm_loan(&uio->uio_procp->p_vmspace->vm_map, sva, len, - pgs, UVM_LOAN_TOPAGE); + m->m_ext.ext_pgs, UVM_LOAN_TOPAGE); if (error) { - uvm_km_free(kernel_map, lva, len); - socurkva -= len; + sokvafree(lva, len); return (0); } for (i = 0, va = lva; i < npgs; i++, va += PAGE_SIZE) - pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pgs[i]), VM_PROT_READ); + pmap_kenter_pa(va, VM_PAGE_TO_PHYS(m->m_ext.ext_pgs[i]), + VM_PROT_READ); pmap_update(pmap_kernel()); lva += (vaddr_t) iov->iov_base & PAGE_MASK; MEXTADD(m, (caddr_t) lva, space, M_MBUF, soloanfree, so); + m->m_flags |= M_EXT_PAGES | M_EXT_ROMAP; uio->uio_resid -= space; /* uio_offset not updated, not set/used for write(2) */ @@ -341,6 +374,11 @@ socreate(int dom, struct socket **aso, i so->so_proto = prp; so->so_send = sosend; so->so_receive = soreceive; +#ifdef MBUFTRACE + so->so_rcv.sb_mowner = &prp->pr_domain->dom_mowner; + so->so_snd.sb_mowner = &prp->pr_domain->dom_mowner; + so->so_mowner = &prp->pr_domain->dom_mowner; +#endif if (p != 0) so->so_uid = p->p_ucred->cr_uid; error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0, @@ -677,14 +715,15 @@ sosend(struct socket *so, struct mbuf *a top->m_flags |= M_EOR; } else do { if (top == 0) { - MGETHDR(m, M_WAIT, MT_DATA); + m = m_gethdr(M_WAIT, MT_DATA); mlen = MHLEN; m->m_pkthdr.len = 0; m->m_pkthdr.rcvif = (struct ifnet *)0; } else { - MGET(m, M_WAIT, MT_DATA); + m = m_get(M_WAIT, MT_DATA); mlen = MLEN; } + MCLAIM(m, so->so_snd.sb_mowner); if (use_sosend_loan && uio->uio_iov->iov_len >= SOCK_LOAN_THRESH && space >= SOCK_LOAN_THRESH && @@ -696,7 +735,7 @@ sosend(struct socket *so, struct mbuf *a } if (resid >= MINCLSIZE && space >= MCLBYTES) { SOSEND_COUNTER_INCR(&sosend_copy_big); - MCLGET(m, M_WAIT); + m_clget(m, M_WAIT); if ((m->m_flags & M_EXT) == 0) goto nopages; mlen = MCLBYTES; @@ -1340,11 +1379,13 @@ sosetopt(struct socket *so, int level, i goto bad; } tv = mtod(m, struct timeval *); - if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) { + if (tv->tv_sec > (SHRT_MAX - tv->tv_usec / tick) / hz) { error = EDOM; goto bad; } val = tv->tv_sec * hz + tv->tv_usec / tick; + if (val == 0 && tv->tv_usec != 0) + val = 1; switch (optname) { @@ -1468,3 +1509,119 @@ sohasoutofband(struct socket *so) psignal(p, SIGURG); selwakeup(&so->so_rcv.sb_sel); } + +static void +filt_sordetach(struct knote *kn) +{ + struct socket *so; + + so = (struct socket *)kn->kn_fp->f_data; + SLIST_REMOVE(&so->so_rcv.sb_sel.sel_klist, kn, knote, kn_selnext); + if (SLIST_EMPTY(&so->so_rcv.sb_sel.sel_klist)) + so->so_rcv.sb_flags &= ~SB_KNOTE; +} + +/*ARGSUSED*/ +static int +filt_soread(struct knote *kn, long hint) +{ + struct socket *so; + + so = (struct socket *)kn->kn_fp->f_data; + kn->kn_data = so->so_rcv.sb_cc; + if (so->so_state & SS_CANTRCVMORE) { + kn->kn_flags |= EV_EOF; + kn->kn_fflags = so->so_error; + return (1); + } + if (so->so_error) /* temporary udp error */ + return (1); + if (kn->kn_sfflags & NOTE_LOWAT) + return (kn->kn_data >= kn->kn_sdata); + return (kn->kn_data >= so->so_rcv.sb_lowat); +} + +static void +filt_sowdetach(struct knote *kn) +{ + struct socket *so; + + so = (struct socket *)kn->kn_fp->f_data; + SLIST_REMOVE(&so->so_snd.sb_sel.sel_klist, kn, knote, kn_selnext); + if (SLIST_EMPTY(&so->so_snd.sb_sel.sel_klist)) + so->so_snd.sb_flags &= ~SB_KNOTE; +} + +/*ARGSUSED*/ +static int +filt_sowrite(struct knote *kn, long hint) +{ + struct socket *so; + + so = (struct socket *)kn->kn_fp->f_data; + kn->kn_data = sbspace(&so->so_snd); + if (so->so_state & SS_CANTSENDMORE) { + kn->kn_flags |= EV_EOF; + kn->kn_fflags = so->so_error; + return (1); + } + if (so->so_error) /* temporary udp error */ + return (1); + if (((so->so_state & SS_ISCONNECTED) == 0) && + (so->so_proto->pr_flags & PR_CONNREQUIRED)) + return (0); + if (kn->kn_sfflags & NOTE_LOWAT) + return (kn->kn_data >= kn->kn_sdata); + return (kn->kn_data >= so->so_snd.sb_lowat); +} + +/*ARGSUSED*/ +static int +filt_solisten(struct knote *kn, long hint) +{ + struct socket *so; + + so = (struct socket *)kn->kn_fp->f_data; + + /* + * Set kn_data to number of incoming connections, not + * counting partial (incomplete) connections. + */ + kn->kn_data = so->so_qlen; + return (kn->kn_data > 0); +} + +static const struct filterops solisten_filtops = + { 1, NULL, filt_sordetach, filt_solisten }; +static const struct filterops soread_filtops = + { 1, NULL, filt_sordetach, filt_soread }; +static const struct filterops sowrite_filtops = + { 1, NULL, filt_sowdetach, filt_sowrite }; + +int +soo_kqfilter(struct file *fp, struct knote *kn) +{ + struct socket *so; + struct sockbuf *sb; + + so = (struct socket *)kn->kn_fp->f_data; + switch (kn->kn_filter) { + case EVFILT_READ: + if (so->so_options & SO_ACCEPTCONN) + kn->kn_fop = &solisten_filtops; + else + kn->kn_fop = &soread_filtops; + sb = &so->so_rcv; + break; + case EVFILT_WRITE: + kn->kn_fop = &sowrite_filtops; + sb = &so->so_snd; + break; + default: + return (1); + } + SLIST_INSERT_HEAD(&sb->sb_sel.sel_klist, kn, kn_selnext); + sb->sb_flags |= SB_KNOTE; + return (0); +} +