Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.97 retrieving revision 1.116.4.2 diff -u -p -r1.97 -r1.116.4.2 --- src/sys/kern/uipc_socket.c 2004/03/24 15:34:53 1.97 +++ src/sys/kern/uipc_socket.c 2006/04/19 05:14:00 1.116.4.2 @@ -1,4 +1,4 @@ -/* $NetBSD: uipc_socket.c,v 1.97 2004/03/24 15:34:53 atatat Exp $ */ +/* $NetBSD: uipc_socket.c,v 1.116.4.2 2006/04/19 05:14:00 elad Exp $ */ /*- * Copyright (c) 2002 The NetBSD Foundation, Inc. @@ -68,7 +68,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.97 2004/03/24 15:34:53 atatat Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.116.4.2 2006/04/19 05:14:00 elad Exp $"); #include "opt_sock_counters.h" #include "opt_sosend_loan.h" @@ -94,7 +94,7 @@ __KERNEL_RCSID(0, "$NetBSD: uipc_socket. #include -struct pool socket_pool; +POOL_INIT(socket_pool, sizeof(struct socket), 0, 0, 0, "sockpl", NULL); MALLOC_DEFINE(M_SOOPTS, "soopts", "socket options"); MALLOC_DEFINE(M_SONAME, "soname", "socket name"); @@ -105,17 +105,21 @@ int somaxconn = SOMAXCONN; #ifdef SOSEND_COUNTERS #include -struct evcnt sosend_loan_big = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, +static struct evcnt sosend_loan_big = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "sosend", "loan big"); -struct evcnt sosend_copy_big = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, +static struct evcnt sosend_copy_big = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "sosend", "copy big"); -struct evcnt sosend_copy_small = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, +static struct evcnt sosend_copy_small = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "sosend", "copy small"); -struct evcnt sosend_kvalimit = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, +static struct evcnt sosend_kvalimit = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "sosend", "kva limit"); #define SOSEND_COUNTER_INCR(ev) (ev)->ev_count++ +EVCNT_ATTACH_STATIC(sosend_loan_big); +EVCNT_ATTACH_STATIC(sosend_copy_big); +EVCNT_ATTACH_STATIC(sosend_copy_small); +EVCNT_ATTACH_STATIC(sosend_kvalimit); #else #define SOSEND_COUNTER_INCR(ev) /* nothing */ @@ -128,17 +132,8 @@ soinit(void) /* Set the initial adjusted socket buffer size. */ if (sb_max_set(sb_max)) - panic("bad initial sb_max value: %lu\n", sb_max); + panic("bad initial sb_max value: %lu", sb_max); - pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0, - "sockpl", NULL); - -#ifdef SOSEND_COUNTERS - evcnt_attach_static(&sosend_loan_big); - evcnt_attach_static(&sosend_copy_big); - evcnt_attach_static(&sosend_copy_small); - evcnt_attach_static(&sosend_kvalimit); -#endif /* SOSEND_COUNTERS */ } #ifdef SOSEND_NO_LOAN @@ -147,28 +142,27 @@ int use_sosend_loan = 0; int use_sosend_loan = 1; #endif -struct simplelock so_pendfree_slock = SIMPLELOCK_INITIALIZER; -struct mbuf *so_pendfree; +static struct simplelock so_pendfree_slock = SIMPLELOCK_INITIALIZER; +static struct mbuf *so_pendfree; #ifndef SOMAXKVA #define SOMAXKVA (16 * 1024 * 1024) #endif int somaxkva = SOMAXKVA; -int socurkva; -int sokvawaiters; +static int socurkva; +static int sokvawaiters; #define SOCK_LOAN_THRESH 4096 #define SOCK_LOAN_CHUNK 65536 -static size_t sodopendfree(struct socket *); -static size_t sodopendfreel(struct socket *); -static __inline void sokvareserve(struct socket *, vsize_t); -static __inline void sokvaunreserve(vsize_t); +static size_t sodopendfree(void); +static size_t sodopendfreel(void); -static __inline void +static vsize_t sokvareserve(struct socket *so, vsize_t len) { int s; + int error; s = splvm(); simple_lock(&so_pendfree_slock); @@ -179,7 +173,7 @@ sokvareserve(struct socket *so, vsize_t * try to do pendfree. */ - freed = sodopendfreel(so); + freed = sodopendfreel(); /* * if some kva was freed, try again. @@ -190,15 +184,21 @@ sokvareserve(struct socket *so, vsize_t SOSEND_COUNTER_INCR(&sosend_kvalimit); sokvawaiters++; - (void) ltsleep(&socurkva, PVM, "sokva", 0, &so_pendfree_slock); + error = ltsleep(&socurkva, PVM | PCATCH, "sokva", 0, + &so_pendfree_slock); sokvawaiters--; + if (error) { + len = 0; + break; + } } socurkva += len; simple_unlock(&so_pendfree_slock); splx(s); + return len; } -static __inline void +static void sokvaunreserve(vsize_t len) { int s; @@ -225,13 +225,14 @@ sokvaalloc(vsize_t len, struct socket *s * reserve kva. */ - sokvareserve(so, len); + if (sokvareserve(so, len) == 0) + return 0; /* * allocate kva. */ - lva = uvm_km_valloc_wait(kernel_map, len); + lva = uvm_km_alloc(kernel_map, len, 0, UVM_KMF_VAONLY | UVM_KMF_WAITVA); if (lva == 0) { sokvaunreserve(len); return (0); @@ -252,7 +253,7 @@ sokvafree(vaddr_t sva, vsize_t len) * free kva. */ - uvm_km_free(kernel_map, sva, len); + uvm_km_free(kernel_map, sva, len, UVM_KMF_VAONLY); /* * unreserve kva. @@ -291,14 +292,14 @@ sodoloanfree(struct vm_page **pgs, caddr } static size_t -sodopendfree(struct socket *so) +sodopendfree() { int s; size_t rv; s = splvm(); simple_lock(&so_pendfree_slock); - rv = sodopendfreel(so); + rv = sodopendfreel(); simple_unlock(&so_pendfree_slock); splx(s); @@ -314,7 +315,7 @@ sodopendfree(struct socket *so) */ static size_t -sodopendfreel(struct socket *so) +sodopendfreel() { size_t rv = 0; @@ -389,7 +390,7 @@ sosend_loan(struct socket *so, struct ui vaddr_t lva, va; int npgs, i, error; - if (uio->uio_segflg != UIO_USERSPACE) + if (VMSPACE_IS_KERNEL_P(uio->uio_vmspace)) return (0); if (iov->iov_len < (size_t) space) @@ -409,7 +410,7 @@ sosend_loan(struct socket *so, struct ui if (lva == 0) return 0; - error = uvm_loan(&uio->uio_procp->p_vmspace->vm_map, sva, len, + error = uvm_loan(&uio->uio_vmspace->vm_map, sva, len, m->m_ext.ext_pgs, UVM_LOAN_TOPAGE); if (error) { sokvafree(lva, len); @@ -447,14 +448,13 @@ sosend_loan(struct socket *so, struct ui */ /*ARGSUSED*/ int -socreate(int dom, struct socket **aso, int type, int proto) +socreate(int dom, struct socket **aso, int type, int proto, struct lwp *l) { - struct proc *p; - struct protosw *prp; + const struct protosw *prp; struct socket *so; + uid_t uid; int error, s; - p = curproc; /* XXX */ if (proto) prp = pffindproto(dom, proto, type); else @@ -477,10 +477,14 @@ socreate(int dom, struct socket **aso, i so->so_snd.sb_mowner = &prp->pr_domain->dom_mowner; so->so_mowner = &prp->pr_domain->dom_mowner; #endif - if (p != 0) - so->so_uid = p->p_ucred->cr_uid; + if (l != NULL) { + uid = kauth_cred_geteuid(l->l_proc->p_cred); + } else { + uid = 0; + } + so->so_uidinfo = uid_find(uid); error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0, - (struct mbuf *)(long)proto, (struct mbuf *)0, p); + (struct mbuf *)(long)proto, (struct mbuf *)0, l); if (error) { so->so_state |= SS_NOFDREF; sofree(so); @@ -493,13 +497,13 @@ socreate(int dom, struct socket **aso, i } int -sobind(struct socket *so, struct mbuf *nam, struct proc *p) +sobind(struct socket *so, struct mbuf *nam, struct lwp *l) { int s, error; s = splsoftnet(); error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, (struct mbuf *)0, - nam, (struct mbuf *)0, p); + nam, (struct mbuf *)0, l); splx(s); return (error); } @@ -511,7 +515,7 @@ solisten(struct socket *so, int backlog) s = splsoftnet(); error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, (struct mbuf *)0, - (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); + (struct mbuf *)0, (struct mbuf *)0, (struct lwp *)0); if (error) { splx(s); return (error); @@ -540,7 +544,13 @@ sofree(struct socket *so) if (!soqremque(so, 0)) return; } - sbrelease(&so->so_snd); + if (so->so_rcv.sb_hiwat) + (void)chgsbsize(so->so_uidinfo, &so->so_rcv.sb_hiwat, 0, + RLIM_INFINITY); + if (so->so_snd.sb_hiwat) + (void)chgsbsize(so->so_uidinfo, &so->so_snd.sb_hiwat, 0, + RLIM_INFINITY); + sbrelease(&so->so_snd, so); sorflush(so); pool_put(&socket_pool, so); } @@ -593,7 +603,7 @@ soclose(struct socket *so) if (so->so_pcb) { int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH, (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0, - (struct proc *)0); + (struct lwp *)0); if (error == 0) error = error2; } @@ -614,7 +624,7 @@ soabort(struct socket *so) { return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, (struct mbuf *)0, - (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); + (struct mbuf *)0, (struct mbuf *)0, (struct lwp *)0); } int @@ -630,7 +640,7 @@ soaccept(struct socket *so, struct mbuf if ((so->so_state & SS_ISDISCONNECTED) == 0 || (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0) error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, - (struct mbuf *)0, nam, (struct mbuf *)0, (struct proc *)0); + (struct mbuf *)0, nam, (struct mbuf *)0, (struct lwp *)0); else error = ECONNABORTED; @@ -639,12 +649,10 @@ soaccept(struct socket *so, struct mbuf } int -soconnect(struct socket *so, struct mbuf *nam) +soconnect(struct socket *so, struct mbuf *nam, struct lwp *l) { - struct proc *p; int s, error; - p = curproc; /* XXX */ if (so->so_options & SO_ACCEPTCONN) return (EOPNOTSUPP); s = splsoftnet(); @@ -660,7 +668,7 @@ soconnect(struct socket *so, struct mbuf error = EISCONN; else error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT, - (struct mbuf *)0, nam, (struct mbuf *)0, p); + (struct mbuf *)0, nam, (struct mbuf *)0, l); splx(s); return (error); } @@ -673,7 +681,7 @@ soconnect2(struct socket *so1, struct so s = splsoftnet(); error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0, - (struct proc *)0); + (struct lwp *)0); splx(s); return (error); } @@ -694,10 +702,10 @@ sodisconnect(struct socket *so) } error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0, - (struct proc *)0); + (struct lwp *)0); bad: splx(s); - sodopendfree(so); + sodopendfree(); return (error); } @@ -721,16 +729,16 @@ sodisconnect(struct socket *so) */ int sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top, - struct mbuf *control, int flags) + struct mbuf *control, int flags, struct lwp *l) { - struct proc *p; struct mbuf **mp, *m; + struct proc *p; long space, len, resid, clen, mlen; int error, s, dontroute, atomic; - sodopendfree(so); + p = l->l_proc; + sodopendfree(); - p = curproc; /* XXX */ clen = 0; atomic = sosendallatonce(so) || top; if (uio) @@ -751,7 +759,8 @@ sosend(struct socket *so, struct mbuf *a dontroute = (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && (so->so_proto->pr_flags & PR_ATOMIC); - p->p_stats->p_ru.ru_msgsnd++; + if (p) + p->p_stats->p_ru.ru_msgsnd++; if (control) clen = control->m_len; #define snderr(errno) { error = errno; splx(s); goto release; } @@ -866,7 +875,7 @@ sosend(struct socket *so, struct mbuf *a break; } } while (space > 0 && atomic); - + s = splsoftnet(); if (so->so_state & SS_CANTSENDMORE) @@ -878,7 +887,7 @@ sosend(struct socket *so, struct mbuf *a so->so_state |= SS_MORETOCOME; error = (*so->so_proto->pr_usrreq)(so, (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND, - top, addr, control, p); + top, addr, control, curlwp); /* XXX */ if (dontroute) so->so_options &= ~SO_DONTROUTE; if (resid > 0) @@ -924,9 +933,10 @@ int soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { + struct lwp *l = curlwp; struct mbuf *m, **mp; int flags, len, error, s, offset, moff, type, orig_resid; - struct protosw *pr; + const struct protosw *pr; struct mbuf *nextrecord; int mbuf_removed = 0; @@ -934,6 +944,7 @@ soreceive(struct socket *so, struct mbuf mp = mp0; type = 0; orig_resid = uio->uio_resid; + if (paddr) *paddr = 0; if (controlp) @@ -944,13 +955,13 @@ soreceive(struct socket *so, struct mbuf flags = 0; if ((flags & MSG_DONTWAIT) == 0) - sodopendfree(so); + sodopendfree(); if (flags & MSG_OOB) { m = m_get(M_WAIT, MT_DATA); error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m, - (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0, - (struct proc *)0); + (struct mbuf *)(long)(flags & MSG_PEEK), + (struct mbuf *)0, l); if (error) goto bad; do { @@ -967,7 +978,7 @@ soreceive(struct socket *so, struct mbuf *mp = (struct mbuf *)0; if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0, - (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); + (struct mbuf *)0, (struct mbuf *)0, l); restart: if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0) @@ -1040,10 +1051,8 @@ soreceive(struct socket *so, struct mbuf * While we process the initial mbufs containing address and control * info, we save a copy of m->m_nextpkt into nextrecord. */ -#ifdef notyet /* XXXX */ - if (uio->uio_procp) - uio->uio_procp->p_stats->p_ru.ru_msgrcv++; -#endif + if (l) + l->l_proc->p_stats->p_ru.ru_msgrcv++; KASSERT(m == so->so_rcv.sb_mb); SBLASTRECORDCHK(&so->so_rcv, "soreceive 1"); SBLASTMBUFCHK(&so->so_rcv, "soreceive 1"); @@ -1081,15 +1090,23 @@ soreceive(struct socket *so, struct mbuf sbfree(&so->so_rcv, m); mbuf_removed = 1; if (controlp) { - if (pr->pr_domain->dom_externalize && + struct domain *dom = pr->pr_domain; + if (dom->dom_externalize && l && mtod(m, struct cmsghdr *)->cmsg_type == SCM_RIGHTS) - error = (*pr->pr_domain->dom_externalize)(m); + error = (*dom->dom_externalize)(m, l); *controlp = m; so->so_rcv.sb_mb = m->m_next; m->m_next = 0; m = so->so_rcv.sb_mb; } else { + /* + * Dispose of any SCM_RIGHTS message that went + * through the read path rather than recv. + */ + if (pr->pr_domain->dom_dispose && + mtod(m, struct cmsghdr *)->cmsg_type == SCM_RIGHTS) + (*pr->pr_domain->dom_dispose)(m); MFREE(m, so->so_rcv.sb_mb); m = so->so_rcv.sb_mb; } @@ -1270,8 +1287,7 @@ soreceive(struct socket *so, struct mbuf (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0, (struct mbuf *)(long)flags, - (struct mbuf *)0, - (struct proc *)0); + (struct mbuf *)0, l); SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2"); SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2"); error = sbwait(&so->so_rcv); @@ -1308,8 +1324,7 @@ soreceive(struct socket *so, struct mbuf SBLASTMBUFCHK(&so->so_rcv, "soreceive 4"); if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0, - (struct mbuf *)(long)flags, (struct mbuf *)0, - (struct proc *)0); + (struct mbuf *)(long)flags, (struct mbuf *)0, l); } if (orig_resid == uio->uio_resid && orig_resid && (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { @@ -1317,7 +1332,7 @@ soreceive(struct socket *so, struct mbuf splx(s); goto restart; } - + if (flagsp) *flagsp |= flags; release: @@ -1329,7 +1344,7 @@ soreceive(struct socket *so, struct mbuf int soshutdown(struct socket *so, int how) { - struct protosw *pr; + const struct protosw *pr; pr = so->so_proto; if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR)) @@ -1339,7 +1354,7 @@ soshutdown(struct socket *so, int how) sorflush(so); if (how == SHUT_WR || how == SHUT_RDWR) return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, (struct mbuf *)0, - (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); + (struct mbuf *)0, (struct mbuf *)0, (struct lwp *)0); return (0); } @@ -1347,7 +1362,7 @@ void sorflush(struct socket *so) { struct sockbuf *sb, asb; - struct protosw *pr; + const struct protosw *pr; int s; sb = &so->so_rcv; @@ -1367,7 +1382,7 @@ sorflush(struct socket *so) splx(s); if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) (*pr->pr_domain->dom_dispose)(asb.sb_mb); - sbrelease(&asb); + sbrelease(&asb, so); } int @@ -1391,6 +1406,11 @@ sosetopt(struct socket *so, int level, i error = EINVAL; goto bad; } + if (mtod(m, struct linger *)->l_linger < 0 || + mtod(m, struct linger *)->l_linger > (INT_MAX / hz)) { + error = EDOM; + goto bad; + } so->so_linger = mtod(m, struct linger *)->l_linger; /* fall thru... */ @@ -1441,7 +1461,7 @@ sosetopt(struct socket *so, int level, i case SO_RCVBUF: if (sbreserve(optname == SO_SNDBUF ? &so->so_snd : &so->so_rcv, - (u_long) optval) == 0) { + (u_long) optval, so) == 0) { error = ENOBUFS; goto bad; } @@ -1469,14 +1489,14 @@ sosetopt(struct socket *so, int level, i case SO_RCVTIMEO: { struct timeval *tv; - short val; + int val; if (m == NULL || m->m_len < sizeof(*tv)) { error = EINVAL; goto bad; } tv = mtod(m, struct timeval *); - if (tv->tv_sec > (SHRT_MAX - tv->tv_usec / tick) / hz) { + if (tv->tv_sec > (INT_MAX - tv->tv_usec / tick) / hz) { error = EDOM; goto bad; } @@ -1586,6 +1606,10 @@ sogetopt(struct socket *so, int level, i break; } + case SO_OVERFLOWED: + *mtod(m, int *) = so->so_rcv.sb_overflowed; + break; + default: (void)m_free(m); return (ENOPROTOOPT); @@ -1622,7 +1646,7 @@ filt_soread(struct knote *kn, long hint) so = (struct socket *)kn->kn_fp->f_data; kn->kn_data = so->so_rcv.sb_cc; if (so->so_state & SS_CANTRCVMORE) { - kn->kn_flags |= EV_EOF; + kn->kn_flags |= EV_EOF; kn->kn_fflags = so->so_error; return (1); } @@ -1653,7 +1677,7 @@ filt_sowrite(struct knote *kn, long hint so = (struct socket *)kn->kn_fp->f_data; kn->kn_data = sbspace(&so->so_snd); if (so->so_state & SS_CANTSENDMORE) { - kn->kn_flags |= EV_EOF; + kn->kn_flags |= EV_EOF; kn->kn_fflags = so->so_error; return (1); } @@ -1678,7 +1702,7 @@ filt_solisten(struct knote *kn, long hin /* * Set kn_data to number of incoming connections, not * counting partial (incomplete) connections. - */ + */ kn->kn_data = so->so_qlen; return (kn->kn_data > 0); } @@ -1764,7 +1788,9 @@ SYSCTL_SETUP(sysctl_kern_somaxkva_setup, sysctl_createv(clog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, - CTLTYPE_INT, "somaxkva", NULL, + CTLTYPE_INT, "somaxkva", + SYSCTL_DESCR("Maximum amount of kernel memory to be " + "used for socket buffers"), sysctl_kern_somaxkva, 0, NULL, 0, CTL_KERN, KERN_SOMAXKVA, CTL_EOL); }