Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.65 retrieving revision 1.76 diff -u -p -r1.65 -r1.76 --- src/sys/kern/uipc_socket.c 2002/05/03 00:35:14 1.65 +++ src/sys/kern/uipc_socket.c 2003/01/31 05:00:24 1.76 @@ -1,4 +1,4 @@ -/* $NetBSD: uipc_socket.c,v 1.65 2002/05/03 00:35:14 thorpej Exp $ */ +/* $NetBSD: uipc_socket.c,v 1.76 2003/01/31 05:00:24 thorpej Exp $ */ /*- * Copyright (c) 2002 The NetBSD Foundation, Inc. @@ -72,7 +72,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.65 2002/05/03 00:35:14 thorpej Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.76 2003/01/31 05:00:24 thorpej Exp $"); #include "opt_sock_counters.h" #include "opt_sosend_loan.h" @@ -91,6 +91,7 @@ __KERNEL_RCSID(0, "$NetBSD: uipc_socket. #include #include #include +#include #include @@ -134,10 +135,10 @@ soinit(void) #endif /* SOSEND_COUNTERS */ } -#ifdef SOSEND_LOAN -int use_sosend_loan = 1; -#else +#ifdef SOSEND_NO_LOAN int use_sosend_loan = 0; +#else +int use_sosend_loan = 1; #endif struct mbuf *so_pendfree; @@ -150,7 +151,7 @@ int sokvawaiters; #define SOCK_LOAN_CHUNK 65536 static void -sodoloanfree(caddr_t buf, u_int size) +sodoloanfree(caddr_t buf, size_t size) { struct vm_page **pgs; vaddr_t va, sva, eva; @@ -220,7 +221,7 @@ sodopendfree(struct socket *so) } static void -soloanfree(struct mbuf *m, caddr_t buf, u_int size, void *arg) +soloanfree(struct mbuf *m, caddr_t buf, size_t size, void *arg) { struct socket *so = arg; int s; @@ -797,8 +798,7 @@ soreceive(struct socket *so, struct mbuf int flags, len, error, s, offset, moff, type, orig_resid; struct protosw *pr; struct mbuf *nextrecord; - - sodopendfree(so); + int mbuf_removed = 0; pr = so->so_proto; mp = mp0; @@ -812,6 +812,10 @@ soreceive(struct socket *so, struct mbuf flags = *flagsp &~ MSG_EOR; else flags = 0; + + if ((flags & MSG_DONTWAIT) == 0) + sodopendfree(so); + if (flags & MSG_OOB) { m = m_get(M_WAIT, MT_DATA); error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m, @@ -891,6 +895,8 @@ soreceive(struct socket *so, struct mbuf error = EWOULDBLOCK; goto release; } + SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1"); sbunlock(&so->so_rcv); error = sbwait(&so->so_rcv); splx(s); @@ -899,10 +905,18 @@ soreceive(struct socket *so, struct mbuf goto restart; } dontblock: + /* + * On entry here, m points to the first record of the socket buffer. + * While we process the initial mbufs containing address and control + * info, we save a copy of m->m_nextpkt into nextrecord. 
+ */ #ifdef notyet /* XXXX */ if (uio->uio_procp) uio->uio_procp->p_stats->p_ru.ru_msgrcv++; #endif + KASSERT(m == so->so_rcv.sb_mb); + SBLASTRECORDCHK(&so->so_rcv, "soreceive 1"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive 1"); nextrecord = m->m_nextpkt; if (pr->pr_flags & PR_ADDR) { #ifdef DIAGNOSTIC @@ -916,6 +930,7 @@ soreceive(struct socket *so, struct mbuf m = m->m_next; } else { sbfree(&so->so_rcv, m); + mbuf_removed = 1; if (paddr) { *paddr = m; so->so_rcv.sb_mb = m->m_next; @@ -934,6 +949,7 @@ soreceive(struct socket *so, struct mbuf m = m->m_next; } else { sbfree(&so->so_rcv, m); + mbuf_removed = 1; if (controlp) { if (pr->pr_domain->dom_externalize && mtod(m, struct cmsghdr *)->cmsg_type == @@ -953,13 +969,39 @@ soreceive(struct socket *so, struct mbuf controlp = &(*controlp)->m_next; } } + + /* + * If m is non-NULL, we have some data to read. From now on, + * make sure to keep sb_lastrecord consistent when working on + * the last packet on the chain (nextrecord == NULL) and we + * change m->m_nextpkt. + */ if (m) { - if ((flags & MSG_PEEK) == 0) + if ((flags & MSG_PEEK) == 0) { m->m_nextpkt = nextrecord; + /* + * If nextrecord == NULL (this is a single chain), + * then sb_lastrecord may not be valid here if m + * was changed earlier. + */ + if (nextrecord == NULL) { + KASSERT(so->so_rcv.sb_mb == m); + so->so_rcv.sb_lastrecord = m; + } + } type = m->m_type; if (type == MT_OOBDATA) flags |= MSG_OOB; + } else { + if ((flags & MSG_PEEK) == 0) { + KASSERT(so->so_rcv.sb_mb == m); + so->so_rcv.sb_mb = nextrecord; + SB_EMPTY_FIXUP(&so->so_rcv); + } } + SBLASTRECORDCHK(&so->so_rcv, "soreceive 2"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive 2"); + moff = 0; offset = 0; while (m && uio->uio_resid > 0 && error == 0) { @@ -987,11 +1029,29 @@ soreceive(struct socket *so, struct mbuf * block interrupts again. */ if (mp == 0) { + SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove"); splx(s); error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); s = splsoftnet(); - if (error) + if (error) { + /* + * If any part of the record has been removed + * (such as the MT_SONAME mbuf, which will + * happen when PR_ADDR, and thus also + * PR_ATOMIC, is set), then drop the entire + * record to maintain the atomicity of the + * receive operation. + * + * This avoids a later panic("receive 1a") + * when compiled with DIAGNOSTIC. + */ + if (m && mbuf_removed + && (pr->pr_flags & PR_ATOMIC)) + (void) sbdroprecord(&so->so_rcv); + goto release; + } } else uio->uio_resid -= len; if (len == m->m_len - moff) { @@ -1012,8 +1072,21 @@ soreceive(struct socket *so, struct mbuf MFREE(m, so->so_rcv.sb_mb); m = so->so_rcv.sb_mb; } - if (m) + /* + * If m != NULL, we also know that + * so->so_rcv.sb_mb != NULL. + */ + KASSERT(so->so_rcv.sb_mb == m); + if (m) { m->m_nextpkt = nextrecord; + if (nextrecord == NULL) + so->so_rcv.sb_lastrecord = m; + } else { + so->so_rcv.sb_mb = nextrecord; + SB_EMPTY_FIXUP(&so->so_rcv); + } + SBLASTRECORDCHK(&so->so_rcv, "soreceive 3"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive 3"); } } else { if (flags & MSG_PEEK) @@ -1052,6 +1125,25 @@ soreceive(struct socket *so, struct mbuf !sosendallatonce(so) && !nextrecord) { if (so->so_error || so->so_state & SS_CANTRCVMORE) break; + /* + * If we are peeking and the socket receive buffer is + * full, stop since we can't get more data to peek at. 
+ */
+ if ((flags & MSG_PEEK) && sbspace(&so->so_rcv) <= 0)
+ break;
+ /*
+ * If we've drained the socket buffer, tell the
+ * protocol in case it needs to do something to
+ * get it filled again.
+ */
+ if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb)
+ (*pr->pr_usrreq)(so, PRU_RCVD,
+ (struct mbuf *)0,
+ (struct mbuf *)(long)flags,
+ (struct mbuf *)0,
+ (struct proc *)0);
+ SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
+ SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
 error = sbwait(&so->so_rcv);
 if (error) {
 sbunlock(&so->so_rcv);
@@ -1069,8 +1161,21 @@ soreceive(struct socket *so, struct mbuf
 (void) sbdroprecord(&so->so_rcv);
 }
 if ((flags & MSG_PEEK) == 0) {
- if (m == 0)
+ if (m == 0) {
+ /*
+ * First part is an inline SB_EMPTY_FIXUP(). Second
+ * part makes sure sb_lastrecord is up-to-date if
+ * there is still data in the socket buffer.
+ */
 so->so_rcv.sb_mb = nextrecord;
+ if (so->so_rcv.sb_mb == NULL) {
+ so->so_rcv.sb_mbtail = NULL;
+ so->so_rcv.sb_lastrecord = NULL;
+ } else if (nextrecord->m_nextpkt == NULL)
+ so->so_rcv.sb_lastrecord = nextrecord;
+ }
+ SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
+ SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
 (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
 (struct mbuf *)(long)flags, (struct mbuf *)0,
@@ -1236,11 +1341,13 @@ sosetopt(struct socket *so, int level, i
 goto bad;
 }
 tv = mtod(m, struct timeval *);
- if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) {
+ if (tv->tv_sec > (SHRT_MAX - tv->tv_usec / tick) / hz) {
 error = EDOM;
 goto bad;
 }
 val = tv->tv_sec * hz + tv->tv_usec / tick;
+ if (val == 0 && tv->tv_usec != 0)
+ val = 1;
 switch (optname) {
@@ -1364,3 +1471,119 @@ sohasoutofband(struct socket *so)
 psignal(p, SIGURG);
 selwakeup(&so->so_rcv.sb_sel);
 }
+
+static void
+filt_sordetach(struct knote *kn)
+{
+ struct socket *so;
+
+ so = (struct socket *)kn->kn_fp->f_data;
+ SLIST_REMOVE(&so->so_rcv.sb_sel.sel_klist, kn, knote, kn_selnext);
+ if (SLIST_EMPTY(&so->so_rcv.sb_sel.sel_klist))
+ so->so_rcv.sb_flags &= ~SB_KNOTE;
+}
+
+/*ARGSUSED*/
+static int
+filt_soread(struct knote *kn, long hint)
+{
+ struct socket *so;
+
+ so = (struct socket *)kn->kn_fp->f_data;
+ kn->kn_data = so->so_rcv.sb_cc;
+ if (so->so_state & SS_CANTRCVMORE) {
+ kn->kn_flags |= EV_EOF;
+ kn->kn_fflags = so->so_error;
+ return (1);
+ }
+ if (so->so_error) /* temporary udp error */
+ return (1);
+ if (kn->kn_sfflags & NOTE_LOWAT)
+ return (kn->kn_data >= kn->kn_sdata);
+ return (kn->kn_data >= so->so_rcv.sb_lowat);
+}
+
+static void
+filt_sowdetach(struct knote *kn)
+{
+ struct socket *so;
+
+ so = (struct socket *)kn->kn_fp->f_data;
+ SLIST_REMOVE(&so->so_snd.sb_sel.sel_klist, kn, knote, kn_selnext);
+ if (SLIST_EMPTY(&so->so_snd.sb_sel.sel_klist))
+ so->so_snd.sb_flags &= ~SB_KNOTE;
+}
+
+/*ARGSUSED*/
+static int
+filt_sowrite(struct knote *kn, long hint)
+{
+ struct socket *so;
+
+ so = (struct socket *)kn->kn_fp->f_data;
+ kn->kn_data = sbspace(&so->so_snd);
+ if (so->so_state & SS_CANTSENDMORE) {
+ kn->kn_flags |= EV_EOF;
+ kn->kn_fflags = so->so_error;
+ return (1);
+ }
+ if (so->so_error) /* temporary udp error */
+ return (1);
+ if (((so->so_state & SS_ISCONNECTED) == 0) &&
+ (so->so_proto->pr_flags & PR_CONNREQUIRED))
+ return (0);
+ if (kn->kn_sfflags & NOTE_LOWAT)
+ return (kn->kn_data >= kn->kn_sdata);
+ return (kn->kn_data >= so->so_snd.sb_lowat);
+}
+
+/*ARGSUSED*/
+static int
+filt_solisten(struct knote *kn, long hint)
+{
+ struct socket *so;
+
+ so = (struct socket *)kn->kn_fp->f_data;
+
+ /*
+ * Set kn_data to number of incoming connections, not
+ * counting partial (incomplete) connections.
+ */
+ kn->kn_data = so->so_qlen;
+ return (kn->kn_data > 0);
+}
+
+static const struct filterops solisten_filtops =
+ { 1, NULL, filt_sordetach, filt_solisten };
+static const struct filterops soread_filtops =
+ { 1, NULL, filt_sordetach, filt_soread };
+static const struct filterops sowrite_filtops =
+ { 1, NULL, filt_sowdetach, filt_sowrite };
+
+int
+soo_kqfilter(struct file *fp, struct knote *kn)
+{
+ struct socket *so;
+ struct sockbuf *sb;
+
+ so = (struct socket *)kn->kn_fp->f_data;
+ switch (kn->kn_filter) {
+ case EVFILT_READ:
+ if (so->so_options & SO_ACCEPTCONN)
+ kn->kn_fop = &solisten_filtops;
+ else
+ kn->kn_fop = &soread_filtops;
+ sb = &so->so_rcv;
+ break;
+ case EVFILT_WRITE:
+ kn->kn_fop = &sowrite_filtops;
+ sb = &so->so_snd;
+ break;
+ default:
+ return (1);
+ }
+ SLIST_INSERT_HEAD(&sb->sb_sel.sel_klist, kn, kn_selnext);
+ sb->sb_flags |= SB_KNOTE;
+ return (0);
+}
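
Most of the soreceive() churn above exists to keep so_rcv.sb_lastrecord (and sb_mbtail) consistent whenever sb_mb or a record's m_nextpkt is rewritten; the SBLASTRECORDCHK/SBLASTMBUFCHK macros assert that invariant at each step. The following standalone sketch models only that bookkeeping on a toy record queue; the structure and function names are invented for illustration and are not NetBSD kernel API.

/*
 * Toy model of the sb_mb / sb_lastrecord bookkeeping that the
 * soreceive() changes are careful to preserve.  All names here are
 * illustrative only, not kernel structures.
 */
#include <stddef.h>
#include <assert.h>

struct rec {
	struct rec *m_nextpkt;		/* next record in the queue */
};

struct sockbuf_model {
	struct rec *sb_mb;		/* first record */
	struct rec *sb_lastrecord;	/* last record */
};

/* Drop the first record and fix up sb_lastrecord, as the diff does. */
static void
drop_first_record(struct sockbuf_model *sb)
{
	struct rec *nextrecord;

	assert(sb->sb_mb != NULL);
	nextrecord = sb->sb_mb->m_nextpkt;
	sb->sb_mb = nextrecord;
	if (sb->sb_mb == NULL) {
		/*
		 * Queue is now empty; the kernel's SB_EMPTY_FIXUP() also
		 * clears sb_mbtail at this point.
		 */
		sb->sb_lastrecord = NULL;
	} else if (nextrecord->m_nextpkt == NULL) {
		/* The new head is also the last record. */
		sb->sb_lastrecord = nextrecord;
	}
}

/* The invariant that SBLASTRECORDCHK() asserts in the diff. */
static void
check_lastrecord(const struct sockbuf_model *sb)
{
	const struct rec *m = sb->sb_mb;

	if (m == NULL) {
		assert(sb->sb_lastrecord == NULL);
		return;
	}
	while (m->m_nextpkt != NULL)
		m = m->m_nextpkt;
	assert(sb->sb_lastrecord == m);
}

int
main(void)
{
	struct rec a = { NULL }, b = { NULL };
	struct sockbuf_model sb;

	a.m_nextpkt = &b;
	sb.sb_mb = &a;
	sb.sb_lastrecord = &b;

	drop_first_record(&sb);		/* [a, b] -> [b] */
	check_lastrecord(&sb);
	drop_first_record(&sb);		/* [b] -> empty */
	check_lastrecord(&sb);
	return 0;
}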
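On the sosetopt() SO_SNDTIMEO/SO_RCVTIMEO hunk: the old range check multiplied first (tv->tv_sec * hz), so on platforms where time_t and long are 32 bits a large tv_sec could overflow, wrap negative, and slip past the EDOM test; the rewritten check divides instead and cannot overflow. The added "if (val == 0 && tv->tv_usec != 0) val = 1;" rounds a non-zero sub-tick timeout up to one tick rather than silently turning it into "no timeout". A small userland sketch of the arithmetic, using assumed example values hz = 100 and tick = 10000 (microseconds per tick at 100 Hz):

/*
 * Standalone illustration of the overflow-safe timeout check.  hz and
 * tick are assumed example values (100 Hz clock); SHRT_MAX comes from
 * <limits.h> as in the kernel code.
 */
#include <stdio.h>
#include <stdint.h>
#include <limits.h>

int
main(void)
{
	const int64_t hz = 100;		/* assumed clock frequency */
	const int64_t tick = 10000;	/* assumed microseconds per tick */
	int64_t tv_sec = 30000000;	/* an absurdly large timeout request */
	int64_t tv_usec = 0;

	/* Old check, as 32-bit arithmetic would evaluate it: wraps negative. */
	int32_t old_lhs = (int32_t)(tv_sec * hz + tv_usec / tick);
	printf("old check: %d > %d ? %s\n", (int)old_lhs, SHRT_MAX,
	    old_lhs > SHRT_MAX ? "caught" : "missed (overflowed)");

	/* New check: compare by dividing, so nothing can overflow. */
	printf("new check: %s\n",
	    tv_sec > (SHRT_MAX - tv_usec / tick) / hz ? "caught" : "missed");
	return 0;
}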
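The new filt_so*() filters and soo_kqfilter() are what hook sockets into kqueue(2): EVFILT_READ on a socket with SO_ACCEPTCONN set selects solisten_filtops and reports the accept queue length (so_qlen) in kn_data, a regular socket reports so_rcv.sb_cc, and NOTE_LOWAT lets the caller supply a low watermark through the kevent data field. A minimal userland sketch that exercises the listening-socket path (assumes a BSD system with kqueue; the port number is arbitrary):

#include <sys/types.h>
#include <sys/event.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <err.h>

int
main(void)
{
	struct sockaddr_in sin;
	struct kevent kev, ev;
	int s, kq;

	if ((s = socket(AF_INET, SOCK_STREAM, 0)) == -1)
		err(1, "socket");
	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_port = htons(12345);	/* arbitrary example port */
	sin.sin_addr.s_addr = htonl(INADDR_ANY);
	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) == -1)
		err(1, "bind");
	if (listen(s, 5) == -1)
		err(1, "listen");

	if ((kq = kqueue()) == -1)
		err(1, "kqueue");

	/*
	 * Registering EVFILT_READ on a listening socket goes through
	 * soo_kqfilter(), which picks solisten_filtops because
	 * SO_ACCEPTCONN is set.
	 */
	EV_SET(&kev, s, EVFILT_READ, EV_ADD, 0, 0, NULL);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
		err(1, "kevent register");

	/* Blocks until a client connects; ev.data is the queue length. */
	if (kevent(kq, NULL, 0, &ev, 1, NULL) == 1)
		printf("pending connections: %jd\n", (intmax_t)ev.data);

	close(kq);
	close(s);
	return 0;
}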