Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.64 retrieving revision 1.66.2.3 diff -u -p -r1.64 -r1.66.2.3 --- src/sys/kern/uipc_socket.c 2002/05/02 17:55:51 1.64 +++ src/sys/kern/uipc_socket.c 2002/08/29 05:23:14 1.66.2.3 @@ -1,4 +1,4 @@ -/* $NetBSD: uipc_socket.c,v 1.64 2002/05/02 17:55:51 thorpej Exp $ */ +/* $NetBSD: uipc_socket.c,v 1.66.2.3 2002/08/29 05:23:14 gehenna Exp $ */ /*- * Copyright (c) 2002 The NetBSD Foundation, Inc. @@ -72,7 +72,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.64 2002/05/02 17:55:51 thorpej Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.66.2.3 2002/08/29 05:23:14 gehenna Exp $"); #include "opt_sock_counters.h" #include "opt_sosend_loan.h" @@ -134,7 +134,11 @@ soinit(void) #endif /* SOSEND_COUNTERS */ } -#ifdef SOSEND_LOAN +#ifdef SOSEND_NO_LOAN +int use_sosend_loan = 0; +#else +int use_sosend_loan = 1; +#endif struct mbuf *so_pendfree; @@ -303,8 +307,6 @@ sosend_loan(struct socket *so, struct ui return (space); } -#endif /* SOSEND_LOAN */ - /* * Socket operation routines. 
* These routines are called by the routines in @@ -390,9 +392,7 @@ solisten(struct socket *so, int backlog) void sofree(struct socket *so) { -#ifdef SOSEND_LOAN struct mbuf *m; -#endif if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) return; @@ -407,13 +407,11 @@ sofree(struct socket *so) } sbrelease(&so->so_snd); sorflush(so); -#ifdef SOSEND_LOAN while ((m = so->so_pendfree) != NULL) { so->so_pendfree = m->m_next; m->m_next = so_pendfree; so_pendfree = m; } -#endif pool_put(&socket_pool, so); } @@ -569,9 +567,7 @@ sodisconnect(struct socket *so) (struct proc *)0); bad: splx(s); -#ifdef SOSEND_LOAN sodopendfree(so); -#endif return (error); } @@ -602,9 +598,7 @@ sosend(struct socket *so, struct mbuf *a long space, len, resid, clen, mlen; int error, s, dontroute, atomic; -#ifdef SOSEND_LOAN sodopendfree(so); -#endif p = curproc; /* XXX */ clen = 0; @@ -691,8 +685,8 @@ sosend(struct socket *so, struct mbuf *a MGET(m, M_WAIT, MT_DATA); mlen = MLEN; } -#ifdef SOSEND_LOAN - if (uio->uio_iov->iov_len >= SOCK_LOAN_THRESH && + if (use_sosend_loan && + uio->uio_iov->iov_len >= SOCK_LOAN_THRESH && space >= SOCK_LOAN_THRESH && (len = sosend_loan(so, uio, m, space)) != 0) { @@ -700,7 +694,6 @@ sosend(struct socket *so, struct mbuf *a space -= len; goto have_data; } -#endif /* SOSEND_LOAN */ if (resid >= MINCLSIZE && space >= MCLBYTES) { SOSEND_COUNTER_INCR(&sosend_copy_big); MCLGET(m, M_WAIT); @@ -728,9 +721,7 @@ sosend(struct socket *so, struct mbuf *a } error = uiomove(mtod(m, caddr_t), (int)len, uio); -#ifdef SOSEND_LOAN have_data: -#endif resid = uio->uio_resid; m->m_len = len; *mp = m; @@ -806,10 +797,7 @@ soreceive(struct socket *so, struct mbuf int flags, len, error, s, offset, moff, type, orig_resid; struct protosw *pr; struct mbuf *nextrecord; - -#ifdef SOSEND_LOAN - sodopendfree(so); -#endif + int mbuf_removed = 0; pr = so->so_proto; mp = mp0; @@ -823,6 +811,10 @@ soreceive(struct socket *so, struct mbuf flags = *flagsp &~ MSG_EOR; else flags = 0; + + if ((flags & 
MSG_DONTWAIT) == 0) + sodopendfree(so); + if (flags & MSG_OOB) { m = m_get(M_WAIT, MT_DATA); error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m, @@ -902,6 +894,8 @@ soreceive(struct socket *so, struct mbuf error = EWOULDBLOCK; goto release; } + SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1"); sbunlock(&so->so_rcv); error = sbwait(&so->so_rcv); splx(s); @@ -910,10 +904,18 @@ soreceive(struct socket *so, struct mbuf goto restart; } dontblock: + /* + * On entry here, m points to the first record of the socket buffer. + * While we process the initial mbufs containing address and control + * info, we save a copy of m->m_nextpkt into nextrecord. + */ #ifdef notyet /* XXXX */ if (uio->uio_procp) uio->uio_procp->p_stats->p_ru.ru_msgrcv++; #endif + KASSERT(m == so->so_rcv.sb_mb); + SBLASTRECORDCHK(&so->so_rcv, "soreceive 1"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive 1"); nextrecord = m->m_nextpkt; if (pr->pr_flags & PR_ADDR) { #ifdef DIAGNOSTIC @@ -927,6 +929,7 @@ soreceive(struct socket *so, struct mbuf m = m->m_next; } else { sbfree(&so->so_rcv, m); + mbuf_removed = 1; if (paddr) { *paddr = m; so->so_rcv.sb_mb = m->m_next; @@ -945,6 +948,7 @@ soreceive(struct socket *so, struct mbuf m = m->m_next; } else { sbfree(&so->so_rcv, m); + mbuf_removed = 1; if (controlp) { if (pr->pr_domain->dom_externalize && mtod(m, struct cmsghdr *)->cmsg_type == @@ -964,13 +968,39 @@ soreceive(struct socket *so, struct mbuf controlp = &(*controlp)->m_next; } } + + /* + * If m is non-NULL, we have some data to read. From now on, + * make sure to keep sb_lastrecord consistent when working on + * the last packet on the chain (nextrecord == NULL) and we + * change m->m_nextpkt. + */ if (m) { - if ((flags & MSG_PEEK) == 0) + if ((flags & MSG_PEEK) == 0) { m->m_nextpkt = nextrecord; + /* + * If nextrecord == NULL (this is a single chain), + * then sb_lastrecord may not be valid here if m + * was changed earlier. 
+ */ + if (nextrecord == NULL) { + KASSERT(so->so_rcv.sb_mb == m); + so->so_rcv.sb_lastrecord = m; + } + } type = m->m_type; if (type == MT_OOBDATA) flags |= MSG_OOB; + } else { + if ((flags & MSG_PEEK) == 0) { + KASSERT(so->so_rcv.sb_mb == m); + so->so_rcv.sb_mb = nextrecord; + SB_EMPTY_FIXUP(&so->so_rcv); + } } + SBLASTRECORDCHK(&so->so_rcv, "soreceive 2"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive 2"); + moff = 0; offset = 0; while (m && uio->uio_resid > 0 && error == 0) { @@ -998,11 +1028,29 @@ soreceive(struct socket *so, struct mbuf * block interrupts again. */ if (mp == 0) { + SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove"); splx(s); error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); s = splsoftnet(); - if (error) + if (error) { + /* + * If any part of the record has been removed + * (such as the MT_SONAME mbuf, which will + * happen when PR_ADDR, and thus also + * PR_ATOMIC, is set), then drop the entire + * record to maintain the atomicity of the + * receive operation. + * + * This avoids a later panic("receive 1a") + * when compiled with DIAGNOSTIC. + */ + if (m && mbuf_removed + && (pr->pr_flags & PR_ATOMIC)) + (void) sbdroprecord(&so->so_rcv); + goto release; + } } else uio->uio_resid -= len; if (len == m->m_len - moff) { @@ -1023,8 +1071,21 @@ soreceive(struct socket *so, struct mbuf MFREE(m, so->so_rcv.sb_mb); m = so->so_rcv.sb_mb; } - if (m) + /* + * If m != NULL, we also know that + * so->so_rcv.sb_mb != NULL. 
+ */ + KASSERT(so->so_rcv.sb_mb == m); + if (m) { m->m_nextpkt = nextrecord; + if (nextrecord == NULL) + so->so_rcv.sb_lastrecord = m; + } else { + so->so_rcv.sb_mb = nextrecord; + SB_EMPTY_FIXUP(&so->so_rcv); + } + SBLASTRECORDCHK(&so->so_rcv, "soreceive 3"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive 3"); } } else { if (flags & MSG_PEEK) @@ -1063,6 +1124,25 @@ soreceive(struct socket *so, struct mbuf !sosendallatonce(so) && !nextrecord) { if (so->so_error || so->so_state & SS_CANTRCVMORE) break; + /* + * If we are peeking and the socket receive buffer is + * full, stop since we can't get more data to peek at. + */ + if ((flags & MSG_PEEK) && sbspace(&so->so_rcv) <= 0) + break; + /* + * If we've drained the socket buffer, tell the + * protocol in case it needs to do something to + * get it filled again. + */ + if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb) + (*pr->pr_usrreq)(so, PRU_RCVD, + (struct mbuf *)0, + (struct mbuf *)(long)flags, + (struct mbuf *)0, + (struct proc *)0); + SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2"); error = sbwait(&so->so_rcv); if (error) { sbunlock(&so->so_rcv); @@ -1080,8 +1160,21 @@ soreceive(struct socket *so, struct mbuf (void) sbdroprecord(&so->so_rcv); } if ((flags & MSG_PEEK) == 0) { - if (m == 0) + if (m == 0) { + /* + * First part is an inline SB_EMPTY_FIXUP(). Second + * part makes sure sb_lastrecord is up-to-date if + * there is still data in the socket buffer. + */ so->so_rcv.sb_mb = nextrecord; + if (so->so_rcv.sb_mb == NULL) { + so->so_rcv.sb_mbtail = NULL; + so->so_rcv.sb_lastrecord = NULL; + } else if (nextrecord->m_nextpkt == NULL) + so->so_rcv.sb_lastrecord = nextrecord; + } + SBLASTRECORDCHK(&so->so_rcv, "soreceive 4"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive 4"); if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0, (struct mbuf *)(long)flags, (struct mbuf *)0,