Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v retrieving revision 1.66 retrieving revision 1.66.2.3 diff -u -p -r1.66 -r1.66.2.3 --- src/sys/kern/uipc_socket.c 2002/05/07 08:06:35 1.66 +++ src/sys/kern/uipc_socket.c 2002/08/29 05:23:14 1.66.2.3 @@ -1,4 +1,4 @@ -/* $NetBSD: uipc_socket.c,v 1.66 2002/05/07 08:06:35 enami Exp $ */ +/* $NetBSD: uipc_socket.c,v 1.66.2.3 2002/08/29 05:23:14 gehenna Exp $ */ /*- * Copyright (c) 2002 The NetBSD Foundation, Inc. @@ -72,7 +72,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.66 2002/05/07 08:06:35 enami Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.66.2.3 2002/08/29 05:23:14 gehenna Exp $"); #include "opt_sock_counters.h" #include "opt_sosend_loan.h" @@ -134,10 +134,10 @@ soinit(void) #endif /* SOSEND_COUNTERS */ } -#ifdef SOSEND_LOAN -int use_sosend_loan = 1; -#else +#ifdef SOSEND_NO_LOAN int use_sosend_loan = 0; +#else +int use_sosend_loan = 1; #endif struct mbuf *so_pendfree; @@ -797,6 +797,7 @@ soreceive(struct socket *so, struct mbuf int flags, len, error, s, offset, moff, type, orig_resid; struct protosw *pr; struct mbuf *nextrecord; + int mbuf_removed = 0; pr = so->so_proto; mp = mp0; @@ -893,6 +894,8 @@ soreceive(struct socket *so, struct mbuf error = EWOULDBLOCK; goto release; } + SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1"); sbunlock(&so->so_rcv); error = sbwait(&so->so_rcv); splx(s); @@ -901,10 +904,18 @@ soreceive(struct socket *so, struct mbuf goto restart; } dontblock: + /* + * On entry here, m points to the first record of the socket buffer. + * While we process the initial mbufs containing address and control + * info, we save a copy of m->m_nextpkt into nextrecord. + */ #ifdef notyet /* XXXX */ if (uio->uio_procp) uio->uio_procp->p_stats->p_ru.ru_msgrcv++; #endif + KASSERT(m == so->so_rcv.sb_mb); + SBLASTRECORDCHK(&so->so_rcv, "soreceive 1"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive 1"); nextrecord = m->m_nextpkt; if (pr->pr_flags & PR_ADDR) { #ifdef DIAGNOSTIC @@ -918,6 +929,7 @@ soreceive(struct socket *so, struct mbuf m = m->m_next; } else { sbfree(&so->so_rcv, m); + mbuf_removed = 1; if (paddr) { *paddr = m; so->so_rcv.sb_mb = m->m_next; @@ -936,6 +948,7 @@ soreceive(struct socket *so, struct mbuf m = m->m_next; } else { sbfree(&so->so_rcv, m); + mbuf_removed = 1; if (controlp) { if (pr->pr_domain->dom_externalize && mtod(m, struct cmsghdr *)->cmsg_type == @@ -955,13 +968,39 @@ soreceive(struct socket *so, struct mbuf controlp = &(*controlp)->m_next; } } + + /* + * If m is non-NULL, we have some data to read. From now on, + * make sure to keep sb_lastrecord consistent when working on + * the last packet on the chain (nextrecord == NULL) and we + * change m->m_nextpkt. + */ if (m) { - if ((flags & MSG_PEEK) == 0) + if ((flags & MSG_PEEK) == 0) { m->m_nextpkt = nextrecord; + /* + * If nextrecord == NULL (this is a single chain), + * then sb_lastrecord may not be valid here if m + * was changed earlier. + */ + if (nextrecord == NULL) { + KASSERT(so->so_rcv.sb_mb == m); + so->so_rcv.sb_lastrecord = m; + } + } type = m->m_type; if (type == MT_OOBDATA) flags |= MSG_OOB; + } else { + if ((flags & MSG_PEEK) == 0) { + KASSERT(so->so_rcv.sb_mb == m); + so->so_rcv.sb_mb = nextrecord; + SB_EMPTY_FIXUP(&so->so_rcv); + } } + SBLASTRECORDCHK(&so->so_rcv, "soreceive 2"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive 2"); + moff = 0; offset = 0; while (m && uio->uio_resid > 0 && error == 0) { @@ -989,11 +1028,29 @@ soreceive(struct socket *so, struct mbuf * block interrupts again. */ if (mp == 0) { + SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove"); splx(s); error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); s = splsoftnet(); - if (error) + if (error) { + /* + * If any part of the record has been removed + * (such as the MT_SONAME mbuf, which will + * happen when PR_ADDR, and thus also + * PR_ATOMIC, is set), then drop the entire + * record to maintain the atomicity of the + * receive operation. + * + * This avoids a later panic("receive 1a") + * when compiled with DIAGNOSTIC. + */ + if (m && mbuf_removed + && (pr->pr_flags & PR_ATOMIC)) + (void) sbdroprecord(&so->so_rcv); + goto release; + } } else uio->uio_resid -= len; if (len == m->m_len - moff) { @@ -1014,8 +1071,21 @@ soreceive(struct socket *so, struct mbuf MFREE(m, so->so_rcv.sb_mb); m = so->so_rcv.sb_mb; } - if (m) + /* + * If m != NULL, we also know that + * so->so_rcv.sb_mb != NULL. + */ + KASSERT(so->so_rcv.sb_mb == m); + if (m) { m->m_nextpkt = nextrecord; + if (nextrecord == NULL) + so->so_rcv.sb_lastrecord = m; + } else { + so->so_rcv.sb_mb = nextrecord; + SB_EMPTY_FIXUP(&so->so_rcv); + } + SBLASTRECORDCHK(&so->so_rcv, "soreceive 3"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive 3"); } } else { if (flags & MSG_PEEK) @@ -1054,6 +1124,25 @@ soreceive(struct socket *so, struct mbuf !sosendallatonce(so) && !nextrecord) { if (so->so_error || so->so_state & SS_CANTRCVMORE) break; + /* + * If we are peeking and the socket receive buffer is + * full, stop since we can't get more data to peek at. + */ + if ((flags & MSG_PEEK) && sbspace(&so->so_rcv) <= 0) + break; + /* + * If we've drained the socket buffer, tell the + * protocol in case it needs to do something to + * get it filled again. + */ + if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb) + (*pr->pr_usrreq)(so, PRU_RCVD, + (struct mbuf *)0, + (struct mbuf *)(long)flags, + (struct mbuf *)0, + (struct proc *)0); + SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2"); error = sbwait(&so->so_rcv); if (error) { sbunlock(&so->so_rcv); @@ -1071,8 +1160,21 @@ soreceive(struct socket *so, struct mbuf (void) sbdroprecord(&so->so_rcv); } if ((flags & MSG_PEEK) == 0) { - if (m == 0) + if (m == 0) { + /* + * First part is an inline SB_EMPTY_FIXUP(). Second + * part makes sure sb_lastrecord is up-to-date if + * there is still data in the socket buffer. + */ so->so_rcv.sb_mb = nextrecord; + if (so->so_rcv.sb_mb == NULL) { + so->so_rcv.sb_mbtail = NULL; + so->so_rcv.sb_lastrecord = NULL; + } else if (nextrecord->m_nextpkt == NULL) + so->so_rcv.sb_lastrecord = nextrecord; + } + SBLASTRECORDCHK(&so->so_rcv, "soreceive 4"); + SBLASTMBUFCHK(&so->so_rcv, "soreceive 4"); if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0, (struct mbuf *)(long)flags, (struct mbuf *)0,