Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.38 retrieving revision 1.56.2.3 diff -u -p -r1.38 -r1.56.2.3 --- src/sys/kern/uipc_socket.c 1998/08/04 04:03:17 1.38 +++ src/sys/kern/uipc_socket.c 2002/01/10 20:00:14 1.56.2.3 @@ -1,4 +1,4 @@ -/* $NetBSD: uipc_socket.c,v 1.38 1998/08/04 04:03:17 perry Exp $ */ +/* $NetBSD: uipc_socket.c,v 1.56.2.3 2002/01/10 20:00:14 thorpej Exp $ */ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993 @@ -35,7 +35,8 @@ * @(#)uipc_socket.c 8.6 (Berkeley) 5/2/95 */ -#include "opt_compat_sunos.h" +#include +__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.56.2.3 2002/01/10 20:00:14 thorpej Exp $"); #include #include @@ -51,11 +52,28 @@ #include #include #include +#include -struct pool socket_pool; +static void filt_sordetach(struct knote *kn); +static int filt_soread(struct knote *kn, long hint); +static void filt_sowdetach(struct knote *kn); +static int filt_sowrite(struct knote *kn, long hint); +static int filt_solisten(struct knote *kn, long hint); + +static const struct filterops solisten_filtops = + { 1, NULL, filt_sordetach, filt_solisten }; +const struct filterops soread_filtops = + { 1, NULL, filt_sordetach, filt_soread }; +const struct filterops sowrite_filtops = + { 1, NULL, filt_sowdetach, filt_sowrite }; + +struct pool socket_pool; + +extern int somaxconn; /* patchable (XXX sysctl) */ +int somaxconn = SOMAXCONN; void -soinit() +soinit(void) { pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0, @@ -71,17 +89,14 @@ soinit() */ /*ARGSUSED*/ int -socreate(dom, aso, type, proto) - int dom; - struct socket **aso; - register int type; - int proto; -{ - struct proc *p = curproc; /* XXX */ - register struct protosw 
*prp; - register struct socket *so; - register int error; +socreate(int dom, struct socket **aso, int type, int proto) +{ + struct proc *p; + struct protosw *prp; + struct socket *so; + int error, s; + p = curproc; /* XXX */ if (proto) prp = pffindproto(dom, proto, type); else @@ -90,6 +105,7 @@ socreate(dom, aso, type, proto) return (EPROTONOSUPPORT); if (prp->pr_type != type) return (EPROTOTYPE); + s = splsoftnet(); so = pool_get(&socket_pool, PR_WAITOK); memset((caddr_t)so, 0, sizeof(*so)); TAILQ_INIT(&so->so_q0); @@ -98,33 +114,27 @@ socreate(dom, aso, type, proto) so->so_proto = prp; so->so_send = sosend; so->so_receive = soreceive; + if (p != 0) + so->so_uid = p->p_ucred->cr_uid; error = (*prp->pr_usrreq)(so, PRU_ATTACH, (struct mbuf *)0, (struct mbuf *)(long)proto, (struct mbuf *)0, p); if (error) { so->so_state |= SS_NOFDREF; sofree(so); + splx(s); return (error); } -#ifdef COMPAT_SUNOS - { - extern struct emul emul_sunos; - if (p->p_emul == &emul_sunos && type == SOCK_DGRAM) - so->so_options |= SO_BROADCAST; - } -#endif + splx(s); *aso = so; return (0); } int -sobind(so, nam) - struct socket *so; - struct mbuf *nam; -{ - struct proc *p = curproc; /* XXX */ - int s = splsoftnet(); - int error; +sobind(struct socket *so, struct mbuf *nam, struct proc *p) +{ + int s, error; + s = splsoftnet(); error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, (struct mbuf *)0, nam, (struct mbuf *)0, p); splx(s); @@ -132,12 +142,11 @@ sobind(so, nam) } int -solisten(so, backlog) - register struct socket *so; - int backlog; +solisten(struct socket *so, int backlog) { - int s = splsoftnet(), error; + int s, error; + s = splsoftnet(); error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); if (error) { @@ -148,22 +157,25 @@ solisten(so, backlog) so->so_options |= SO_ACCEPTCONN; if (backlog < 0) backlog = 0; - so->so_qlimit = min(backlog, SOMAXCONN); + so->so_qlimit = min(backlog, somaxconn); splx(s); return (0); } 
void -sofree(so) - register struct socket *so; +sofree(struct socket *so) { if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) return; if (so->so_head) { - if (!soqremque(so, 0) && !soqremque(so, 1)) - panic("sofree dq"); - so->so_head = 0; + /* + * We must not decommission a socket that's on the accept(2) + * queue. If we do, then accept(2) may hang after select(2) + * indicated that the listening socket was ready. + */ + if (!soqremque(so, 0)) + return; } sbrelease(&so->so_snd); sorflush(so); @@ -176,17 +188,22 @@ sofree(so) * Free socket when disconnect complete. */ int -soclose(so) - register struct socket *so; +soclose(struct socket *so) { - int s = splsoftnet(); /* conservative */ - int error = 0; + struct socket *so2; + int s, error; + error = 0; + s = splsoftnet(); /* conservative */ if (so->so_options & SO_ACCEPTCONN) { - while (so->so_q0.tqh_first) - (void) soabort(so->so_q0.tqh_first); - while (so->so_q.tqh_first) - (void) soabort(so->so_q.tqh_first); + while ((so2 = so->so_q0.tqh_first) != 0) { + (void) soqremque(so2, 0); + (void) soabort(so2); + } + while ((so2 = so->so_q.tqh_first) != 0) { + (void) soqremque(so2, 1); + (void) soabort(so2); + } } if (so->so_pcb == 0) goto discard; @@ -209,7 +226,7 @@ soclose(so) } } } -drop: + drop: if (so->so_pcb) { int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH, (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0, @@ -217,7 +234,7 @@ drop: if (error == 0) error = error2; } -discard: + discard: if (so->so_state & SS_NOFDREF) panic("soclose: NOFDREF"); so->so_state |= SS_NOFDREF; @@ -230,8 +247,7 @@ discard: * Must be called at splsoftnet... 
*/ int -soabort(so) - struct socket *so; +soabort(struct socket *so) { return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, (struct mbuf *)0, @@ -239,31 +255,33 @@ soabort(so) } int -soaccept(so, nam) - register struct socket *so; - struct mbuf *nam; +soaccept(struct socket *so, struct mbuf *nam) { - int s = splsoftnet(); - int error; + int s, error; + error = 0; + s = splsoftnet(); if ((so->so_state & SS_NOFDREF) == 0) panic("soaccept: !NOFDREF"); so->so_state &= ~SS_NOFDREF; - error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, (struct mbuf *)0, - nam, (struct mbuf *)0, (struct proc *)0); + if ((so->so_state & SS_ISDISCONNECTED) == 0 || + (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0) + error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, + (struct mbuf *)0, nam, (struct mbuf *)0, (struct proc *)0); + else + error = ECONNABORTED; + splx(s); return (error); } int -soconnect(so, nam) - register struct socket *so; - struct mbuf *nam; -{ - struct proc *p = curproc; /* XXX */ - int s; - int error; +soconnect(struct socket *so, struct mbuf *nam) +{ + struct proc *p; + int s, error; + p = curproc; /* XXX */ if (so->so_options & SO_ACCEPTCONN) return (EOPNOTSUPP); s = splsoftnet(); @@ -285,13 +303,11 @@ soconnect(so, nam) } int -soconnect2(so1, so2) - register struct socket *so1; - struct socket *so2; +soconnect2(struct socket *so1, struct socket *so2) { - int s = splsoftnet(); - int error; + int s, error; + s = splsoftnet(); error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0, (struct proc *)0); @@ -300,12 +316,11 @@ soconnect2(so1, so2) } int -sodisconnect(so) - register struct socket *so; +sodisconnect(struct socket *so) { - int s = splsoftnet(); - int error; + int s, error; + s = splsoftnet(); if ((so->so_state & SS_ISCONNECTED) == 0) { error = ENOTCONN; goto bad; @@ -317,7 +332,7 @@ sodisconnect(so) error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0, (struct 
proc *)0); -bad: + bad: splx(s); return (error); } @@ -341,21 +356,17 @@ bad: * Data and control buffers are freed on return. */ int -sosend(so, addr, uio, top, control, flags) - register struct socket *so; - struct mbuf *addr; - struct uio *uio; - struct mbuf *top; - struct mbuf *control; - int flags; -{ - struct proc *p = curproc; /* XXX */ - struct mbuf **mp; - register struct mbuf *m; - register long space, len, resid; - int clen = 0, error, s, dontroute, mlen; - int atomic = sosendallatonce(so) || top; - +sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top, + struct mbuf *control, int flags) +{ + struct proc *p; + struct mbuf **mp, *m; + long space, len, resid, clen, mlen; + int error, s, dontroute, atomic; + + p = curproc; /* XXX */ + clen = 0; + atomic = sosendallatonce(so) || top; if (uio) resid = uio->uio_resid; else @@ -379,15 +390,19 @@ sosend(so, addr, uio, top, control, flag clen = control->m_len; #define snderr(errno) { error = errno; splx(s); goto release; } -restart: + restart: if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0) goto out; do { s = splsoftnet(); if (so->so_state & SS_CANTSENDMORE) snderr(EPIPE); - if (so->so_error) - snderr(so->so_error); + if (so->so_error) { + error = so->so_error; + so->so_error = 0; + splx(s); + goto release; + } if ((so->so_state & SS_ISCONNECTED) == 0) { if (so->so_proto->pr_flags & PR_CONNREQUIRED) { if ((so->so_state & SS_ISCONFIRMING) == 0 && @@ -417,84 +432,96 @@ restart: mp = &top; space -= clen; do { - if (uio == NULL) { - /* - * Data is prepackaged in "top".
- */ - resid = 0; - if (flags & MSG_EOR) - top->m_flags |= M_EOR; - } else do { - if (top == 0) { - MGETHDR(m, M_WAIT, MT_DATA); - mlen = MHLEN; - m->m_pkthdr.len = 0; - m->m_pkthdr.rcvif = (struct ifnet *)0; - } else { - MGET(m, M_WAIT, MT_DATA); - mlen = MLEN; - } - if (resid >= MINCLSIZE && space >= MCLBYTES) { - MCLGET(m, M_WAIT); - if ((m->m_flags & M_EXT) == 0) - goto nopages; - mlen = MCLBYTES; + if (uio == NULL) { + /* + * Data is prepackaged in "top". + */ + resid = 0; + if (flags & MSG_EOR) + top->m_flags |= M_EOR; + } else do { + if (top == 0) { + MGETHDR(m, M_WAIT, MT_DATA); + mlen = MHLEN; + m->m_pkthdr.len = 0; + m->m_pkthdr.rcvif = (struct ifnet *)0; + } else { + MGET(m, M_WAIT, MT_DATA); + mlen = MLEN; + } + if (resid >= MINCLSIZE && space >= MCLBYTES) { + MCLGET(m, M_WAIT); + if ((m->m_flags & M_EXT) == 0) + goto nopages; + mlen = MCLBYTES; #ifdef MAPPED_MBUFS - len = min(MCLBYTES, resid); + len = lmin(MCLBYTES, resid); #else - if (atomic && top == 0) { - len = min(MCLBYTES - max_hdr, resid); - m->m_data += max_hdr; - } else - len = min(MCLBYTES, resid); + if (atomic && top == 0) { + len = lmin(MCLBYTES - max_hdr, + resid); + m->m_data += max_hdr; + } else + len = lmin(MCLBYTES, resid); #endif - space -= len; - } else { + space -= len; + } else { nopages: - len = min(min(mlen, resid), space); - space -= len; - /* - * For datagram protocols, leave room - * for protocol headers in first mbuf. - */ - if (atomic && top == 0 && len < mlen) - MH_ALIGN(m, len); - } - error = uiomove(mtod(m, caddr_t), (int)len, uio); - resid = uio->uio_resid; - m->m_len = len; - *mp = m; - top->m_pkthdr.len += len; + len = lmin(lmin(mlen, resid), space); + space -= len; + /* + * For datagram protocols, leave room + * for protocol headers in first mbuf. 
+ */ + if (atomic && top == 0 && len < mlen) + MH_ALIGN(m, len); + } + error = uiomove(mtod(m, caddr_t), (int)len, + uio); + resid = uio->uio_resid; + m->m_len = len; + *mp = m; + top->m_pkthdr.len += len; + if (error) + goto release; + mp = &m->m_next; + if (resid <= 0) { + if (flags & MSG_EOR) + top->m_flags |= M_EOR; + break; + } + } while (space > 0 && atomic); + + s = splsoftnet(); + + if (so->so_state & SS_CANTSENDMORE) + snderr(EPIPE); + + if (dontroute) + so->so_options |= SO_DONTROUTE; + if (resid > 0) + so->so_state |= SS_MORETOCOME; + error = (*so->so_proto->pr_usrreq)(so, + (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND, + top, addr, control, p); + if (dontroute) + so->so_options &= ~SO_DONTROUTE; + if (resid > 0) + so->so_state &= ~SS_MORETOCOME; + splx(s); + + clen = 0; + control = 0; + top = 0; + mp = &top; if (error) goto release; - mp = &m->m_next; - if (resid <= 0) { - if (flags & MSG_EOR) - top->m_flags |= M_EOR; - break; - } - } while (space > 0 && atomic); - if (dontroute) - so->so_options |= SO_DONTROUTE; - s = splsoftnet(); /* XXX */ - error = (*so->so_proto->pr_usrreq)(so, - (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND, - top, addr, control, p); - splx(s); - if (dontroute) - so->so_options &= ~SO_DONTROUTE; - clen = 0; - control = 0; - top = 0; - mp = &top; - if (error) - goto release; } while (resid && space > 0); } while (resid); -release: + release: sbunlock(&so->so_snd); -out: + out: if (top) m_freem(top); if (control) @@ -519,22 +546,18 @@ out: * only for the count in uio_resid.
*/ int -soreceive(so, paddr, uio, mp0, controlp, flagsp) - register struct socket *so; - struct mbuf **paddr; - struct uio *uio; - struct mbuf **mp0; - struct mbuf **controlp; - int *flagsp; -{ - register struct mbuf *m, **mp; - register int flags, len, error, s, offset; - struct protosw *pr = so->so_proto; - struct mbuf *nextrecord; - int moff, type = 0; - int orig_resid = uio->uio_resid; +soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio, + struct mbuf **mp0, struct mbuf **controlp, int *flagsp) +{ + struct mbuf *m, **mp; + int flags, len, error, s, offset, moff, type, orig_resid; + struct protosw *pr; + struct mbuf *nextrecord; + pr = so->so_proto; mp = mp0; + type = 0; + orig_resid = uio->uio_resid; if (paddr) *paddr = 0; if (controlp) @@ -555,7 +578,7 @@ soreceive(so, paddr, uio, mp0, controlp, (int) min(uio->uio_resid, m->m_len), uio); m = m_free(m); } while (uio->uio_resid && error == 0 && m); -bad: + bad: if (m) m_freem(m); return (error); @@ -566,7 +589,7 @@ bad: (*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0); -restart: + restart: if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0) return (error); s = splsoftnet(); @@ -629,7 +652,7 @@ restart: return (error); goto restart; } -dontblock: + dontblock: #ifdef notyet /* XXXX */ if (uio->uio_procp) uio->uio_procp->p_stats->p_ru.ru_msgrcv++; @@ -669,7 +692,7 @@ dontblock: if (pr->pr_domain->dom_externalize && mtod(m, struct cmsghdr *)->cmsg_type == SCM_RIGHTS) - error = (*pr->pr_domain->dom_externalize)(m); + error = (*pr->pr_domain->dom_externalize)(m); *controlp = m; so->so_rcv.sb_mb = m->m_next; m->m_next = 0; @@ -721,6 +744,8 @@ dontblock: splx(s); error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); s = splsoftnet(); + if (error) + goto release; } else uio->uio_resid -= len; if (len == m->m_len - moff) { @@ -814,19 +839,18 @@ dontblock: if (flagsp) *flagsp |= flags; -release: + release: sbunlock(&so->so_rcv); splx(s); return 
(error); } int -soshutdown(so, how) - struct socket *so; - int how; +soshutdown(struct socket *so, int how) { - struct protosw *pr = so->so_proto; + struct protosw *pr; + pr = so->so_proto; if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR)) return (EINVAL); @@ -839,17 +863,17 @@ soshutdown(so, how) } void -sorflush(so) - register struct socket *so; +sorflush(struct socket *so) { - register struct sockbuf *sb = &so->so_rcv; - register struct protosw *pr = so->so_proto; - register int s; - struct sockbuf asb; + struct sockbuf *sb, asb; + struct protosw *pr; + int s; + sb = &so->so_rcv; + pr = so->so_proto; sb->sb_flags |= SB_NOINTR; (void) sblock(sb, M_WAITOK); - s = splimp(); + s = splnet(); socantrcvmore(so); sbunlock(sb); asb = *sb; @@ -861,14 +885,13 @@ sorflush(so) } int -sosetopt(so, level, optname, m0) - register struct socket *so; - int level, optname; - struct mbuf *m0; +sosetopt(struct socket *so, int level, int optname, struct mbuf *m0) { - int error = 0; - register struct mbuf *m = m0; + int error; + struct mbuf *m; + error = 0; + m = m0; if (level != SOL_SOCKET) { if (so->so_proto && so->so_proto->pr_ctloutput) return ((*so->so_proto->pr_ctloutput) @@ -995,19 +1018,16 @@ sosetopt(so, level, optname, m0) m = NULL; /* freed by protocol */ } } -bad: + bad: if (m) (void) m_free(m); return (error); } int -sogetopt(so, level, optname, mp) - register struct socket *so; - int level, optname; - struct mbuf **mp; +sogetopt(struct socket *so, int level, int optname, struct mbuf **mp) { - register struct mbuf *m; + struct mbuf *m; if (level != SOL_SOCKET) { if (so->so_proto && so->so_proto->pr_ctloutput) { @@ -1088,8 +1108,7 @@ sogetopt(so, level, optname, mp) } void -sohasoutofband(so) - register struct socket *so; +sohasoutofband(struct socket *so) { struct proc *p; @@ -1099,3 +1118,130 @@ sohasoutofband(so) psignal(p, SIGURG); selwakeup(&so->so_rcv.sb_sel); } + + +int +soo_kqfilter(struct file *fp, struct knote *kn) +{ + struct socket *so; + struct 
sockbuf *sb; + int s; + + so = (struct socket *)kn->kn_fp->f_data; + switch (kn->kn_filter) { + case EVFILT_READ: + if (so->so_options & SO_ACCEPTCONN) + kn->kn_fop = &solisten_filtops; + else + kn->kn_fop = &soread_filtops; + sb = &so->so_rcv; + break; + case EVFILT_WRITE: + kn->kn_fop = &sowrite_filtops; + sb = &so->so_snd; + break; + default: + return (1); + } + s = splnet(); /* XXXLUKEM: maybe splsoftnet() ? */ + SLIST_INSERT_HEAD(&sb->sb_sel.si_klist, kn, kn_selnext); + sb->sb_flags |= SB_KNOTE; + splx(s); + return (0); +} + +static void +filt_sordetach(struct knote *kn) +{ + struct socket *so; + int s; + + so = (struct socket *)kn->kn_fp->f_data; + s = splnet(); /* XXXLUKEM: maybe splsoftnet() ? */ + SLIST_REMOVE(&so->so_rcv.sb_sel.si_klist, kn, knote, kn_selnext); + if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_klist)) + so->so_rcv.sb_flags &= ~SB_KNOTE; + splx(s); +} + +/*ARGSUSED*/ +static int +filt_soread(struct knote *kn, long hint) +{ + struct socket *so; + + so = (struct socket *)kn->kn_fp->f_data; + kn->kn_data = so->so_rcv.sb_cc; + if (so->so_state & SS_CANTRCVMORE) { + kn->kn_flags |= EV_EOF; + kn->kn_fflags = so->so_error; + return (1); + } + if (so->so_error) /* temporary udp error */ + return (1); + if (kn->kn_sfflags & NOTE_LOWAT) + return (kn->kn_data >= kn->kn_sdata); + return (kn->kn_data >= so->so_rcv.sb_lowat); +} + +static void +filt_sowdetach(struct knote *kn) +{ + struct socket *so; + int s; + + so = (struct socket *)kn->kn_fp->f_data; + s = splnet(); /* XXXLUKEM: maybe splsoftnet() ? 
*/ + SLIST_REMOVE(&so->so_snd.sb_sel.si_klist, kn, knote, kn_selnext); + if (SLIST_EMPTY(&so->so_snd.sb_sel.si_klist)) + so->so_snd.sb_flags &= ~SB_KNOTE; + splx(s); +} + +/*ARGSUSED*/ +static int +filt_sowrite(struct knote *kn, long hint) +{ + struct socket *so; + + so = (struct socket *)kn->kn_fp->f_data; + kn->kn_data = sbspace(&so->so_snd); + if (so->so_state & SS_CANTSENDMORE) { + kn->kn_flags |= EV_EOF; + kn->kn_fflags = so->so_error; + return (1); + } + if (so->so_error) /* temporary udp error */ + return (1); + if (((so->so_state & SS_ISCONNECTED) == 0) && + (so->so_proto->pr_flags & PR_CONNREQUIRED)) + return (0); + if (kn->kn_sfflags & NOTE_LOWAT) + return (kn->kn_data >= kn->kn_sdata); + return (kn->kn_data >= so->so_snd.sb_lowat); +} + +/*ARGSUSED*/ +static int +filt_solisten(struct knote *kn, long hint) +{ + struct socket *so; + + so = (struct socket *)kn->kn_fp->f_data; +#if 0 + /* + * XXXLUKEM: this was freebsd's code. it appears that they + * XXXLUKEM: modified the socket code to store the count + * XXXLUKEM: of all connections in so_qlen, and separately + * XXXLUKEM: track the number of incompletes in so_incqlen. + * XXXLUKEM: as we appear to keep only completed connections + * XXXLUKEM: on so_qlen we can just return that. + * XXXLUKEM: that said, a socket guru should double check for me :) + */ + kn->kn_data = so->so_qlen - so->so_incqlen; + return (! TAILQ_EMPTY(&so->so_comp)); +#else + kn->kn_data = so->so_qlen; + return (kn->kn_data > 0); +#endif +}