Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.210 retrieving revision 1.234 diff -u -p -r1.210 -r1.234 --- src/sys/kern/uipc_socket.c 2012/03/16 06:47:37 1.210 +++ src/sys/kern/uipc_socket.c 2014/08/09 05:33:00 1.234 @@ -1,4 +1,4 @@ -/* $NetBSD: uipc_socket.c,v 1.210 2012/03/16 06:47:37 matt Exp $ */ +/* $NetBSD: uipc_socket.c,v 1.234 2014/08/09 05:33:00 rtr Exp $ */ /*- * Copyright (c) 2002, 2007, 2008, 2009 The NetBSD Foundation, Inc. @@ -62,8 +62,16 @@ * @(#)uipc_socket.c 8.6 (Berkeley) 5/2/95 */ +/* + * Socket operation routines. + * + * These routines are called by the routines in sys_socket.c or from a + * system process, and implement the semantics of socket operations by + * switching out to the protocol specific routines. + */ + #include -__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.210 2012/03/16 06:47:37 matt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.234 2014/08/09 05:33:00 rtr Exp $"); #include "opt_compat_netbsd.h" #include "opt_sock_counters.h" @@ -103,7 +111,6 @@ __KERNEL_RCSID(0, "$NetBSD: uipc_socket. #include #include -MALLOC_DEFINE(M_SOOPTS, "soopts", "socket options"); MALLOC_DEFINE(M_SONAME, "soname", "socket name"); extern const struct fileops socketops; @@ -160,7 +167,7 @@ static void sopendfree_thread(void *); static kcondvar_t pendfree_thread_cv; static lwp_t *sopendfree_lwp; -static void sysctl_kern_somaxkva_setup(void); +static void sysctl_kern_socket_setup(void); static struct sysctllog *socket_sysctllog; static vsize_t @@ -416,7 +423,7 @@ socket_listener_cb(kauth_cred_t cred, ka /* Normal users can only drop their own connections. */ struct socket *so = (struct socket *)arg1; - if (proc_uidmatch(cred, so->so_cred)) + if (so->so_cred && proc_uidmatch(cred, so->so_cred) == 0) result = KAUTH_RESULT_ALLOW; break; @@ -453,7 +460,7 @@ void soinit(void) { - sysctl_kern_somaxkva_setup(); + sysctl_kern_socket_setup(); mutex_init(&so_pendfree_lock, MUTEX_DEFAULT, IPL_VM); softnet_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); @@ -479,13 +486,11 @@ soinit1(void) } /* - * Socket operation routines. - * These routines are called by the routines in - * sys_socket.c or from a system process, and - * implement the semantics of socket operations by - * switching out to the protocol specific routines. + * socreate: create a new socket of the specified type and the protocol. + * + * => Caller may specify another socket for lock sharing (must not be held). + * => Returns the new socket without lock held. */ -/*ARGSUSED*/ int socreate(int dom, struct socket **aso, int type, int proto, struct lwp *l, struct socket *lockso) @@ -515,7 +520,7 @@ socreate(int dom, struct socket **aso, i return EPROTOTYPE; return EPROTONOSUPPORT; } - if (prp->pr_usrreq == NULL) + if (prp->pr_usrreqs == NULL) return EPROTONOSUPPORT; if (prp->pr_type != type) return EPROTOTYPE; @@ -533,59 +538,75 @@ socreate(int dom, struct socket **aso, i uid = kauth_cred_geteuid(l->l_cred); so->so_uidinfo = uid_find(uid); so->so_cpid = l->l_proc->p_pid; - if (lockso != NULL) { - /* Caller wants us to share a lock. */ + + /* + * Lock assigned and taken during PCB attach, unless we share + * the lock with another socket, e.g. socketpair(2) case. + */ + if (lockso) { lock = lockso->so_lock; so->so_lock = lock; mutex_obj_hold(lock); mutex_enter(lock); - } else { - /* Lock assigned and taken during PRU_ATTACH. */ } - error = (*prp->pr_usrreq)(so, PRU_ATTACH, NULL, - (struct mbuf *)(long)proto, NULL, l); + + /* Attach the PCB (returns with the socket lock held). */ + error = (*prp->pr_usrreqs->pr_attach)(so, proto); KASSERT(solocked(so)); - if (error != 0) { + + if (error) { + KASSERT(so->so_pcb == NULL); so->so_state |= SS_NOFDREF; sofree(so); return error; } so->so_cred = kauth_cred_dup(l->l_cred); sounlock(so); + *aso = so; return 0; } -/* On success, write file descriptor to fdout and return zero. On - * failure, return non-zero; *fdout will be undefined. +/* + * fsocreate: create a socket and a file descriptor associated with it. + * + * => On success, write file descriptor to fdout and return zero. + * => On failure, return non-zero; *fdout will be undefined. */ int -fsocreate(int domain, struct socket **sop, int type, int protocol, - struct lwp *l, int *fdout) +fsocreate(int domain, struct socket **sop, int type, int proto, int *fdout) { - struct socket *so; - struct file *fp; - int fd, error; - int flags = type & SOCK_FLAGS_MASK; + lwp_t *l = curlwp; + int error, fd, flags; + struct socket *so; + struct file *fp; - type &= ~SOCK_FLAGS_MASK; - if ((error = fd_allocfile(&fp, &fd)) != 0) + if ((error = fd_allocfile(&fp, &fd)) != 0) { return error; + } + flags = type & SOCK_FLAGS_MASK; fd_set_exclose(l, fd, (flags & SOCK_CLOEXEC) != 0); fp->f_flag = FREAD|FWRITE|((flags & SOCK_NONBLOCK) ? FNONBLOCK : 0)| ((flags & SOCK_NOSIGPIPE) ? FNOSIGPIPE : 0); fp->f_type = DTYPE_SOCKET; fp->f_ops = &socketops; - error = socreate(domain, &so, type, protocol, l, NULL); - if (error != 0) { + + type &= ~SOCK_FLAGS_MASK; + error = socreate(domain, &so, type, proto, l, NULL); + if (error) { fd_abort(curproc, fp, fd); - } else { - if (sop != NULL) - *sop = so; - fp->f_data = so; - fd_affix(curproc, fp, fd); - *fdout = fd; + return error; + } + if (flags & SOCK_NONBLOCK) { + so->so_state |= SS_NBIO; + } + fp->f_data = so; + fd_affix(curproc, fp, fd); + + if (sop != NULL) { + *sop = so; } + *fdout = fd; return error; } @@ -608,7 +629,7 @@ sobind(struct socket *so, struct mbuf *n int error; solock(so); - error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, l); + error = (*so->so_proto->pr_usrreqs->pr_bind)(so, nam, l); sounlock(so); return error; } @@ -621,11 +642,10 @@ solisten(struct socket *so, int backlog, solock(so); if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) != 0) { - sounlock(so); - return (EINVAL); + sounlock(so); + return EINVAL; } - error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, - NULL, NULL, l); + error = (*so->so_proto->pr_usrreqs->pr_listen)(so, l); if (error != 0) { sounlock(so); return error; @@ -682,18 +702,15 @@ sofree(struct socket *so) } /* - * Close a socket on last file table reference removal. - * Initiate disconnect if connected. - * Free socket when disconnect complete. + * soclose: close a socket on last file table reference removal. + * Initiate disconnect if connected. Free socket when disconnect complete. */ int soclose(struct socket *so) { - struct socket *so2; - int error; - int error2; + struct socket *so2; + int error = 0; - error = 0; solock(so); if (so->so_options & SO_ACCEPTCONN) { for (;;) { @@ -716,7 +733,7 @@ soclose(struct socket *so) break; } } - if (so->so_pcb == 0) + if (so->so_pcb == NULL) goto discard; if (so->so_state & SS_ISCONNECTED) { if ((so->so_state & SS_ISDISCONNECTING) == 0) { @@ -737,18 +754,15 @@ soclose(struct socket *so) } drop: if (so->so_pcb) { - error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH, - NULL, NULL, NULL, NULL); - if (error == 0) - error = error2; + KASSERT(solocked(so)); + (*so->so_proto->pr_usrreqs->pr_detach)(so); } discard: - if (so->so_state & SS_NOFDREF) - panic("soclose: NOFDREF"); + KASSERT((so->so_state & SS_NOFDREF) == 0); kauth_cred_free(so->so_cred); so->so_state |= SS_NOFDREF; sofree(so); - return (error); + return error; } /* @@ -764,8 +778,7 @@ soabort(struct socket *so) KASSERT(so->so_head == NULL); so->so_aborting++; /* XXX */ - error = (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, - NULL, NULL, NULL); + error = (*so->so_proto->pr_usrreqs->pr_abort)(so); refs = --so->so_aborting; /* XXX */ if (error || (refs == 0)) { sofree(so); @@ -778,33 +791,30 @@ soabort(struct socket *so) int soaccept(struct socket *so, struct mbuf *nam) { - int error; + int error; KASSERT(solocked(so)); + KASSERT((so->so_state & SS_NOFDREF) != 0); - error = 0; - if ((so->so_state & SS_NOFDREF) == 0) - panic("soaccept: !NOFDREF"); so->so_state &= ~SS_NOFDREF; if ((so->so_state & SS_ISDISCONNECTED) == 0 || (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0) - error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, - NULL, nam, NULL, NULL); + error = (*so->so_proto->pr_usrreqs->pr_accept)(so, nam); else error = ECONNABORTED; - return (error); + return error; } int soconnect(struct socket *so, struct mbuf *nam, struct lwp *l) { - int error; + int error; KASSERT(solocked(so)); if (so->so_options & SO_ACCEPTCONN) - return (EOPNOTSUPP); + return EOPNOTSUPP; /* * If protocol is connection-based, can only connect once. * Otherwise, if connected, try to disconnect first. @@ -816,21 +826,17 @@ soconnect(struct socket *so, struct mbuf (error = sodisconnect(so)))) error = EISCONN; else - error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT, - NULL, nam, NULL, l); - return (error); + error = (*so->so_proto->pr_usrreqs->pr_connect)(so, nam, l); + + return error; } int soconnect2(struct socket *so1, struct socket *so2) { - int error; - KASSERT(solocked2(so1, so2)); - error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, - NULL, (struct mbuf *)so2, NULL, NULL); - return (error); + return (*so1->so_proto->pr_usrreqs->pr_connect2)(so1, so2); } int @@ -845,8 +851,7 @@ sodisconnect(struct socket *so) } else if (so->so_state & SS_ISDISCONNECTING) { error = EALREADY; } else { - error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, - NULL, NULL, NULL, NULL); + error = (*so->so_proto->pr_usrreqs->pr_disconnect)(so); } return (error); } @@ -874,12 +879,10 @@ sosend(struct socket *so, struct mbuf *a struct mbuf *control, int flags, struct lwp *l) { struct mbuf **mp, *m; - struct proc *p; long space, len, resid, clen, mlen; int error, s, dontroute, atomic; short wakeup_state = 0; - p = l->l_proc; clen = 0; /* @@ -926,8 +929,7 @@ sosend(struct socket *so, struct mbuf *a } if ((so->so_state & SS_ISCONNECTED) == 0) { if (so->so_proto->pr_flags & PR_CONNREQUIRED) { - if ((so->so_state & SS_ISCONFIRMING) == 0 && - !(resid == 0 && clen != 0)) { + if (resid || clen == 0) { error = ENOTCONN; goto release; } @@ -1045,9 +1047,12 @@ sosend(struct socket *so, struct mbuf *a so->so_options |= SO_DONTROUTE; if (resid > 0) so->so_state |= SS_MORETOCOME; - error = (*so->so_proto->pr_usrreq)(so, - (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND, - top, addr, control, curlwp); + if (flags & MSG_OOB) + error = (*so->so_proto->pr_usrreqs->pr_sendoob)(so, + top, control); + else + error = (*so->so_proto->pr_usrreqs->pr_send)(so, + top, addr, control, l); if (dontroute) so->so_options &= ~SO_DONTROUTE; if (resid > 0) @@ -1132,7 +1137,8 @@ soreceive(struct socket *so, struct mbuf { struct lwp *l = curlwp; struct mbuf *m, **mp, *mt; - int atomic, flags, len, error, s, offset, moff, type, orig_resid; + size_t len, offset, moff, orig_resid; + int atomic, flags, error, s, type; const struct protosw *pr; struct mbuf *nextrecord; int mbuf_removed = 0; @@ -1158,14 +1164,13 @@ soreceive(struct socket *so, struct mbuf if (flags & MSG_OOB) { m = m_get(M_WAIT, MT_DATA); solock(so); - error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m, - (struct mbuf *)(long)(flags & MSG_PEEK), NULL, l); + error = (*pr->pr_usrreqs->pr_recvoob)(so, m, flags & MSG_PEEK); sounlock(so); if (error) goto bad; do { error = uiomove(mtod(m, void *), - (int) min(uio->uio_resid, m->m_len), uio); + MIN(uio->uio_resid, m->m_len), uio); m = m_free(m); } while (uio->uio_resid > 0 && error == 0 && m); bad: @@ -1183,9 +1188,6 @@ soreceive(struct socket *so, struct mbuf */ s = splsoftnet(); solock(so); - if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) - (*pr->pr_usrreq)(so, PRU_RCVD, NULL, NULL, NULL, l); - restart: if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0) { sounlock(so); @@ -1419,7 +1421,7 @@ soreceive(struct socket *so, struct mbuf SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove"); sounlock(so); splx(s); - error = uiomove(mtod(m, char *) + moff, (int)len, uio); + error = uiomove(mtod(m, char *) + moff, len, uio); s = splsoftnet(); solock(so); if (error != 0) { @@ -1529,8 +1531,7 @@ soreceive(struct socket *so, struct mbuf * get it filled again. */ if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb) - (*pr->pr_usrreq)(so, PRU_RCVD, - NULL, (struct mbuf *)(long)flags, NULL, l); + (*pr->pr_usrreqs->pr_rcvd)(so, flags, l); SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2"); SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2"); if (wakeup_state & SS_RESTARTSYS) @@ -1571,8 +1572,7 @@ soreceive(struct socket *so, struct mbuf SBLASTRECORDCHK(&so->so_rcv, "soreceive 4"); SBLASTMBUFCHK(&so->so_rcv, "soreceive 4"); if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) - (*pr->pr_usrreq)(so, PRU_RCVD, NULL, - (struct mbuf *)(long)flags, NULL, l); + (*pr->pr_usrreqs->pr_rcvd)(so, flags, l); } if (orig_resid == uio->uio_resid && orig_resid && (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { @@ -1606,8 +1606,7 @@ soshutdown(struct socket *so, int how) error = 0; } if (how == SHUT_WR || how == SHUT_RDWR) - error = (*pr->pr_usrreq)(so, PRU_SHUTDOWN, NULL, - NULL, NULL, NULL); + error = (*pr->pr_usrreqs->pr_shutdown)(so); return error; } @@ -1666,7 +1665,8 @@ sorflush(struct socket *so) static int sosetopt1(struct socket *so, const struct sockopt *sopt) { - int error = EINVAL, optval, opt; + int error = EINVAL, opt; + int optval = 0; /* XXX: gcc */ struct linger l; struct timeval tv; @@ -1908,6 +1908,7 @@ sogetopt1(struct socket *so, struct sock #ifdef SO_OTIMESTAMP case SO_OTIMESTAMP: #endif + case SO_ACCEPTCONN: error = sockopt_setint(sopt, (so->so_options & opt) ? 1 : 0); break; @@ -2365,6 +2366,7 @@ sopoll(struct socket *so, int events) #include static int sysctl_kern_somaxkva(SYSCTLFN_PROTO); +static int sysctl_kern_sbmax(SYSCTLFN_PROTO); /* * sysctl helper routine for kern.somaxkva. ensures that the given @@ -2395,16 +2397,35 @@ sysctl_kern_somaxkva(SYSCTLFN_ARGS) return (error); } +/* + * sysctl helper routine for kern.sbmax. Basically just ensures that + * any new value is not too small. + */ +static int +sysctl_kern_sbmax(SYSCTLFN_ARGS) +{ + int error, new_sbmax; + struct sysctlnode node; + + new_sbmax = sb_max; + node = *rnode; + node.sysctl_data = &new_sbmax; + error = sysctl_lookup(SYSCTLFN_CALL(&node)); + if (error || newp == NULL) + return (error); + + KERNEL_LOCK(1, NULL); + error = sb_max_set(new_sbmax); + KERNEL_UNLOCK_ONE(NULL); + + return (error); +} + static void -sysctl_kern_somaxkva_setup(void) +sysctl_kern_socket_setup(void) { KASSERT(socket_sysctllog == NULL); - sysctl_createv(&socket_sysctllog, 0, NULL, NULL, - CTLFLAG_PERMANENT, - CTLTYPE_NODE, "kern", NULL, - NULL, 0, NULL, 0, - CTL_KERN, CTL_EOL); sysctl_createv(&socket_sysctllog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, @@ -2413,4 +2434,11 @@ sysctl_kern_somaxkva_setup(void) "used for socket buffers"), sysctl_kern_somaxkva, 0, NULL, 0, CTL_KERN, KERN_SOMAXKVA, CTL_EOL); + + sysctl_createv(&socket_sysctllog, 0, NULL, NULL, + CTLFLAG_PERMANENT|CTLFLAG_READWRITE, + CTLTYPE_INT, "sbmax", + SYSCTL_DESCR("Maximum socket buffer size"), + sysctl_kern_sbmax, 0, NULL, 0, + CTL_KERN, KERN_SBMAX, CTL_EOL); }