Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v rcsdiff: /ftp/cvs/cvsroot/src/sys/kern/uipc_socket.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.171 retrieving revision 1.190 diff -u -p -r1.171 -r1.190 --- src/sys/kern/uipc_socket.c 2008/08/06 15:01:23 1.171 +++ src/sys/kern/uipc_socket.c 2009/09/11 22:06:29 1.190 @@ -1,11 +1,11 @@ -/* $NetBSD: uipc_socket.c,v 1.171 2008/08/06 15:01:23 plunky Exp $ */ +/* $NetBSD: uipc_socket.c,v 1.190 2009/09/11 22:06:29 dyoung Exp $ */ /*- - * Copyright (c) 2002, 2007, 2008 The NetBSD Foundation, Inc. + * Copyright (c) 2002, 2007, 2008, 2009 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation - * by Jason R. Thorpe of Wasabi Systems, Inc. + * by Jason R. Thorpe of Wasabi Systems, Inc, and by Andrew Doran. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -63,9 +63,9 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.171 2008/08/06 15:01:23 plunky Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.190 2009/09/11 22:06:29 dyoung Exp $"); -#include "opt_inet.h" +#include "opt_compat_netbsd.h" #include "opt_sock_counters.h" #include "opt_sosend_loan.h" #include "opt_mbuftrace.h" @@ -77,7 +77,7 @@ __KERNEL_RCSID(0, "$NetBSD: uipc_socket. #include #include #include -#include +#include #include #include #include @@ -86,12 +86,18 @@ __KERNEL_RCSID(0, "$NetBSD: uipc_socket. #include #include #include +#include #include #include #include #include #include +#ifdef COMPAT_50 +#include +#include +#endif + #include MALLOC_DEFINE(M_SOOPTS, "soopts", "socket options"); @@ -150,6 +156,9 @@ static kcondvar_t socurkva_cv; static size_t sodopendfree(void); static size_t sodopendfreel(void); +static void sysctl_kern_somaxkva_setup(void); +static struct sysctllog *socket_sysctllog; + static vsize_t sokvareserve(struct socket *so, vsize_t len) { @@ -423,6 +432,8 @@ void soinit(void) { + sysctl_kern_somaxkva_setup(); + mutex_init(&so_pendfree_lock, MUTEX_DEFAULT, IPL_VM); softnet_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); cv_init(&socurkva_cv, "sokva"); @@ -544,6 +555,19 @@ fsocreate(int domain, struct socket **so } int +sofamily(const struct socket *so) +{ + const struct protosw *pr; + const struct domain *dom; + + if ((pr = so->so_proto) == NULL) + return AF_UNSPEC; + if ((dom = pr->pr_domain) == NULL) + return AF_UNSPEC; + return dom->dom_family; +} + +int sobind(struct socket *so, struct mbuf *nam, struct lwp *l) { int error; @@ -614,11 +638,9 @@ sofree(struct socket *so) KASSERT(!cv_has_waiters(&so->so_snd.sb_cv)); sorflush(so); refs = so->so_aborting; /* XXX */ -#ifdef INET - /* remove acccept filter if one is present. */ + /* Remove acccept filter if one is present. */ if (so->so_accf != NULL) - do_setopt_accept_filter(so, NULL); -#endif + (void)accept_filt_clear(so); sounlock(so); if (refs == 0) /* XXX */ soput(so); @@ -639,22 +661,25 @@ soclose(struct socket *so) error = 0; solock(so); if (so->so_options & SO_ACCEPTCONN) { - do { - while ((so2 = TAILQ_FIRST(&so->so_q0)) != 0) { + for (;;) { + if ((so2 = TAILQ_FIRST(&so->so_q0)) != 0) { KASSERT(solocked2(so, so2)); (void) soqremque(so2, 0); /* soabort drops the lock. */ (void) soabort(so2); solock(so); + continue; } - while ((so2 = TAILQ_FIRST(&so->so_q)) != 0) { + if ((so2 = TAILQ_FIRST(&so->so_q)) != 0) { KASSERT(solocked2(so, so2)); (void) soqremque(so2, 1); /* soabort drops the lock. */ (void) soabort(so2); solock(so); + continue; } - } while (!TAILQ_EMPTY(&so->so_q0)); + break; + } } if (so->so_pcb == 0) goto discard; @@ -668,7 +693,7 @@ soclose(struct socket *so) if ((so->so_state & SS_ISDISCONNECTING) && so->so_nbio) goto drop; while (so->so_state & SS_ISCONNECTED) { - error = sowait(so, so->so_linger * hz); + error = sowait(so, true, so->so_linger * hz); if (error) break; } @@ -1535,6 +1560,20 @@ soshutdown(struct socket *so, int how) return error; } +int +sodrain(struct socket *so) +{ + int error; + + solock(so); + so->so_state |= SS_ISDRAINING; + cv_broadcast(&so->so_cv); + error = soshutdown(so, SHUT_RDWR); + sounlock(so); + + return error; +} + void sorflush(struct socket *so) { @@ -1570,35 +1609,33 @@ sorflush(struct socket *so) static int sosetopt1(struct socket *so, const struct sockopt *sopt) { - int error, optval; + int error = EINVAL, optval, opt; struct linger l; struct timeval tv; - switch (sopt->sopt_name) { + switch ((opt = sopt->sopt_name)) { -#ifdef INET case SO_ACCEPTFILTER: - error = do_setopt_accept_filter(so, sopt); - if (error) - return error; + error = accept_filt_setopt(so, sopt); + KASSERT(solocked(so)); break; -#endif case SO_LINGER: error = sockopt_get(sopt, &l, sizeof(l)); + solock(so); if (error) - return (error); - + break; if (l.l_linger < 0 || l.l_linger > USHRT_MAX || - l.l_linger > (INT_MAX / hz)) - return EDOM; + l.l_linger > (INT_MAX / hz)) { + error = EDOM; + break; + } so->so_linger = l.l_linger; if (l.l_onoff) so->so_options |= SO_LINGER; else so->so_options &= ~SO_LINGER; - - break; + break; case SO_DEBUG: case SO_KEEPALIVE: @@ -1609,14 +1646,17 @@ sosetopt1(struct socket *so, const struc case SO_REUSEPORT: case SO_OOBINLINE: case SO_TIMESTAMP: +#ifdef SO_OTIMESTAMP + case SO_OTIMESTAMP: +#endif error = sockopt_getint(sopt, &optval); + solock(so); if (error) - return (error); - + break; if (optval) - so->so_options |= sopt->sopt_name; + so->so_options |= opt; else - so->so_options &= ~sopt->sopt_name; + so->so_options &= ~opt; break; case SO_SNDBUF: @@ -1624,28 +1664,33 @@ sosetopt1(struct socket *so, const struc case SO_SNDLOWAT: case SO_RCVLOWAT: error = sockopt_getint(sopt, &optval); + solock(so); if (error) - return (error); + break; /* * Values < 1 make no sense for any of these * options, so disallow them. */ - if (optval < 1) - return EINVAL; + if (optval < 1) { + error = EINVAL; + break; + } - switch (sopt->sopt_name) { + switch (opt) { case SO_SNDBUF: - if (sbreserve(&so->so_snd, (u_long)optval, so) == 0) - return ENOBUFS; - + if (sbreserve(&so->so_snd, (u_long)optval, so) == 0) { + error = ENOBUFS; + break; + } so->so_snd.sb_flags &= ~SB_AUTOSIZE; break; case SO_RCVBUF: - if (sbreserve(&so->so_rcv, (u_long)optval, so) == 0) - return ENOBUFS; - + if (sbreserve(&so->so_rcv, (u_long)optval, so) == 0) { + error = ENOBUFS; + break; + } so->so_rcv.sb_flags &= ~SB_AUTOSIZE; break; @@ -1669,20 +1714,40 @@ sosetopt1(struct socket *so, const struc } break; +#ifdef COMPAT_50 + case SO_OSNDTIMEO: + case SO_ORCVTIMEO: { + struct timeval50 otv; + error = sockopt_get(sopt, &otv, sizeof(otv)); + if (error) { + solock(so); + break; + } + timeval50_to_timeval(&otv, &tv); + opt = opt == SO_OSNDTIMEO ? SO_SNDTIMEO : SO_RCVTIMEO; + error = 0; + /*FALLTHROUGH*/ + } +#endif /* COMPAT_50 */ + case SO_SNDTIMEO: case SO_RCVTIMEO: - error = sockopt_get(sopt, &tv, sizeof(tv)); if (error) - return (error); + error = sockopt_get(sopt, &tv, sizeof(tv)); + solock(so); + if (error) + break; - if (tv.tv_sec > (INT_MAX - tv.tv_usec / tick) / hz) - return EDOM; + if (tv.tv_sec > (INT_MAX - tv.tv_usec / tick) / hz) { + error = EDOM; + break; + } optval = tv.tv_sec * hz + tv.tv_usec / tick; if (optval == 0 && tv.tv_usec != 0) optval = 1; - switch (sopt->sopt_name) { + switch (opt) { case SO_SNDTIMEO: so->so_snd.sb_timeo = optval; break; @@ -1693,9 +1758,12 @@ sosetopt1(struct socket *so, const struc break; default: - return ENOPROTOOPT; + solock(so); + error = ENOPROTOOPT; + break; } - return 0; + KASSERT(solocked(so)); + return error; } int @@ -1703,11 +1771,13 @@ sosetopt(struct socket *so, struct socko { int error, prerr; - solock(so); - if (sopt->sopt_level == SOL_SOCKET) + if (sopt->sopt_level == SOL_SOCKET) { error = sosetopt1(so, sopt); - else + KASSERT(solocked(so)); + } else { error = ENOPROTOOPT; + solock(so); + } if ((error == 0 || error == ENOPROTOOPT) && so->so_proto != NULL && so->so_proto->pr_ctloutput != NULL) { @@ -1750,17 +1820,15 @@ so_setsockopt(struct lwp *l, struct sock static int sogetopt1(struct socket *so, struct sockopt *sopt) { - int error, optval; + int error, optval, opt; struct linger l; struct timeval tv; - switch (sopt->sopt_name) { + switch ((opt = sopt->sopt_name)) { -#ifdef INET case SO_ACCEPTFILTER: - error = do_getopt_accept_filter(so, sopt); + error = accept_filt_getopt(so, sopt); break; -#endif case SO_LINGER: l.l_onoff = (so->so_options & SO_LINGER) ? 1 : 0; @@ -1778,8 +1846,10 @@ sogetopt1(struct socket *so, struct sock case SO_BROADCAST: case SO_OOBINLINE: case SO_TIMESTAMP: - error = sockopt_setint(sopt, - (so->so_options & sopt->sopt_name) ? 1 : 0); +#ifdef SO_OTIMESTAMP + case SO_OTIMESTAMP: +#endif + error = sockopt_setint(sopt, (so->so_options & opt) ? 1 : 0); break; case SO_TYPE: @@ -1807,9 +1877,25 @@ sogetopt1(struct socket *so, struct sock error = sockopt_setint(sopt, so->so_rcv.sb_lowat); break; +#ifdef COMPAT_50 + case SO_OSNDTIMEO: + case SO_ORCVTIMEO: { + struct timeval50 otv; + + optval = (opt == SO_OSNDTIMEO ? + so->so_snd.sb_timeo : so->so_rcv.sb_timeo); + + otv.tv_sec = optval / hz; + otv.tv_usec = (optval % hz) * tick; + + error = sockopt_set(sopt, &otv, sizeof(otv)); + break; + } +#endif /* COMPAT_50 */ + case SO_SNDTIMEO: case SO_RCVTIMEO: - optval = (sopt->sopt_name == SO_SNDTIMEO ? + optval = (opt == SO_SNDTIMEO ? so->so_snd.sb_timeo : so->so_rcv.sb_timeo); tv.tv_sec = optval / hz; @@ -1853,22 +1939,26 @@ sogetopt(struct socket *so, struct socko * alloc sockopt data buffer buffer * - will be released at destroy */ -static void -sockopt_alloc(struct sockopt *sopt, size_t len) +static int +sockopt_alloc(struct sockopt *sopt, size_t len, km_flag_t kmflag) { KASSERT(sopt->sopt_size == 0); - if (len > sizeof(sopt->sopt_buf)) - sopt->sopt_data = malloc(len, M_SOOPTS, M_WAITOK | M_ZERO); - else + if (len > sizeof(sopt->sopt_buf)) { + sopt->sopt_data = kmem_zalloc(len, kmflag); + if (sopt->sopt_data == NULL) + return ENOMEM; + } else sopt->sopt_data = sopt->sopt_buf; sopt->sopt_size = len; + return 0; } /* * initialise sockopt storage + * - MAY sleep during allocation */ void sockopt_init(struct sockopt *sopt, int level, int name, size_t size) @@ -1878,7 +1968,7 @@ sockopt_init(struct sockopt *sopt, int l sopt->sopt_level = level; sopt->sopt_name = name; - sockopt_alloc(sopt, size); + (void)sockopt_alloc(sopt, size, KM_SLEEP); } /* @@ -1890,7 +1980,7 @@ sockopt_destroy(struct sockopt *sopt) { if (sopt->sopt_data != sopt->sopt_buf) - free(sopt->sopt_data, M_SOOPTS); + kmem_free(sopt->sopt_data, sopt->sopt_size); memset(sopt, 0, sizeof(*sopt)); } @@ -1898,14 +1988,18 @@ sockopt_destroy(struct sockopt *sopt) /* * set sockopt value * - value is copied into sockopt - * - memory is allocated when necessary + * - memory is allocated when necessary, will not sleep */ int sockopt_set(struct sockopt *sopt, const void *buf, size_t len) { + int error; - if (sopt->sopt_size == 0) - sockopt_alloc(sopt, len); + if (sopt->sopt_size == 0) { + error = sockopt_alloc(sopt, len, KM_NOSLEEP); + if (error) + return error; + } KASSERT(sopt->sopt_size == len); memcpy(sopt->sopt_data, buf, len); @@ -1951,16 +2045,21 @@ sockopt_getint(const struct sockopt *sop * set sockopt value from mbuf * - ONLY for legacy code * - mbuf is released by sockopt + * - will not sleep */ int sockopt_setmbuf(struct sockopt *sopt, struct mbuf *m) { size_t len; + int error; len = m_length(m); - if (sopt->sopt_size == 0) - sockopt_alloc(sopt, len); + if (sopt->sopt_size == 0) { + error = sockopt_alloc(sopt, len, KM_NOSLEEP); + if (error) + return error; + } KASSERT(sopt->sopt_size == len); m_copydata(m, 0, len, sopt->sopt_data); @@ -1973,23 +2072,30 @@ sockopt_setmbuf(struct sockopt *sopt, st * get sockopt value into mbuf * - ONLY for legacy code * - mbuf to be released by the caller + * - will not sleep */ struct mbuf * sockopt_getmbuf(const struct sockopt *sopt) { struct mbuf *m; - m = m_get(M_WAIT, MT_SOOPTS); - if (m == NULL) + if (sopt->sopt_size > MCLBYTES) return NULL; - m->m_len = MLEN; - m_copyback(m, 0, sopt->sopt_size, sopt->sopt_data); - if (m_length(m) != max(sopt->sopt_size, MLEN)) { - m_freem(m); + m = m_get(M_DONTWAIT, MT_SOOPTS); + if (m == NULL) return NULL; + + if (sopt->sopt_size > MLEN) { + MCLGET(m, M_DONTWAIT); + if ((m->m_flags & M_EXT) == 0) { + m_free(m); + return NULL; + } } - m->m_len = min(sopt->sopt_size, MLEN); + + memcpy(mtod(m, void *), sopt->sopt_data, sopt->sopt_size); + m->m_len = sopt->sopt_size; return m; } @@ -1999,7 +2105,7 @@ sohasoutofband(struct socket *so) { fownsignal(so->so_pgid, SIGURG, POLL_PRI, POLLPRI|POLLRDBAND, so); - selnotify(&so->so_rcv.sb_sel, POLLPRI | POLLRDBAND, 0); + selnotify(&so->so_rcv.sb_sel, POLLPRI | POLLRDBAND, NOTE_SUBMIT); } static void @@ -2230,16 +2336,18 @@ sysctl_kern_somaxkva(SYSCTLFN_ARGS) return (error); } -SYSCTL_SETUP(sysctl_kern_somaxkva_setup, "sysctl kern.somaxkva setup") +static void +sysctl_kern_somaxkva_setup(void) { - sysctl_createv(clog, 0, NULL, NULL, + KASSERT(socket_sysctllog == NULL); + sysctl_createv(&socket_sysctllog, 0, NULL, NULL, CTLFLAG_PERMANENT, CTLTYPE_NODE, "kern", NULL, NULL, 0, NULL, 0, CTL_KERN, CTL_EOL); - sysctl_createv(clog, 0, NULL, NULL, + sysctl_createv(&socket_sysctllog, 0, NULL, NULL, CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "somaxkva", SYSCTL_DESCR("Maximum amount of kernel memory to be "