Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files. =================================================================== RCS file: /ftp/cvs/cvsroot/src/lib/librumpclient/rumpclient.c,v rcsdiff: /ftp/cvs/cvsroot/src/lib/librumpclient/rumpclient.c,v: warning: Unknown phrases like `commitid ...;' are present. retrieving revision 1.15 retrieving revision 1.23 diff -u -p -r1.15 -r1.23 --- src/lib/librumpclient/rumpclient.c 2011/01/10 19:49:43 1.15 +++ src/lib/librumpclient/rumpclient.c 2011/02/06 15:41:37 1.23 @@ -1,4 +1,4 @@ -/* $NetBSD: rumpclient.c,v 1.15 2011/01/10 19:49:43 pooka Exp $ */ +/* $NetBSD: rumpclient.c,v 1.23 2011/02/06 15:41:37 pooka Exp $ */ /* * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved. @@ -50,6 +50,7 @@ __RCSID("$NetBSD"); #include #include #include +#include #include #include #include @@ -63,32 +64,122 @@ int (*host_close)(int); int (*host_connect)(int, const struct sockaddr *, socklen_t); int (*host_fcntl)(int, int, ...); int (*host_poll)(struct pollfd *, nfds_t, int); -int (*host_pollts)(struct pollfd *, nfds_t, const struct timespec *, - const sigset_t *); ssize_t (*host_read)(int, void *, size_t); ssize_t (*host_sendto)(int, const void *, size_t, int, const struct sockaddr *, socklen_t); int (*host_setsockopt)(int, int, int, const void *, socklen_t); +int (*host_kqueue)(void); +int (*host_kevent)(int, const struct kevent *, size_t, + struct kevent *, size_t, const struct timespec *); + #include "sp_common.c" static struct spclient clispc = { .spc_fd = -1, }; -static int kq; +static int kq = -1; static sigset_t fullset; +static int doconnect(bool); +static int handshake_req(struct spclient *, uint32_t *, int, bool); + +time_t retrytimo = RUMPCLIENT_RETRYCONN_ONCE; + +static int +send_with_recon(struct spclient *spc, const void *data, size_t dlen) +{ + struct timeval starttime, curtime; + time_t prevreconmsg; + unsigned reconretries; + int rv; + + for (prevreconmsg = 0, reconretries = 0;;) { + rv = dosend(spc, data, dlen); + if (__predict_false(rv == ENOTCONN || rv == EBADF)) { + /* no persistent connections */ + if (retrytimo == 0) + break; + + if (!prevreconmsg) { + prevreconmsg = time(NULL); + gettimeofday(&starttime, NULL); + } + if (reconretries == 1) { + if (retrytimo == RUMPCLIENT_RETRYCONN_ONCE) { + rv = ENOTCONN; + break; + } + fprintf(stderr, "rump_sp: connection to " + "kernel lost, trying to reconnect ...\n"); + } else if (time(NULL) - prevreconmsg > 120) { + fprintf(stderr, "rump_sp: still trying to " + "reconnect ...\n"); + prevreconmsg = time(NULL); + } + + /* check that we aren't over the limit */ + if (retrytimo > 0) { + struct timeval tmp; + + gettimeofday(&curtime, NULL); + timersub(&curtime, &starttime, &tmp); + if (tmp.tv_sec >= retrytimo) { + fprintf(stderr, "rump_sp: reconnect " + "failed, %lld second timeout\n", + (long long)retrytimo); + return ENOTCONN; + } + } + + /* adhoc backoff timer */ + if (reconretries < 10) { + usleep(100000 * reconretries); + } else { + sleep(MIN(10, reconretries-9)); + } + reconretries++; + + if ((rv = doconnect(false)) != 0) + continue; + if ((rv = handshake_req(&clispc, NULL, 0, true)) != 0) + continue; + + /* + * ok, reconnect succesful. we need to return to + * the upper layer to get the entire PDU resent. + */ + if (reconretries != 1) + fprintf(stderr, "rump_sp: reconnected!\n"); + rv = EAGAIN; + break; + } else { + _DIAGASSERT(errno != EAGAIN); + break; + } + } + + return rv; +} + static int -waitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask) +cliwaitresp(struct spclient *spc, struct respwait *rw, sigset_t *mask, + bool keeplock) { + uint64_t mygen; + bool imalive = true; pthread_mutex_lock(&spc->spc_mtx); - sendunlockl(spc); + if (!keeplock) + sendunlockl(spc); + mygen = spc->spc_generation; rw->rw_error = 0; - while (!rw->rw_done && rw->rw_error == 0 - && spc->spc_state != SPCSTATE_DYING){ + while (!rw->rw_done && rw->rw_error == 0) { + if (__predict_false(spc->spc_generation != mygen || !imalive)) + break; + /* are we free to receive? */ if (spc->spc_istatus == SPCSTATUS_FREE) { struct kevent kev[8]; @@ -101,9 +192,18 @@ waitresp(struct spclient *spc, struct re for (gotresp = 0; !gotresp; ) { switch (readframe(spc)) { case 0: - rv = kevent(kq, NULL, 0, + rv = host_kevent(kq, NULL, 0, kev, __arraycount(kev), NULL); - assert(rv > 0); + + /* + * XXX: don't know how this can + * happen (timeout cannot expire + * since there isn't one), but + * it does happen + */ + if (__predict_false(rv == 0)) + continue; + for (i = 0; i < rv; i++) { if (kev[i].filter == EVFILT_SIGNAL) @@ -114,7 +214,7 @@ waitresp(struct spclient *spc, struct re continue; case -1: - spc->spc_state = SPCSTATE_DYING; + imalive = false; goto cleanup; default: break; @@ -158,12 +258,12 @@ waitresp(struct spclient *spc, struct re pthread_mutex_unlock(&spc->spc_mtx); pthread_cond_destroy(&rw->rw_cv); - if (spc->spc_state == SPCSTATE_DYING) + if (spc->spc_generation != mygen || !imalive) { return ENOTCONN; + } return rw->rw_error; } - static int syscall_req(struct spclient *spc, int sysnum, const void *data, size_t dlen, void **resp) @@ -180,17 +280,19 @@ syscall_req(struct spclient *spc, int sy pthread_sigmask(SIG_SETMASK, &fullset, &omask); do { - putwait(spc, &rw, &rhdr); - rv = dosend(spc, &rhdr, sizeof(rhdr)); - rv = dosend(spc, data, dlen); - if (rv) { + if ((rv = send_with_recon(spc, &rhdr, sizeof(rhdr))) != 0) { unputwait(spc, &rw); - pthread_sigmask(SIG_SETMASK, &omask, NULL); - return rv; + continue; + } + if ((rv = send_with_recon(spc, data, dlen)) != 0) { + unputwait(spc, &rw); + continue; } - rv = waitresp(spc, &rw, &omask); + rv = cliwaitresp(spc, &rw, &omask, false); + if (rv == ENOTCONN) + rv = EAGAIN; } while (rv == EAGAIN); pthread_sigmask(SIG_SETMASK, &omask, NULL); @@ -199,16 +301,23 @@ syscall_req(struct spclient *spc, int sy } static int -handshake_req(struct spclient *spc, uint32_t *auth, int cancel) +handshake_req(struct spclient *spc, uint32_t *auth, int cancel, bool haslock) { struct handshake_fork rf; struct rsp_hdr rhdr; struct respwait rw; sigset_t omask; + size_t bonus; int rv; + if (auth) { + bonus = sizeof(rf); + } else { + bonus = strlen(getprogname())+1; + } + /* performs server handshake */ - rhdr.rsp_len = sizeof(rhdr) + (auth ? sizeof(rf) : 0); + rhdr.rsp_len = sizeof(rhdr) + bonus; rhdr.rsp_class = RUMPSP_REQ; rhdr.rsp_type = RUMPSP_HANDSHAKE; if (auth) @@ -217,20 +326,30 @@ handshake_req(struct spclient *spc, uint rhdr.rsp_handshake = HANDSHAKE_GUEST; pthread_sigmask(SIG_SETMASK, &fullset, &omask); - putwait(spc, &rw, &rhdr); + if (haslock) + putwait_locked(spc, &rw, &rhdr); + else + putwait(spc, &rw, &rhdr); rv = dosend(spc, &rhdr, sizeof(rhdr)); if (auth) { memcpy(rf.rf_auth, auth, AUTHLEN*sizeof(*auth)); rf.rf_cancel = cancel; - rv = dosend(spc, &rf, sizeof(rf)); - } - if (rv != 0 || cancel) { - unputwait(spc, &rw); - pthread_sigmask(SIG_SETMASK, &omask, NULL); - return rv; + rv = send_with_recon(spc, &rf, sizeof(rf)); + } else { + rv = dosend(spc, getprogname(), strlen(getprogname())+1); + } + if (rv || cancel) { + if (haslock) + unputwait_locked(spc, &rw); + else + unputwait(spc, &rw); + if (cancel) { + pthread_sigmask(SIG_SETMASK, &omask, NULL); + return rv; + } + } else { + rv = cliwaitresp(spc, &rw, &omask, haslock); } - - rv = waitresp(spc, &rw, &omask); pthread_sigmask(SIG_SETMASK, &omask, NULL); if (rv) return rv; @@ -255,26 +374,53 @@ prefork_req(struct spclient *spc, void * rhdr.rsp_error = 0; pthread_sigmask(SIG_SETMASK, &fullset, &omask); - putwait(spc, &rw, &rhdr); - rv = dosend(spc, &rhdr, sizeof(rhdr)); - if (rv != 0) { - unputwait(spc, &rw); - pthread_sigmask(SIG_SETMASK, &omask, NULL); - return rv; - } + do { + putwait(spc, &rw, &rhdr); + rv = send_with_recon(spc, &rhdr, sizeof(rhdr)); + if (rv != 0) { + unputwait(spc, &rw); + continue; + } - rv = waitresp(spc, &rw, &omask); + rv = cliwaitresp(spc, &rw, &omask, false); + if (rv == ENOTCONN) + rv = EAGAIN; + } while (rv == EAGAIN); pthread_sigmask(SIG_SETMASK, &omask, NULL); + *resp = rw.rw_data; return rv; } +/* + * prevent response code from deadlocking with reconnect code + */ static int +resp_sendlock(struct spclient *spc) +{ + int rv = 0; + + pthread_mutex_lock(&spc->spc_mtx); + while (spc->spc_ostatus != SPCSTATUS_FREE) { + if (__predict_false(spc->spc_reconnecting)) { + rv = EBUSY; + goto out; + } + spc->spc_ostatus = SPCSTATUS_WANTED; + pthread_cond_wait(&spc->spc_cv, &spc->spc_mtx); + } + spc->spc_ostatus = SPCSTATUS_BUSY; + + out: + pthread_mutex_unlock(&spc->spc_mtx); + return rv; +} + +static void send_copyin_resp(struct spclient *spc, uint64_t reqno, void *data, size_t dlen, int wantstr) { struct rsp_hdr rhdr; - int rv; if (wantstr) dlen = MIN(dlen, strlen(data)+1); @@ -285,19 +431,17 @@ send_copyin_resp(struct spclient *spc, u rhdr.rsp_type = RUMPSP_COPYIN; rhdr.rsp_sysnum = 0; - sendlock(spc); - rv = dosend(spc, &rhdr, sizeof(rhdr)); - rv = dosend(spc, data, dlen); + if (resp_sendlock(spc) != 0) + return; + (void)dosend(spc, &rhdr, sizeof(rhdr)); + (void)dosend(spc, data, dlen); sendunlock(spc); - - return rv; } -static int +static void send_anonmmap_resp(struct spclient *spc, uint64_t reqno, void *addr) { struct rsp_hdr rhdr; - int rv; rhdr.rsp_len = sizeof(rhdr) + sizeof(addr); rhdr.rsp_reqno = reqno; @@ -305,12 +449,11 @@ send_anonmmap_resp(struct spclient *spc, rhdr.rsp_type = RUMPSP_ANONMMAP; rhdr.rsp_sysnum = 0; - sendlock(spc); - rv = dosend(spc, &rhdr, sizeof(rhdr)); - rv = dosend(spc, &addr, sizeof(addr)); + if (resp_sendlock(spc) != 0) + return; + (void)dosend(spc, &rhdr, sizeof(rhdr)); + (void)dosend(spc, &addr, sizeof(addr)); sendunlock(spc); - - return rv; } int @@ -343,6 +486,7 @@ static void handlereq(struct spclient *spc) { struct rsp_copydata *copydata; + struct rsp_hdr *rhdr = &spc->spc_hdr; void *mapaddr; size_t maplen; int reqtype = spc->spc_hdr.rsp_type; @@ -378,6 +522,14 @@ handlereq(struct spclient *spc) DPRINTF(("rump_sp handlereq: anonmmap: %p\n", mapaddr)); send_anonmmap_resp(spc, spc->spc_hdr.rsp_reqno, mapaddr); break; + case RUMPSP_RAISE: + DPRINTF(("rump_sp handlereq: raise sig %d\n", rhdr->rsp_signo)); + raise((int)rhdr->rsp_signo); + /* + * We most likely have signals blocked, but the signal + * will be handled soon enough when we return. + */ + break; default: printf("PANIC: INVALID TYPE %d\n", reqtype); abort(); @@ -391,58 +543,111 @@ static unsigned ptab_idx; static struct sockaddr *serv_sa; static int -doconnect(void) +doconnect(bool noisy) { + struct respwait rw; + struct rsp_hdr rhdr; struct kevent kev[NSIG+1]; char banner[MAXBANNER]; + struct pollfd pfd; int s, error, flags, i; ssize_t n; + if (kq != -1) + host_close(kq); + kq = -1; + s = -1; + + if (clispc.spc_fd != -1) + host_close(clispc.spc_fd); + clispc.spc_fd = -1; + + /* + * for reconnect, gate everyone out of the receiver code + */ + putwait_locked(&clispc, &rw, &rhdr); + + pthread_mutex_lock(&clispc.spc_mtx); + clispc.spc_reconnecting = 1; + pthread_cond_broadcast(&clispc.spc_cv); + clispc.spc_generation++; + while (clispc.spc_istatus != SPCSTATUS_FREE) { + clispc.spc_istatus = SPCSTATUS_WANTED; + pthread_cond_wait(&rw.rw_cv, &clispc.spc_mtx); + } + kickall(&clispc); + + /* + * we can release it already since we hold the + * send lock during reconnect + * XXX: assert it + */ + clispc.spc_istatus = SPCSTATUS_FREE; + pthread_mutex_unlock(&clispc.spc_mtx); + unputwait_locked(&clispc, &rw); + + free(clispc.spc_buf); + clispc.spc_off = 0; + s = host_socket(parsetab[ptab_idx].domain, SOCK_STREAM, 0); if (s == -1) return -1; - if (host_connect(s, serv_sa, (socklen_t)serv_sa->sa_len) == -1) { + pfd.fd = s; + pfd.events = POLLIN; + while (host_connect(s, serv_sa, (socklen_t)serv_sa->sa_len) == -1) { + if (errno == EINTR) + continue; error = errno; - fprintf(stderr, "rump_sp: client connect failed\n"); + if (noisy) + fprintf(stderr, "rump_sp: client connect failed: %s\n", + strerror(errno)); errno = error; return -1; } if ((error = parsetab[ptab_idx].connhook(s)) != 0) { error = errno; - fprintf(stderr, "rump_sp: connect hook failed\n"); + if (noisy) + fprintf(stderr, "rump_sp: connect hook failed\n"); errno = error; return -1; } if ((n = host_read(s, banner, sizeof(banner)-1)) < 0) { error = errno; - fprintf(stderr, "rump_sp: failed to read banner\n"); + if (noisy) + fprintf(stderr, "rump_sp: failed to read banner\n"); errno = error; return -1; } if (banner[n-1] != '\n') { - fprintf(stderr, "rump_sp: invalid banner\n"); + if (noisy) + fprintf(stderr, "rump_sp: invalid banner\n"); errno = EINVAL; return -1; } banner[n] = '\0'; + /* parse the banner some day */ flags = host_fcntl(s, F_GETFL, 0); if (host_fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1) { - fprintf(stderr, "rump_sp: cannot set socket fd to nonblock\n"); + if (noisy) + fprintf(stderr, "rump_sp: socket fd NONBLOCK: %s\n", + strerror(errno)); errno = EINVAL; return -1; } - - /* parse the banner some day */ + clispc.spc_fd = s; + clispc.spc_state = SPCSTATE_RUNNING; + clispc.spc_reconnecting = 0; /* setup kqueue, we want all signals and the fd */ - if ((kq = kqueue()) == -1) { + if ((kq = host_kqueue()) == -1) { error = errno; - fprintf(stderr, "rump_sp: cannot setup kqueue"); + if (noisy) + fprintf(stderr, "rump_sp: cannot setup kqueue"); errno = error; return -1; } @@ -450,15 +655,23 @@ doconnect(void) for (i = 0; i < NSIG; i++) { EV_SET(&kev[i], i+1, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0, 0, 0); } - EV_SET(&kev[NSIG], s, EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0); - if (kevent(kq, kev, NSIG+1, NULL, 0, NULL) == -1) { + EV_SET(&kev[NSIG], clispc.spc_fd, + EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0); + if (host_kevent(kq, kev, NSIG+1, NULL, 0, NULL) == -1) { error = errno; - fprintf(stderr, "rump_sp: kevent() failed"); + if (noisy) + fprintf(stderr, "rump_sp: kevent() failed"); errno = error; return -1; } - clispc.spc_fd = s; + return 0; +} + +static int +doinit(void) +{ + TAILQ_INIT(&clispc.spc_respwait); pthread_mutex_init(&clispc.spc_mtx, NULL); pthread_cond_init(&clispc.spc_cv, NULL); @@ -492,10 +705,15 @@ rumpclient_init() FINDSYM(connect); FINDSYM(fcntl); FINDSYM(poll); - FINDSYM(pollts); FINDSYM(read); FINDSYM(sendto); FINDSYM(setsockopt); + FINDSYM(kqueue); +#if !__NetBSD_Prereq__(5,99,7) + FINDSYM(kevent); +#else + FINDSYM2(kevent,_sys___kevent50); +#endif #undef FINDSYM #undef FINDSY2 @@ -509,14 +727,17 @@ rumpclient_init() return -1; } - if (doconnect() == -1) + if (doinit() == -1) + return -1; + if (doconnect(true) == -1) return -1; - error = handshake_req(&clispc, NULL, 0); + error = handshake_req(&clispc, NULL, 0, false); if (error) { pthread_mutex_destroy(&clispc.spc_mtx); pthread_cond_destroy(&clispc.spc_cv); - host_close(clispc.spc_fd); + if (clispc.spc_fd != -1) + host_close(clispc.spc_fd); errno = error; return -1; } @@ -556,17 +777,20 @@ int rumpclient_fork_init(struct rumpclient_fork *rpf) { int error; + int osock; - host_close(clispc.spc_fd); - host_close(kq); - kq = -1; + osock = clispc.spc_fd; memset(&clispc, 0, sizeof(clispc)); - clispc.spc_fd = -1; + clispc.spc_fd = osock; - if (doconnect() == -1) + kq = -1; /* kqueue descriptor is not copied over fork() */ + + if (doinit() == -1) + return -1; + if (doconnect(false) == -1) return -1; - error = handshake_req(&clispc, rpf->fork_auth, 0); + error = handshake_req(&clispc, rpf->fork_auth, 0, false); if (error) { pthread_mutex_destroy(&clispc.spc_mtx); pthread_cond_destroy(&clispc.spc_cv); @@ -576,3 +800,13 @@ rumpclient_fork_init(struct rumpclient_f return 0; } + +void +rumpclient_setconnretry(time_t timeout) +{ + + if (timeout < RUMPCLIENT_RETRYCONN_ONCE) + return; /* gigo */ + + retrytimo = timeout; +}