[BACK]Return to hijack.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / lib / librumphijack

Annotation of src/lib/librumphijack/hijack.c, Revision 1.21

1.21    ! christos    1: /*      $NetBSD: hijack.c,v 1.20 2011/01/25 17:37:00 pooka Exp $       */
1.1       pooka       2:
                      3: /*-
                      4:  * Copyright (c) 2011 Antti Kantee.  All Rights Reserved.
                      5:  *
                      6:  * Redistribution and use in source and binary forms, with or without
                      7:  * modification, are permitted provided that the following conditions
                      8:  * are met:
                      9:  * 1. Redistributions of source code must retain the above copyright
                     10:  *    notice, this list of conditions and the following disclaimer.
                     11:  * 2. Redistributions in binary form must reproduce the above copyright
                     12:  *    notice, this list of conditions and the following disclaimer in the
                     13:  *    documentation and/or other materials provided with the distribution.
                     14:  *
                     15:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
                     16:  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
                     17:  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
                     18:  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
                     19:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     20:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
                     21:  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     22:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     23:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     24:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     25:  * SUCH DAMAGE.
                     26:  */
                     27:
                     28: #include <sys/cdefs.h>
1.21    ! christos   29: __RCSID("$NetBSD: hijack.c,v 1.20 2011/01/25 17:37:00 pooka Exp $");
        !            30:
        !            31: #define __ssp_weak_name(fun) _hijack_ ## fun
1.1       pooka      32:
                     33: #include <sys/param.h>
                     34: #include <sys/types.h>
1.10      pooka      35: #include <sys/event.h>
1.1       pooka      36: #include <sys/ioctl.h>
                     37: #include <sys/socket.h>
                     38: #include <sys/poll.h>
                     39:
                     40: #include <rump/rumpclient.h>
                     41: #include <rump/rump_syscalls.h>
                     42:
                     43: #include <assert.h>
                     44: #include <dlfcn.h>
                     45: #include <err.h>
                     46: #include <errno.h>
                     47: #include <fcntl.h>
                     48: #include <poll.h>
                     49: #include <pthread.h>
1.3       pooka      50: #include <signal.h>
1.1       pooka      51: #include <stdarg.h>
1.8       pooka      52: #include <stdbool.h>
1.1       pooka      53: #include <stdio.h>
                     54: #include <stdlib.h>
1.3       pooka      55: #include <time.h>
1.1       pooka      56: #include <unistd.h>
                     57:
/*
 * One identifier per call that exists both in the host and in the rump
 * kernel.  The values are used as indices into syscalls[], so
 * DUALCALL__NUM must stay last: it is the number of entries.
 */
enum dualcall {
	DUALCALL_WRITE,
	DUALCALL_WRITEV,
	DUALCALL_IOCTL,
	DUALCALL_FCNTL,
	DUALCALL_SOCKET,
	DUALCALL_ACCEPT,
	DUALCALL_BIND,
	DUALCALL_CONNECT,
	DUALCALL_GETPEERNAME,
	DUALCALL_GETSOCKNAME,
	DUALCALL_LISTEN,
	DUALCALL_RECVFROM,
	DUALCALL_RECVMSG,
	DUALCALL_SENDTO,
	DUALCALL_SENDMSG,
	DUALCALL_GETSOCKOPT,
	DUALCALL_SETSOCKOPT,
	DUALCALL_SHUTDOWN,
	DUALCALL_READ,
	DUALCALL_READV,
	DUALCALL_DUP2,
	DUALCALL_CLOSE,
	DUALCALL_POLLTS,
	DUALCALL__NUM
};
                     72:
/*
 * RSYS_NAME(a) yields, as a string literal, the expansion of
 * RUMP_SYS_RENAME_<a>.  The extra RSYS_STRING level lets the pasted
 * name be macro-expanded before it is stringified.
 * NOTE(review): RUMP_SYS_RENAME_* are presumably provided by
 * <rump/rump_syscalls.h> -- confirm.
 */
#define RSYS_STRING(a) __STRING(a)
#define RSYS_NAME(a) RSYS_STRING(__CONCAT(RUMP_SYS_RENAME_,a))

/*
 * Would be nice to get this automatically in sync with libc.
 * Also, this does not work for compat-using binaries!
 *
 * LIBCSELECT/LIBCPOLLTS/LIBCPOLL name the symbols this file works with:
 * the plain names when __NetBSD_Prereq__(5,99,7) is false, the
 * "50"-suffixed versioned names otherwise.
 */
#if !__NetBSD_Prereq__(5,99,7)
#define LIBCSELECT select
#define LIBCPOLLTS pollts
#define LIBCPOLL poll
#else
#define LIBCSELECT __select50
#define LIBCPOLLTS __pollts50
#define LIBCPOLL __poll50
#endif

/* Prototypes for the selected symbol names. */
int LIBCSELECT(int, fd_set *, fd_set *, fd_set *, struct timeval *);
int LIBCPOLLTS(struct pollfd *, nfds_t,
	       const struct timespec *, const sigset_t *);
int LIBCPOLL(struct pollfd *, nfds_t, int);
1.17      pooka      94:
                     95: #define S(a) __STRING(a)
                     96: struct sysnames {
                     97:        enum dualcall scm_callnum;
                     98:        const char *scm_hostname;
                     99:        const char *scm_rumpname;
                    100: } syscnames[] = {
                    101:        { DUALCALL_SOCKET,      "__socket30",   RSYS_NAME(SOCKET)       },
                    102:        { DUALCALL_ACCEPT,      "accept",       RSYS_NAME(ACCEPT)       },
                    103:        { DUALCALL_BIND,        "bind",         RSYS_NAME(BIND)         },
                    104:        { DUALCALL_CONNECT,     "connect",      RSYS_NAME(CONNECT)      },
                    105:        { DUALCALL_GETPEERNAME, "getpeername",  RSYS_NAME(GETPEERNAME)  },
                    106:        { DUALCALL_GETSOCKNAME, "getsockname",  RSYS_NAME(GETSOCKNAME)  },
                    107:        { DUALCALL_LISTEN,      "listen",       RSYS_NAME(LISTEN)       },
                    108:        { DUALCALL_RECVFROM,    "recvfrom",     RSYS_NAME(RECVFROM)     },
                    109:        { DUALCALL_RECVMSG,     "recvmsg",      RSYS_NAME(RECVMSG)      },
                    110:        { DUALCALL_SENDTO,      "sendto",       RSYS_NAME(SENDTO)       },
                    111:        { DUALCALL_SENDMSG,     "sendmsg",      RSYS_NAME(SENDMSG)      },
                    112:        { DUALCALL_GETSOCKOPT,  "getsockopt",   RSYS_NAME(GETSOCKOPT)   },
                    113:        { DUALCALL_SETSOCKOPT,  "setsockopt",   RSYS_NAME(SETSOCKOPT)   },
                    114:        { DUALCALL_SHUTDOWN,    "shutdown",     RSYS_NAME(SHUTDOWN)     },
                    115:        { DUALCALL_READ,        "read",         RSYS_NAME(READ)         },
                    116:        { DUALCALL_READV,       "readv",        RSYS_NAME(READV)        },
                    117:        { DUALCALL_WRITE,       "write",        RSYS_NAME(WRITE)        },
                    118:        { DUALCALL_WRITEV,      "writev",       RSYS_NAME(WRITEV)       },
                    119:        { DUALCALL_IOCTL,       "ioctl",        RSYS_NAME(IOCTL)        },
                    120:        { DUALCALL_FCNTL,       "fcntl",        RSYS_NAME(FCNTL)        },
                    121:        { DUALCALL_DUP2,        "dup2",         RSYS_NAME(DUP2)         },
                    122:        { DUALCALL_CLOSE,       "close",        RSYS_NAME(CLOSE)        },
                    123:        { DUALCALL_POLLTS,      S(LIBCPOLLTS),  RSYS_NAME(POLLTS)       },
                    124: };
                    125: #undef S
                    126:
                    127: struct bothsys {
                    128:        void *bs_host;
                    129:        void *bs_rump;
                    130: } syscalls[DUALCALL__NUM];
                    131: #define GETSYSCALL(which, name) syscalls[DUALCALL_##name].bs_##which
                    132:
/*
 * The "fork" found after this library in symbol lookup order.
 * Assigned in rcinit() with dlsym(RTLD_NEXT, "fork") and called by the
 * fork() wrapper in this file.
 */
pid_t (*host_fork)(void);
                    134:
                    135: static unsigned dup2mask;
                    136: #define ISDUP2D(fd) (1<<(fd) & dup2mask)
                    137:
                    138: //#define DEBUGJACK
                    139: #ifdef DEBUGJACK
                    140: #define DPRINTF(x) mydprintf x
                    141: static void
                    142: mydprintf(const char *fmt, ...)
                    143: {
                    144:        va_list ap;
                    145:
                    146:        if (ISDUP2D(STDERR_FILENO))
                    147:                return;
                    148:
                    149:        va_start(ap, fmt);
                    150:        vfprintf(stderr, fmt, ap);
                    151:        va_end(ap);
                    152: }
                    153:
                    154: #else
                    155: #define DPRINTF(x)
1.14      pooka     156: #endif
                    157:
/*
 * FDCALL generates a wrapper function for a call whose routing depends
 * on a descriptor.
 *
 *   type    return type of the wrapper
 *   name    name of the generated function
 *   rcname  index into syscalls[] (a DUALCALL_* value)
 *   args    parenthesized parameter list; must declare a parameter
 *           called "fd"
 *   proto   parenthesized prototype of the underlying function
 *   vars    parenthesized argument list passed to the underlying function
 *
 * If fd_isrump(fd) holds, the rump entry point is called with fd
 * translated by fd_host2rump(); otherwise the host entry point is
 * called with fd unchanged.
 */
#define FDCALL(type, name, rcname, args, proto, vars)			\
type name args								\
{									\
	type (*fun) proto;						\
									\
	if (fd_isrump(fd)) {						\
		fun = syscalls[rcname].bs_rump;				\
		fd = fd_host2rump(fd);					\
	} else {							\
		fun = syscalls[rcname].bs_host;				\
	}								\
									\
	return fun vars;						\
}
                    172:
/*
 * dlsym() trampoline.  rcinit() stores its address into librumpclient's
 * "rumpclient_dlsym" variable so that, in the LD_PRELOAD case,
 * librumpclient's lookups are issued from this library and RTLD_NEXT
 * resolves correctly.
 */
static void *
hijackdlsym(void *handle, const char *symbol)
{
	void *addr;

	addr = dlsym(handle, symbol);
	return addr;
}
                    183:
/*
 * low calorie sockets?
 * When true, __socket30() creates AF_LOCAL sockets with the host socket
 * call instead of the rump kernel one.
 */
static bool hostlocalsockets = true;
1.7       pooka     186:
1.1       pooka     187: static void __attribute__((constructor))
                    188: rcinit(void)
                    189: {
                    190:        int (*rumpcinit)(void);
                    191:        void **rumpcdlsym;
                    192:        void *hand;
1.19      pooka     193:        unsigned i, j;
1.1       pooka     194:
                    195:        hand = dlopen("librumpclient.so", RTLD_LAZY|RTLD_GLOBAL);
                    196:        if (!hand)
                    197:                err(1, "cannot open librumpclient.so");
                    198:        rumpcinit = dlsym(hand, "rumpclient_init");
                    199:        _DIAGASSERT(rumpcinit);
                    200:
                    201:        rumpcdlsym = dlsym(hand, "rumpclient_dlsym");
                    202:        *rumpcdlsym = hijackdlsym;
1.17      pooka     203:        host_fork = dlsym(RTLD_NEXT, "fork");
                    204:
                    205:        /*
                    206:         * In theory cannot print anything during lookups because
                    207:         * we might not have the call vector set up.  so, the errx()
                    208:         * is a bit of a strech, but it might work.
                    209:         */
1.1       pooka     210:
1.17      pooka     211:        for (i = 0; i < DUALCALL__NUM; i++) {
                    212:                /* build runtime O(1) access */
                    213:                for (j = 0; j < __arraycount(syscnames); j++) {
                    214:                        if (syscnames[j].scm_callnum == i)
                    215:                                break;
                    216:                }
                    217:
                    218:                if (j == __arraycount(syscnames))
                    219:                        errx(1, "rumphijack error: syscall pos %d missing", i);
                    220:
                    221:                syscalls[i].bs_host = dlsym(hand,syscnames[j].scm_hostname);
                    222:                if (syscalls[i].bs_host == NULL)
                    223:                        errx(1, "hostcall %s not found missing",
                    224:                            syscnames[j].scm_hostname);
                    225:
                    226:                syscalls[i].bs_rump = dlsym(hand,syscnames[j].scm_rumpname);
                    227:                if (syscalls[i].bs_rump == NULL)
                    228:                        errx(1, "rumpcall %s not found missing",
                    229:                            syscnames[j].scm_rumpname);
1.1       pooka     230:        }
                    231:
                    232:        if (rumpcinit() == -1)
                    233:                err(1, "rumpclient init");
                    234: }
                    235:
/* XXX: need runtime selection.  low for now due to FD_SETSIZE */
#define HIJACK_FDOFF 128
#define HIJACK_SELECT 128 /* XXX */
#define HIJACK_ASSERT 128 /* XXX */

/*
 * Translate a descriptor returned by the rump kernel into the number
 * handed to the application.  -1 (error) and descriptors marked in
 * dup2mask pass through unchanged; everything else is shifted up by
 * HIJACK_FDOFF.
 */
static int
fd_rump2host(int fd)
{

	if (fd != -1 && !ISDUP2D(fd))
		return fd + HIJACK_FDOFF;

	return fd;
}
                    252:
                    253: static int
                    254: fd_host2rump(int fd)
                    255: {
                    256:
                    257:        if (!ISDUP2D(fd))
                    258:                fd -= HIJACK_FDOFF;
                    259:        return fd;
                    260: }
                    261:
                    262: static bool
                    263: fd_isrump(int fd)
                    264: {
                    265:
                    266:        return ISDUP2D(fd) || fd >= HIJACK_FDOFF;
                    267: }
                    268:
/*
 * Assert that _fd_ is a rump kernel descriptor: either marked in
 * dup2mask or at least HIJACK_ASSERT.
 */
#define assertfd(_fd_) assert(ISDUP2D(_fd_) || (_fd_) >= HIJACK_ASSERT)
/* HIJACK_FDOFF is not available below this point. */
#undef HIJACK_FDOFF
                    271:
1.1       pooka     272: int __socket30(int, int, int);
                    273: int
                    274: __socket30(int domain, int type, int protocol)
                    275: {
1.17      pooka     276:        int (*op_socket)(int, int, int);
1.1       pooka     277:        int fd;
1.7       pooka     278:        bool dohost;
                    279:
                    280:        dohost = hostlocalsockets && (domain == AF_LOCAL);
1.1       pooka     281:
1.7       pooka     282:        if (dohost)
1.17      pooka     283:                op_socket = GETSYSCALL(host, SOCKET);
1.7       pooka     284:        else
1.17      pooka     285:                op_socket = GETSYSCALL(rump, SOCKET);
                    286:        fd = op_socket(domain, type, protocol);
1.2       pooka     287:
1.7       pooka     288:        if (!dohost)
                    289:                fd = fd_rump2host(fd);
                    290:        DPRINTF(("socket <- %d\n", fd));
1.2       pooka     291:
1.7       pooka     292:        return fd;
1.1       pooka     293: }
                    294:
                    295: int
                    296: accept(int s, struct sockaddr *addr, socklen_t *addrlen)
                    297: {
1.17      pooka     298:        int (*op_accept)(int, struct sockaddr *, socklen_t *);
1.1       pooka     299:        int fd;
1.7       pooka     300:        bool isrump;
                    301:
                    302:        isrump = fd_isrump(s);
1.1       pooka     303:
1.2       pooka     304:        DPRINTF(("accept -> %d", s));
1.7       pooka     305:        if (isrump) {
1.17      pooka     306:                op_accept = GETSYSCALL(rump, ACCEPT);
1.7       pooka     307:                s = fd_host2rump(s);
                    308:        } else {
1.17      pooka     309:                op_accept = GETSYSCALL(host, ACCEPT);
1.7       pooka     310:        }
1.17      pooka     311:        fd = op_accept(s, addr, addrlen);
1.7       pooka     312:        if (fd != -1 && isrump)
                    313:                fd = fd_rump2host(fd);
                    314:
                    315:        DPRINTF((" <- %d\n", fd));
1.2       pooka     316:
1.7       pooka     317:        return fd;
1.1       pooka     318: }
                    319:
1.17      pooka     320: /*
                    321:  * ioctl and fcntl are varargs calls and need special treatment
                    322:  */
1.1       pooka     323: int
1.17      pooka     324: ioctl(int fd, unsigned long cmd, ...)
1.1       pooka     325: {
1.17      pooka     326:        int (*op_ioctl)(int, unsigned long cmd, ...);
                    327:        va_list ap;
                    328:        int rv;
1.1       pooka     329:
1.17      pooka     330:        DPRINTF(("ioctl -> %d\n", fd));
                    331:        if (fd_isrump(fd)) {
                    332:                fd = fd_host2rump(fd);
                    333:                op_ioctl = GETSYSCALL(rump, IOCTL);
1.7       pooka     334:        } else {
1.17      pooka     335:                op_ioctl = GETSYSCALL(host, IOCTL);
1.7       pooka     336:        }
1.1       pooka     337:
1.17      pooka     338:        va_start(ap, cmd);
                    339:        rv = op_ioctl(fd, cmd, va_arg(ap, void *));
                    340:        va_end(ap);
                    341:        return rv;
1.1       pooka     342: }
                    343:
                    344: int
1.17      pooka     345: fcntl(int fd, int cmd, ...)
1.1       pooka     346: {
1.17      pooka     347:        int (*op_fcntl)(int, int, ...);
                    348:        va_list ap;
                    349:        int rv;
1.1       pooka     350:
1.17      pooka     351:        DPRINTF(("fcntl -> %d\n", fd));
                    352:        if (fd_isrump(fd)) {
                    353:                fd = fd_host2rump(fd);
                    354:                op_fcntl = GETSYSCALL(rump, FCNTL);
1.7       pooka     355:        } else {
1.17      pooka     356:                op_fcntl = GETSYSCALL(host, FCNTL);
1.7       pooka     357:        }
1.1       pooka     358:
1.17      pooka     359:        va_start(ap, cmd);
                    360:        rv = op_fcntl(fd, cmd, va_arg(ap, void *));
                    361:        va_end(ap);
                    362:        return rv;
1.1       pooka     363: }
                    364:
1.17      pooka     365: /*
                    366:  * write cannot issue a standard debug printf due to recursion
                    367:  */
1.1       pooka     368: ssize_t
1.17      pooka     369: write(int fd, const void *buf, size_t blen)
1.1       pooka     370: {
1.17      pooka     371:        ssize_t (*op_write)(int, const void *, size_t);
1.1       pooka     372:
1.17      pooka     373:        if (fd_isrump(fd)) {
                    374:                fd = fd_host2rump(fd);
                    375:                op_write = GETSYSCALL(rump, WRITE);
1.16      pooka     376:        } else {
1.17      pooka     377:                op_write = GETSYSCALL(host, WRITE);
1.16      pooka     378:        }
1.1       pooka     379:
1.17      pooka     380:        return op_write(fd, buf, blen);
1.2       pooka     381: }
                    382:
                    383: /*
                    384:  * dup2 is special.  we allow dup2 of a rump kernel fd to 0-2 since
                    385:  * many programs do that.  dup2 of a rump kernel fd to another value
                    386:  * not >= fdoff is an error.
                    387:  *
                    388:  * Note: cannot rump2host newd, because it is often hardcoded.
                    389:  */
                    390: int
                    391: dup2(int oldd, int newd)
                    392: {
1.17      pooka     393:        int (*host_dup2)(int, int);
1.2       pooka     394:        int rv;
                    395:
                    396:        DPRINTF(("dup2 -> %d (o) -> %d (n)\n", oldd, newd));
                    397:
                    398:        if (fd_isrump(oldd)) {
                    399:                if (!(newd >= 0 && newd <= 2))
                    400:                        return EBADF;
                    401:                oldd = fd_host2rump(oldd);
                    402:                rv = rump_sys_dup2(oldd, newd);
                    403:                if (rv != -1)
1.10      pooka     404:                        dup2mask |= 1<<newd;
1.2       pooka     405:        } else {
1.17      pooka     406:                host_dup2 = syscalls[DUALCALL_DUP2].bs_host;
1.10      pooka     407:                rv = host_dup2(oldd, newd);
1.2       pooka     408:        }
1.10      pooka     409:
                    410:        return rv;
1.2       pooka     411: }
                    412:
                    413: /*
                    414:  * We just wrap fork the appropriate rump client calls to preserve
                    415:  * the file descriptors of the forked parent in the child, but
                    416:  * prevent double use of connection fd.
                    417:  */
                    418: pid_t
                    419: fork()
                    420: {
                    421:        struct rumpclient_fork *rf;
                    422:        pid_t rv;
                    423:
                    424:        DPRINTF(("fork\n"));
                    425:
                    426:        if ((rf = rumpclient_prefork()) == NULL)
                    427:                return -1;
                    428:
                    429:        switch ((rv = host_fork())) {
                    430:        case -1:
                    431:                /* XXX: cancel rf */
                    432:                break;
                    433:        case 0:
                    434:                if (rumpclient_fork_init(rf) == -1)
                    435:                        rv = -1;
                    436:                break;
                    437:        default:
                    438:                break;
                    439:        }
                    440:
                    441:        DPRINTF(("fork returns %d\n", rv));
                    442:        return rv;
1.1       pooka     443: }
                    444:
                    445: /*
1.17      pooka     446:  * select is done by calling poll.
1.1       pooka     447:  */
                    448: int
1.17      pooka     449: LIBCSELECT(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
1.4       pooka     450:        struct timeval *timeout)
1.1       pooka     451: {
1.4       pooka     452:        struct pollfd *pfds;
                    453:        struct timespec ts, *tsp = NULL;
1.19      pooka     454:        nfds_t realnfds;
                    455:        int i, j;
1.4       pooka     456:        int rv, incr;
                    457:
1.7       pooka     458:        DPRINTF(("select\n"));
                    459:
1.4       pooka     460:        /*
                    461:         * Well, first we must scan the fds to figure out how many
                    462:         * fds there really are.  This is because up to and including
1.17      pooka     463:         * nb5 poll() silently refuses nfds > process_maxopen_fds.
1.4       pooka     464:         * Seems to be fixed in current, thank the maker.
                    465:         * god damn cluster...bomb.
                    466:         */
                    467:
                    468:        for (i = 0, realnfds = 0; i < nfds; i++) {
                    469:                if (readfds && FD_ISSET(i, readfds)) {
                    470:                        realnfds++;
                    471:                        continue;
                    472:                }
                    473:                if (writefds && FD_ISSET(i, writefds)) {
                    474:                        realnfds++;
                    475:                        continue;
                    476:                }
                    477:                if (exceptfds && FD_ISSET(i, exceptfds)) {
                    478:                        realnfds++;
                    479:                        continue;
1.1       pooka     480:                }
                    481:        }
                    482:
1.6       pooka     483:        if (realnfds) {
                    484:                pfds = malloc(sizeof(*pfds) * realnfds);
                    485:                if (!pfds)
                    486:                        return -1;
                    487:        } else {
                    488:                pfds = NULL;
                    489:        }
1.1       pooka     490:
1.4       pooka     491:        for (i = 0, j = 0; i < nfds; i++) {
                    492:                incr = 0;
                    493:                pfds[j].events = pfds[j].revents = 0;
                    494:                if (readfds && FD_ISSET(i, readfds)) {
                    495:                        pfds[j].fd = i;
                    496:                        pfds[j].events |= POLLIN;
                    497:                        incr=1;
                    498:                }
                    499:                if (writefds && FD_ISSET(i, writefds)) {
                    500:                        pfds[j].fd = i;
                    501:                        pfds[j].events |= POLLOUT;
                    502:                        incr=1;
                    503:                }
                    504:                if (exceptfds && FD_ISSET(i, exceptfds)) {
                    505:                        pfds[j].fd = i;
                    506:                        pfds[j].events |= POLLHUP|POLLERR;
                    507:                        incr=1;
1.1       pooka     508:                }
1.4       pooka     509:                if (incr)
                    510:                        j++;
1.1       pooka     511:        }
                    512:
1.4       pooka     513:        if (timeout) {
                    514:                TIMEVAL_TO_TIMESPEC(timeout, &ts);
                    515:                tsp = &ts;
                    516:        }
                    517:        rv = pollts(pfds, realnfds, tsp, NULL);
                    518:        if (rv <= 0)
                    519:                goto out;
                    520:
                    521:        /*
                    522:         * ok, harvest results.  first zero out entries (can't use
                    523:         * FD_ZERO for the obvious select-me-not reason).  whee.
                    524:         */
                    525:        for (i = 0; i < nfds; i++) {
                    526:                if (readfds)
                    527:                        FD_CLR(i, readfds);
                    528:                if (writefds)
                    529:                        FD_CLR(i, writefds);
                    530:                if (exceptfds)
                    531:                        FD_CLR(i, exceptfds);
1.1       pooka     532:        }
                    533:
1.4       pooka     534:        /* and then plug in the results */
1.19      pooka     535:        for (i = 0; i < (int)realnfds; i++) {
1.4       pooka     536:                if (readfds) {
                    537:                        if (pfds[i].revents & POLLIN) {
                    538:                                FD_SET(pfds[i].fd, readfds);
                    539:                        }
                    540:                }
                    541:                if (writefds) {
                    542:                        if (pfds[i].revents & POLLOUT) {
                    543:                                FD_SET(pfds[i].fd, writefds);
                    544:                        }
                    545:                }
                    546:                if (exceptfds) {
                    547:                        if (pfds[i].revents & (POLLHUP|POLLERR)) {
                    548:                                FD_SET(pfds[i].fd, exceptfds);
                    549:                        }
                    550:                }
1.1       pooka     551:        }
                    552:
1.4       pooka     553:  out:
                    554:        free(pfds);
1.1       pooka     555:        return rv;
                    556: }
                    557:
                    558: static void
                    559: checkpoll(struct pollfd *fds, nfds_t nfds, int *hostcall, int *rumpcall)
                    560: {
                    561:        nfds_t i;
                    562:
                    563:        for (i = 0; i < nfds; i++) {
1.12      pooka     564:                if (fds[i].fd == -1)
                    565:                        continue;
                    566:
1.2       pooka     567:                if (fd_isrump(fds[i].fd))
                    568:                        (*rumpcall)++;
                    569:                else
1.1       pooka     570:                        (*hostcall)++;
                    571:        }
                    572: }
                    573:
                    574: static void
1.2       pooka     575: adjustpoll(struct pollfd *fds, nfds_t nfds, int (*fdadj)(int))
1.1       pooka     576: {
                    577:        nfds_t i;
                    578:
                    579:        for (i = 0; i < nfds; i++) {
1.2       pooka     580:                fds[i].fd = fdadj(fds[i].fd);
1.1       pooka     581:        }
                    582: }
                    583:
/*
 * poll is easy as long as the call comes in the fds only in one
 * kernel.  otherwise it's quite tricky...
 *
 * struct pollarg is the argument block handed to hostpoll().
 */
struct pollarg {
	struct pollfd *pfds;		/* descriptors passed to the host pollts */
	nfds_t nfds;			/* number of entries in pfds */
	const struct timespec *ts;	/* timeout passed to the host pollts */
	const sigset_t *sigmask;	/* signal mask passed to the host pollts */
	int pipefd;			/* hostpoll() writes its result here with
					   rump_sys_write() */
	int errnum;			/* errno saved by hostpoll() when the host
					   pollts returned -1 */
};
                    596:
                    597: static void *
                    598: hostpoll(void *arg)
                    599: {
1.17      pooka     600:        int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *,
                    601:                         const sigset_t *);
1.1       pooka     602:        struct pollarg *parg = arg;
                    603:        intptr_t rv;
                    604:
1.17      pooka     605:        op_pollts = syscalls[DUALCALL_POLLTS].bs_host;
                    606:        rv = op_pollts(parg->pfds, parg->nfds, parg->ts, parg->sigmask);
1.1       pooka     607:        if (rv == -1)
                    608:                parg->errnum = errno;
                    609:        rump_sys_write(parg->pipefd, &rv, sizeof(rv));
                    610:
                    611:        return (void *)(intptr_t)rv;
                    612: }
                    613:
                    614: int
1.17      pooka     615: LIBCPOLLTS(struct pollfd *fds, nfds_t nfds, const struct timespec *ts,
1.3       pooka     616:        const sigset_t *sigmask)
1.1       pooka     617: {
1.3       pooka     618:        int (*op_pollts)(struct pollfd *, nfds_t, const struct timespec *,
                    619:                         const sigset_t *);
1.17      pooka     620:        int (*host_close)(int);
1.1       pooka     621:        int hostcall = 0, rumpcall = 0;
                    622:        pthread_t pt;
                    623:        nfds_t i;
                    624:        int rv;
                    625:
1.2       pooka     626:        DPRINTF(("poll\n"));
1.1       pooka     627:        checkpoll(fds, nfds, &hostcall, &rumpcall);
                    628:
                    629:        if (hostcall && rumpcall) {
                    630:                struct pollfd *pfd_host = NULL, *pfd_rump = NULL;
                    631:                int rpipe[2] = {-1,-1}, hpipe[2] = {-1,-1};
                    632:                struct pollarg parg;
                    633:                uintptr_t lrv;
                    634:                int sverrno = 0, trv;
                    635:
                    636:                /*
                    637:                 * ok, this is where it gets tricky.  We must support
                    638:                 * this since it's a very common operation in certain
                    639:                 * types of software (telnet, netcat, etc).  We allocate
                    640:                 * two vectors and run two poll commands in separate
                    641:                 * threads.  Whichever returns first "wins" and the
                    642:                 * other kernel's fds won't show activity.
                    643:                 */
                    644:                rv = -1;
                    645:
                    646:                /* allocate full vector for O(n) joining after call */
                    647:                pfd_host = malloc(sizeof(*pfd_host)*(nfds+1));
                    648:                if (!pfd_host)
                    649:                        goto out;
                    650:                pfd_rump = malloc(sizeof(*pfd_rump)*(nfds+1));
                    651:                if (!pfd_rump) {
                    652:                        goto out;
                    653:                }
                    654:
                    655:                /* split vectors */
                    656:                for (i = 0; i < nfds; i++) {
1.3       pooka     657:                        if (fds[i].fd == -1) {
                    658:                                pfd_host[i].fd = -1;
                    659:                                pfd_rump[i].fd = -1;
                    660:                        } else if (fd_isrump(fds[i].fd)) {
1.2       pooka     661:                                pfd_host[i].fd = -1;
                    662:                                pfd_rump[i].fd = fd_host2rump(fds[i].fd);
                    663:                                pfd_rump[i].events = fds[i].events;
                    664:                        } else {
                    665:                                pfd_rump[i].fd = -1;
1.1       pooka     666:                                pfd_host[i].fd = fds[i].fd;
                    667:                                pfd_host[i].events = fds[i].events;
                    668:                        }
1.13      pooka     669:                        fds[i].revents = 0;
1.1       pooka     670:                }
                    671:
                    672:                /*
                    673:                 * then, open two pipes, one for notifications
                    674:                 * to each kernel.
                    675:                 */
                    676:                if (rump_sys_pipe(rpipe) == -1)
                    677:                        goto out;
                    678:                if (pipe(hpipe) == -1)
                    679:                        goto out;
                    680:
                    681:                pfd_host[nfds].fd = hpipe[0];
                    682:                pfd_host[nfds].events = POLLIN;
                    683:                pfd_rump[nfds].fd = rpipe[0];
                    684:                pfd_rump[nfds].events = POLLIN;
                    685:
                    686:                /*
                    687:                 * then, create a thread to do host part and meanwhile
                    688:                 * do rump kernel part right here
                    689:                 */
                    690:
                    691:                parg.pfds = pfd_host;
                    692:                parg.nfds = nfds+1;
1.3       pooka     693:                parg.ts = ts;
                    694:                parg.sigmask = sigmask;
1.1       pooka     695:                parg.pipefd = rpipe[1];
                    696:                pthread_create(&pt, NULL, hostpoll, &parg);
                    697:
1.17      pooka     698:                op_pollts = syscalls[DUALCALL_POLLTS].bs_rump;
1.3       pooka     699:                lrv = op_pollts(pfd_rump, nfds+1, ts, NULL);
1.1       pooka     700:                sverrno = errno;
                    701:                write(hpipe[1], &rv, sizeof(rv));
                    702:                pthread_join(pt, (void *)&trv);
                    703:
                    704:                /* check who "won" and merge results */
                    705:                if (lrv != 0 && pfd_host[nfds].revents & POLLIN) {
                    706:                        rv = trv;
                    707:
                    708:                        for (i = 0; i < nfds; i++) {
                    709:                                if (pfd_rump[i].fd != -1)
                    710:                                        fds[i].revents = pfd_rump[i].revents;
                    711:                        }
                    712:                        sverrno = parg.errnum;
                    713:                } else if (trv != 0 && pfd_rump[nfds].revents & POLLIN) {
                    714:                        rv = trv;
                    715:
                    716:                        for (i = 0; i < nfds; i++) {
                    717:                                if (pfd_host[i].fd != -1)
                    718:                                        fds[i].revents = pfd_host[i].revents;
                    719:                        }
                    720:                } else {
                    721:                        rv = 0;
                    722:                }
                    723:
                    724:  out:
1.17      pooka     725:                host_close = syscalls[DUALCALL_CLOSE].bs_host;
1.1       pooka     726:                if (rpipe[0] != -1)
                    727:                        rump_sys_close(rpipe[0]);
                    728:                if (rpipe[1] != -1)
                    729:                        rump_sys_close(rpipe[1]);
                    730:                if (hpipe[0] != -1)
1.9       pooka     731:                        host_close(hpipe[0]);
1.1       pooka     732:                if (hpipe[1] != -1)
1.9       pooka     733:                        host_close(hpipe[1]);
1.1       pooka     734:                free(pfd_host);
                    735:                free(pfd_rump);
                    736:                errno = sverrno;
                    737:        } else {
                    738:                if (hostcall) {
1.17      pooka     739:                        op_pollts = syscalls[DUALCALL_POLLTS].bs_host;
1.1       pooka     740:                } else {
1.17      pooka     741:                        op_pollts = syscalls[DUALCALL_POLLTS].bs_rump;
1.2       pooka     742:                        adjustpoll(fds, nfds, fd_host2rump);
1.1       pooka     743:                }
                    744:
1.3       pooka     745:                rv = op_pollts(fds, nfds, ts, sigmask);
1.1       pooka     746:                if (rumpcall)
1.2       pooka     747:                        adjustpoll(fds, nfds, fd_rump2host);
1.1       pooka     748:        }
                    749:
                    750:        return rv;
                    751: }
                    752:
                    753: int
1.17      pooka     754: LIBCPOLL(struct pollfd *fds, nfds_t nfds, int timeout)
1.1       pooka     755: {
1.3       pooka     756:        struct timespec ts;
                    757:        struct timespec *tsp = NULL;
                    758:
                    759:        if (timeout != INFTIM) {
                    760:                ts.tv_sec = timeout / 1000;
1.11      pooka     761:                ts.tv_nsec = (timeout % 1000) * 1000*1000;
1.3       pooka     762:
                    763:                tsp = &ts;
                    764:        }
1.1       pooka     765:
1.3       pooka     766:        return pollts(fds, nfds, tsp, NULL);
1.1       pooka     767: }
1.10      pooka     768:
                    769: int
                    770: kqueue(void)
                    771: {
                    772:
1.17      pooka     773:        fprintf(stderr, "kqueue unsupported");
1.10      pooka     774:        abort();
1.17      pooka     775:        /*NOTREACHED*/
1.10      pooka     776: }
                    777:
/*
 * kevent is not supported: all arguments are ignored, a diagnostic is
 * printed on stderr and the process is aborted.  Never returns.
 */
/*ARGSUSED*/
int
kevent(int kq, const struct kevent *changelist, size_t nchanges,
	struct kevent *eventlist, size_t nevents,
	const struct timespec *timeout)
{
	static const char msg[] = "kqueue unsupported";

	fputs(msg, stderr);
	abort();
	/*NOTREACHED*/
}
1.17      pooka     789:
                    790: /*
                    791:  * Rest are std type calls.
                    792:  */
                    793:
                    794: FDCALL(int, bind, DUALCALL_BIND,                                       \
                    795:        (int fd, const struct sockaddr *name, socklen_t namelen),       \
                    796:        (int, const struct sockaddr *, socklen_t),                      \
                    797:        (fd, name, namelen))
                    798:
                    799: FDCALL(int, connect, DUALCALL_CONNECT,                                 \
                    800:        (int fd, const struct sockaddr *name, socklen_t namelen),       \
                    801:        (int, const struct sockaddr *, socklen_t),                      \
                    802:        (fd, name, namelen))
                    803:
                    804: FDCALL(int, getpeername, DUALCALL_GETPEERNAME,                         \
                    805:        (int fd, struct sockaddr *name, socklen_t *namelen),            \
                    806:        (int, struct sockaddr *, socklen_t *),                          \
                    807:        (fd, name, namelen))
                    808:
                    809: FDCALL(int, getsockname, DUALCALL_GETSOCKNAME,                                 \
                    810:        (int fd, struct sockaddr *name, socklen_t *namelen),            \
                    811:        (int, struct sockaddr *, socklen_t *),                          \
                    812:        (fd, name, namelen))
                    813:
                    814: FDCALL(int, listen, DUALCALL_LISTEN,                                   \
                    815:        (int fd, int backlog),                                          \
                    816:        (int, int),                                                     \
                    817:        (fd, backlog))
                    818:
                    819: FDCALL(ssize_t, recvfrom, DUALCALL_RECVFROM,                           \
                    820:        (int fd, void *buf, size_t len, int flags,                      \
                    821:            struct sockaddr *from, socklen_t *fromlen),                 \
                    822:        (int, void *, size_t, int, struct sockaddr *, socklen_t *),     \
                    823:        (fd, buf, len, flags, from, fromlen))
                    824:
                    825: FDCALL(ssize_t, sendto, DUALCALL_SENDTO,                               \
                    826:        (int fd, const void *buf, size_t len, int flags,                \
                    827:            const struct sockaddr *to, socklen_t tolen),                \
                    828:        (int, const void *, size_t, int,                                \
                    829:            const struct sockaddr *, socklen_t),                        \
                    830:        (fd, buf, len, flags, to, tolen))
                    831:
                    832: FDCALL(ssize_t, recvmsg, DUALCALL_RECVMSG,                             \
                    833:        (int fd, struct msghdr *msg, int flags),                        \
                    834:        (int, struct msghdr *, int),                                    \
                    835:        (fd, msg, flags))
                    836:
                    837: FDCALL(ssize_t, sendmsg, DUALCALL_SENDMSG,                             \
                    838:        (int fd, const struct msghdr *msg, int flags),                  \
                    839:        (int, const struct msghdr *, int),                              \
                    840:        (fd, msg, flags))
                    841:
                    842: FDCALL(int, getsockopt, DUALCALL_GETSOCKOPT,                           \
                    843:        (int fd, int level, int optn, void *optval, socklen_t *optlen), \
                    844:        (int, int, int, void *, socklen_t *),                           \
                    845:        (fd, level, optn, optval, optlen))
                    846:
                    847: FDCALL(int, setsockopt, DUALCALL_SETSOCKOPT,                           \
                    848:        (int fd, int level, int optn,                                   \
                    849:            const void *optval, socklen_t optlen),                      \
                    850:        (int, int, int, const void *, socklen_t),                       \
                    851:        (fd, level, optn, optval, optlen))
                    852:
                    853: FDCALL(int, shutdown, DUALCALL_SHUTDOWN,                               \
                    854:        (int fd, int how),                                              \
                    855:        (int, int),                                                     \
                    856:        (fd, how))
                    857:
/*
 * STUB() selects the symbol name under which a call is defined.  With
 * _FORTIFY_SOURCE enabled the name is wrapped in __ssp_weak_name(),
 * and readlink/getcwd are additionally defined under their wrapped
 * names as plain forwards to the _sys_ variants.  Without it the name
 * is used unchanged.
 * NOTE(review): presumably needed because the fortify headers take
 * over the plain names -- confirm against the ssp headers.
 */
#if defined(_FORTIFY_SOURCE) && _FORTIFY_SOURCE > 0
#define STUB(fun) __ssp_weak_name(fun)

ssize_t _sys_readlink(const char * __restrict, char * __restrict, size_t);
char *_sys_getcwd(char *, size_t);

ssize_t
STUB(readlink)(const char * __restrict path, char * __restrict buf,
    size_t bufsiz)
{
	ssize_t nread;

	nread = _sys_readlink(path, buf, bufsiz);
	return nread;
}

char *
STUB(getcwd)(char *buf, size_t size)
{
	char *cwd;

	cwd = _sys_getcwd(buf, size);
	return cwd;
}
#else
#define STUB(fun) fun
#endif
        !           877:
        !           878: FDCALL(ssize_t, STUB(read), DUALCALL_READ,                             \
1.17      pooka     879:        (int fd, void *buf, size_t buflen),                             \
                    880:        (int, void *, size_t),                                          \
                    881:        (fd, buf, buflen))
                    882:
1.18      pooka     883: FDCALL(ssize_t, readv, DUALCALL_READV,                                         \
1.17      pooka     884:        (int fd, const struct iovec *iov, int iovcnt),                  \
                    885:        (int, const struct iovec *, int),                               \
                    886:        (fd, iov, iovcnt))
                    887:
                    888: FDCALL(ssize_t, writev, DUALCALL_WRITEV,                               \
                    889:        (int fd, const struct iovec *iov, int iovcnt),                  \
                    890:        (int, const struct iovec *, int),                               \
                    891:        (fd, iov, iovcnt))
                    892:
                    893: FDCALL(int, close, DUALCALL_CLOSE,                                     \
                    894:        (int fd),                                                       \
                    895:        (int),                                                          \
                    896:        (fd))

CVSweb <webmaster@jp.NetBSD.org>