/*	$NetBSD: puffs_transport.c,v 1.22.8.2 2007/10/14 11:48:32 yamt Exp $	*/

/*
 * Copyright (c) 2006, 2007  Antti Kantee.  All Rights Reserved.
 *
 * Development of this software was supported by the
 * Ulla Tuominen Foundation and the Finnish Cultural Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: puffs_transport.c,v 1.22.8.2 2007/10/14 11:48:32 yamt Exp $");

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/fstrans.h>
#include <sys/kmem.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/poll.h>
#include <sys/socketvar.h>
#include <sys/kthread.h>

#include <fs/puffs/puffs_msgif.h>
#include <fs/puffs/puffs_sys.h>

#include <miscfs/syncfs/syncfs.h> /* XXX: for syncer_mutex reference */

/*
 * puffs instance structures.  these are always allocated and freed
 * from the context of the device node / fileop code.
 */
struct puffs_instance {
	pid_t pi_pid;
	int pi_idx;
	int pi_fd;
	struct puffs_mount *pi_pmp;
	struct selinfo pi_sel;

	TAILQ_ENTRY(puffs_instance) pi_entries;
};
#define PMP_EMBRYO ((struct puffs_mount *)-1)	/* before mount */
#define PMP_DEAD ((struct puffs_mount *)-2)	/* goner */

static TAILQ_HEAD(, puffs_instance) puffs_ilist
    = TAILQ_HEAD_INITIALIZER(puffs_ilist);

static int get_pi_idx(struct puffs_instance *);

/*
 * public init / deinit
 */

/* protects both the list and the contents of the list elements */
static kmutex_t pi_mtx;

void
puffs_transport_init()
{

	mutex_init(&pi_mtx, MUTEX_DEFAULT, IPL_NONE);
}

void
puffs_transport_destroy()
{

	mutex_destroy(&pi_mtx);
}

/*
 * helpers
 */

static void
dosuspendresume(void *arg)
{
	struct puffs_mount *pmp = arg;
	struct mount *mp;
	int rv;

	mp = PMPTOMP(pmp);

	/*
	 * XXX?  does this really do any good or is it just
	 * paranoid stupidity?  or stupid paranoia?
	 */
	if (mp->mnt_iflag & IMNT_UNMOUNT) {
		printf("puffs dosuspendresume(): detected suspend on "
		    "unmounting fs\n");
		goto out;
	}

	/* Do the dance.  Allow only one concurrent suspend. */
	rv = vfs_suspend(PMPTOMP(pmp), 1);
	if (rv == 0)
		vfs_resume(PMPTOMP(pmp));

 out:
	mutex_enter(&pmp->pmp_lock);
	KASSERT(pmp->pmp_suspend == 1);
	pmp->pmp_suspend = 0;
	puffs_mp_release(pmp);
	mutex_exit(&pmp->pmp_lock);

	kthread_exit(0);
}

static void
puffsop_suspend(struct puffs_mount *pmp)
{
	int rv = 0;

	mutex_enter(&pmp->pmp_lock);
	if (pmp->pmp_suspend || pmp->pmp_status != PUFFSTAT_RUNNING) {
		rv = EBUSY;
	} else {
		puffs_mp_reference(pmp);
		pmp->pmp_suspend = 1;
	}
	mutex_exit(&pmp->pmp_lock);
	if (rv)
		return;

	rv = kthread_create(PRI_NONE, 0, NULL, dosuspendresume,
	    pmp, NULL, "puffsusp"); /* XXX: "return" rv */
}

static int
puffsop_flush(struct puffs_mount *pmp, struct puffs_flush *pf)
{
	struct vnode *vp;
	voff_t offlo, offhi;
	int rv, flags = 0;

	/* XXX: slurry */
	if (pf->pf_op == PUFFS_INVAL_NAMECACHE_ALL) {
		cache_purgevfs(PMPTOMP(pmp));
		return 0;
	}

	/*
	 * Get vnode, don't lock it.  Namecache is protected by its own lock
	 * and we have a reference to protect against premature harvesting.
	 *
	 * The node we want here might be locked and the op is in
	 * userspace waiting for us to complete ==> deadlock.  Another
	 * reason we need to eventually bump locking to userspace, as we
	 * will need to lock the node if we wish to do flushes.
	 */
	rv = puffs_cookie2vnode(pmp, pf->pf_cookie, 0, 0, &vp);
	if (rv) {
		if (rv == PUFFS_NOSUCHCOOKIE)
			return ENOENT;
		return rv;
	}

	switch (pf->pf_op) {
#if 0 /* not quite ready, yet */
	case PUFFS_INVAL_NAMECACHE_NODE:
	struct componentname *pf_cn;
	char *name;
		/* get comfortab^Wcomponentname */
		MALLOC(pf_cn, struct componentname *,
		    sizeof(struct componentname), M_PUFFS, M_WAITOK | M_ZERO);
		memset(pf_cn, 0, sizeof(struct componentname));
		break;
#endif
	case PUFFS_INVAL_NAMECACHE_DIR:
		if (vp->v_type != VDIR) {
			rv = EINVAL;
			break;
		}
		cache_purge1(vp, NULL, PURGE_CHILDREN);
		break;

	case PUFFS_INVAL_PAGECACHE_NODE_RANGE:
		flags = PGO_FREE;
		/*FALLTHROUGH*/
	case PUFFS_FLUSH_PAGECACHE_NODE_RANGE:
		if (flags == 0)
			flags = PGO_CLEANIT;

		if (pf->pf_end > vp->v_size || vp->v_type != VREG) {
			rv = EINVAL;
			break;
		}

		offlo = trunc_page(pf->pf_start);
		offhi = round_page(pf->pf_end);
		if (offhi != 0 && offlo >= offhi) {
			rv = EINVAL;
			break;
		}

		simple_lock(&vp->v_uobj.vmobjlock);
		rv = VOP_PUTPAGES(vp, offlo, offhi, flags);
		break;

	default:
		rv = EINVAL;
	}

	vrele(vp);
	return rv;
}

/*
 * fd routines, for cloner
 */
static int puffs_fop_read(struct file *, off_t *, struct uio *,
    kauth_cred_t, int);
static int puffs_fop_write(struct file *, off_t *, struct uio *,
    kauth_cred_t, int);
static int puffs_fop_ioctl(struct file *, u_long, void *, struct lwp *);
static int puffs_fop_poll(struct file *, int, struct lwp *);
static int puffs_fop_close(struct file *, struct lwp *);
static int puffs_fop_kqfilter(struct file *, struct knote *);

static const struct fileops puffs_fileops = {
	puffs_fop_read,
	puffs_fop_write,
	puffs_fop_ioctl,
	fnullop_fcntl,
	puffs_fop_poll,
	fbadop_stat,
	puffs_fop_close,
	puffs_fop_kqfilter
};

static int
puffs_fop_read(struct file *fp, off_t *off, struct uio *uio,
	kauth_cred_t cred, int flags)
{
	struct puffs_mount *pmp = FPTOPMP(fp);
	size_t origres, moved;
	int error;

	if (pmp == PMP_EMBRYO || pmp == PMP_DEAD) {
		printf("puffs_fop_read: puffs %p, not mounted\n", pmp);
		return ENOENT;
	}

	if (pmp->pmp_curput == NULL) {
		error = puffs_msgif_getout(pmp, uio->uio_resid,
		    fp->f_flag & O_NONBLOCK, &pmp->pmp_curput,
		    &pmp->pmp_curres, &pmp->pmp_curopaq);
		if (error)
			return error;
	}

	origres = uio->uio_resid;
	error = uiomove(pmp->pmp_curput, pmp->pmp_curres, uio);
	moved = origres - uio->uio_resid;
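	/*
	 * uiomove() may have copied out only part of the message if the
	 * caller's buffer was too small; remember how far we got so that
	 * the next read continues from where this one left off.
	 */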
DPRINTF(("puffs_fop_read (%p): moved %zu bytes from %p, error %d\n", pmp, moved, pmp->pmp_curput, error)); KASSERT(pmp->pmp_curres >= moved); pmp->pmp_curres -= moved; pmp->pmp_curput += moved; if (pmp->pmp_curres == 0) { puffs_msgif_releaseout(pmp, pmp->pmp_curopaq, error); pmp->pmp_curput = NULL; } return error; } static int puffs_fop_write(struct file *fp, off_t *off, struct uio *uio, kauth_cred_t cred, int flags) { struct puffs_mount *pmp = FPTOPMP(fp); struct puffs_frame pfr; uint8_t *buf; size_t frsize; int error; DPRINTF(("puffs_fop_write (%p): writing response, resid %zu\n", pmp, uio->uio_resid)); if (pmp == PMP_EMBRYO || pmp == PMP_DEAD) { printf("puffs_fop_write: puffs %p, not mounted\n", pmp); return ENOENT; } error = uiomove(&pfr, sizeof(struct puffs_frame), uio); if (error) return error; /* Sorry mate, the kernel doesn't buffer. */ frsize = pfr.pfr_len - sizeof(struct puffs_frame); if (uio->uio_resid < frsize) return EINVAL; buf = kmem_alloc(frsize + sizeof(struct puffs_frame), KM_SLEEP); memcpy(buf, &pfr, sizeof(pfr)); error = uiomove(buf+sizeof(struct puffs_frame), frsize, uio); if (error == 0) { switch (PUFFSOP_OPCLASS(pfr.pfr_type)) { case PUFFSOP_VN: case PUFFSOP_VFS: puffs_msgif_incoming(pmp, buf); break; case PUFFSOP_FLUSH: puffsop_flush(pmp, (void *)buf); break; case PUFFSOP_SUSPEND: puffsop_suspend(pmp); break; default: /* XXX: send error */ break; } } kmem_free(buf, frsize + sizeof(struct puffs_frame)); return error; } /* * Poll query interface. The question is only if an event * can be read from us (and by read I mean ioctl... ugh). */ #define PUFFPOLL_EVSET (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI) static int puffs_fop_poll(struct file *fp, int events, struct lwp *l) { struct puffs_mount *pmp = FPTOPMP(fp); int revents; if (pmp == PMP_EMBRYO || pmp == PMP_DEAD) { printf("puffs_fop_ioctl: puffs %p, not mounted\n", pmp); return ENOENT; } revents = events & (POLLOUT | POLLWRNORM | POLLWRBAND); if ((events & PUFFPOLL_EVSET) == 0) return revents; /* check queue */ mutex_enter(&pmp->pmp_lock); if (!TAILQ_EMPTY(&pmp->pmp_msg_touser)) revents |= PUFFPOLL_EVSET; else selrecord(l, pmp->pmp_sel); mutex_exit(&pmp->pmp_lock); return revents; } /* * device close = forced unmount. * * unmounting is a frightfully complex operation to avoid races */ static int puffs_fop_close(struct file *fp, struct lwp *l) { struct puffs_instance *pi; struct puffs_mount *pmp; struct mount *mp; int gone, rv; DPRINTF(("puffs_fop_close: device closed, force filesystem unmount\n")); restart: mutex_enter(&pi_mtx); pmp = FPTOPMP(fp); /* * First check if the fs was never mounted. In that case * remove the instance from the list. If mount is attempted later, * it will simply fail. */ if (pmp == PMP_EMBRYO) { pi = FPTOPI(fp); TAILQ_REMOVE(&puffs_ilist, pi, pi_entries); mutex_exit(&pi_mtx); FREE(pi, M_PUFFS); DPRINTF(("puffs_fop_close: pmp associated with fp %p was " "embryonic\n", fp)); return 0; } /* * Next, analyze if unmount was called and the instance is dead. * In this case we can just free the structure and go home, it * was removed from the list by puffs_nukebypmp(). */ if (pmp == PMP_DEAD) { /* would be nice, but don't have a reference to it ... */ /* KASSERT(pmp_status == PUFFSTAT_DYING); */ mutex_exit(&pi_mtx); DPRINTF(("puffs_fop_close: pmp associated with fp %p was " "dead\n", fp)); goto out; } /* * So we have a reference. Proceed to unwrap the file system. */ mp = PMPTOMP(pmp); mutex_exit(&pi_mtx); /* hmm? suspicious locking? 
*/ mutex_enter(&pmp->pmp_lock); puffs_mp_reference(pmp); /* * Free the waiting callers before proceeding any further. * The syncer might be jogging around in this file system * currently. If we allow it to go to the userspace of no * return while trying to get the syncer lock, well ... * synclk: I feel happy, I feel fine. * lockmgr: You're not fooling anyone, you know. */ puffs_userdead(pmp); /* * Make sure someone from puffs_unmount() isn't currently in * userspace. If we don't take this precautionary step, * they might notice that the mountpoint has disappeared * from under them once they return. Especially note that we * cannot simply test for an unmounter before calling * dounmount(), since it might be possible that that particular * invocation of unmount was called without MNT_FORCE. Here we * *must* make sure unmount succeeds. Also, restart is necessary * since pmp isn't locked. We might end up with PMP_DEAD after * restart and exit from there. */ if (pmp->pmp_unmounting) { cv_wait(&pmp->pmp_unmounting_cv, &pmp->pmp_lock); puffs_mp_release(pmp); mutex_exit(&pmp->pmp_lock); DPRINTF(("puffs_fop_close: unmount was in progress for pmp %p, " "restart\n", pmp)); goto restart; } /* Won't access pmp from here anymore */ puffs_mp_release(pmp); mutex_exit(&pmp->pmp_lock); /* * Detach from VFS. First do necessary XXX-dance (from * sys_unmount() & other callers of dounmount() * * XXX Freeze syncer. Must do this before locking the * mount point. See dounmount() for details. * * XXX2: take a reference to the mountpoint before starting to * wait for syncer_mutex. Otherwise the mointpoint can be * wiped out while we wait. */ simple_lock(&mp->mnt_slock); mp->mnt_wcnt++; simple_unlock(&mp->mnt_slock); mutex_enter(&syncer_mutex); simple_lock(&mp->mnt_slock); mp->mnt_wcnt--; if (mp->mnt_wcnt == 0) wakeup(&mp->mnt_wcnt); gone = mp->mnt_iflag & IMNT_GONE; simple_unlock(&mp->mnt_slock); if (gone) { mutex_exit(&syncer_mutex); goto out; } /* * microscopic race condition here (although not with the current * kernel), but can't really fix it without starting a crusade * against vfs_busy(), so let it be, let it be, let it be */ /* * The only way vfs_busy() will fail for us is if the filesystem * is already a goner. * XXX: skating on the thin ice of modern calling conventions ... */ if (vfs_busy(mp, 0, 0)) { mutex_exit(&syncer_mutex); goto out; } /* * Once we have the mount point, unmount() can't interfere.. * or at least in theory it shouldn't. dounmount() reentracy * might require some visiting at some point. */ rv = dounmount(mp, MNT_FORCE, l); KASSERT(rv == 0); out: /* * Finally, release the instance information. It was already * removed from the list by puffs_nukebypmp() and we know it's * dead, so no need to lock. */ pi = FPTOPI(fp); FREE(pi, M_PUFFS); return 0; } static int puffs_fop_ioctl(struct file *fp, u_long cmd, void *data, struct lwp *l) { /* * work already done in sys_ioctl(). 
/* kqueue stuff */

static void
filt_puffsdetach(struct knote *kn)
{
	struct puffs_instance *pi = kn->kn_hook;

	mutex_enter(&pi_mtx);
	SLIST_REMOVE(&pi->pi_sel.sel_klist, kn, knote, kn_selnext);
	mutex_exit(&pi_mtx);
}

static int
filt_puffsioctl(struct knote *kn, long hint)
{
	struct puffs_instance *pi = kn->kn_hook;
	struct puffs_mount *pmp;
	int error;

	error = 0;
	mutex_enter(&pi_mtx);
	pmp = pi->pi_pmp;
	if (pmp == PMP_EMBRYO || pmp == PMP_DEAD)
		error = 1;
	mutex_exit(&pi_mtx);
	if (error)
		return 0;

	mutex_enter(&pmp->pmp_lock);
	kn->kn_data = pmp->pmp_msg_touser_count;
	mutex_exit(&pmp->pmp_lock);

	return kn->kn_data != 0;
}

static const struct filterops puffsioctl_filtops =
	{ 1, NULL, filt_puffsdetach, filt_puffsioctl };

static int
puffs_fop_kqfilter(struct file *fp, struct knote *kn)
{
	struct puffs_instance *pi = fp->f_data;
	struct klist *klist;

	if (kn->kn_filter != EVFILT_READ)
		return 1;

	klist = &pi->pi_sel.sel_klist;
	kn->kn_fop = &puffsioctl_filtops;
	kn->kn_hook = pi;

	mutex_enter(&pi_mtx);
	SLIST_INSERT_HEAD(klist, kn, kn_selnext);
	mutex_exit(&pi_mtx);

	return 0;
}

/*
 * Device routines.  These are for when /dev/puffs is initially
 * opened before it has been cloned.
 */

dev_type_open(puffscdopen);
dev_type_close(puffscdclose);
dev_type_ioctl(puffscdioctl);

/* dev */
const struct cdevsw puffs_cdevsw = {
	puffscdopen,	puffscdclose,	noread,		nowrite,
	noioctl,	nostop,		notty,		nopoll,
	nommap,		nokqfilter,	D_OTHER
};

int
puffscdopen(dev_t dev, int flags, int fmt, struct lwp *l)
{
	struct puffs_instance *pi;
	struct file *fp;
	int error, fd, idx;

	/*
	 * XXX: decide on some security model and check permissions
	 */

	if (minor(dev) != PUFFS_CLONER)
		return ENXIO;

	if ((error = falloc(l, &fp, &fd)) != 0)
		return error;

	MALLOC(pi, struct puffs_instance *, sizeof(struct puffs_instance),
	    M_PUFFS, M_WAITOK | M_ZERO);

	mutex_enter(&pi_mtx);
	idx = get_pi_idx(pi);
	if (idx == PUFFS_CLONER) {
		mutex_exit(&pi_mtx);
		FREE(pi, M_PUFFS);
		FILE_UNUSE(fp, l);
		ffree(fp);
		return EBUSY;
	}

	pi->pi_pid = l->l_proc->p_pid;
	pi->pi_idx = idx;
	mutex_exit(&pi_mtx);

	DPRINTF(("puffscdopen: registered embryonic pmp for pid: %d\n",
	    pi->pi_pid));

	return fdclone(l, fp, fd, FREAD|FWRITE, &puffs_fileops, pi);
}

int
puffscdclose(dev_t dev, int flags, int fmt, struct lwp *l)
{

	panic("puffscdclose impossible\n");

	return 0;
}

/*
 * Set the puffs_mount pointer.  Called from puffs_mount(), which is the
 * earliest place that knows about this.
 *
 * We only want to make sure that the caller had the right to open the
 * device; we don't much care which instance it gets in case the same
 * process opened multiple (they are all equal at this point).
 */
int
puffs_setpmp(pid_t pid, int fd, struct puffs_mount *pmp)
{
	struct puffs_instance *pi;
	int rv = 1;

	mutex_enter(&pi_mtx);
	TAILQ_FOREACH(pi, &puffs_ilist, pi_entries) {
		if (pi->pi_pid == pid && pi->pi_pmp == PMP_EMBRYO) {
			pi->pi_pmp = pmp;
			pi->pi_fd = fd;
			pmp->pmp_sel = &pi->pi_sel;
			rv = 0;
			break;
		}
	}
	mutex_exit(&pi_mtx);

	return rv;
}
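/*
 * To recap the instance life cycle: puffscdopen() creates an embryonic
 * instance (pi_pmp == PMP_EMBRYO), puffs_setpmp() above attaches the
 * mount, puffs_nukebypmp() below marks the instance PMP_DEAD at unmount
 * time and puffs_fop_close() finally frees it.
 */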
/*
 * Remove mount point from the list of instances.  Called from unmount.
 */
void
puffs_nukebypmp(struct puffs_mount *pmp)
{
	struct puffs_instance *pi;

	mutex_enter(&pi_mtx);
	TAILQ_FOREACH(pi, &puffs_ilist, pi_entries) {
		if (pi->pi_pmp == pmp) {
			TAILQ_REMOVE(&puffs_ilist, pi, pi_entries);
			break;
		}
	}
	if (pi)
		pi->pi_pmp = PMP_DEAD;
#ifdef DIAGNOSTIC
	else
		panic("puffs_nukebypmp: invalid puffs_mount\n");
#endif /* DIAGNOSTIC */
	mutex_exit(&pi_mtx);

	DPRINTF(("puffs_nukebypmp: nuked %p\n", pi));
}

/*
 * Search the sorted list of instances for the first free minor number
 * and insert the argument into its sorted position.
 */
static int
get_pi_idx(struct puffs_instance *pi_i)
{
	struct puffs_instance *pi;
	int i;

	i = 0;
	TAILQ_FOREACH(pi, &puffs_ilist, pi_entries) {
		if (i == PUFFS_CLONER)
			return PUFFS_CLONER;
		if (i != pi->pi_idx)
			break;
		i++;
	}

	pi_i->pi_pmp = PMP_EMBRYO;

	if (pi == NULL)
		TAILQ_INSERT_TAIL(&puffs_ilist, pi_i, pi_entries);
	else
		TAILQ_INSERT_BEFORE(pi, pi_i, pi_entries);

	return i;
}
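/*
 * For reference, the userspace side of this transport, as a minimal
 * sketch.  A real server should use libpuffs rather than speak the raw
 * frame protocol; dispatch() and BUFSIZE are hypothetical placeholders:
 *
 *	int fd = open("/dev/puffs", O_RDWR);	// cloned by puffscdopen()
 *	// ... mount the file system; cf. puffs_setpmp() ...
 *	uint8_t buf[BUFSIZE];
 *	struct puffs_frame *pfr = (void *)buf;
 *	for (;;) {
 *		// puffs_fop_read() hands out the next request
 *		if (read(fd, buf, sizeof(buf)) == -1)
 *			break;
 *		dispatch(pfr);		// build the response in place
 *		// puffs_fop_write() routes it by PUFFSOP_OPCLASS(pfr_type)
 *		if (write(fd, buf, pfr->pfr_len) == -1)
 *			break;
 *	}
 */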