[BACK]Return to perfuse.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / lib / libperfuse

File: [cvs.NetBSD.org] / src / lib / libperfuse / perfuse.c (download)

Revision 1.21, Tue Oct 18 15:47:32 2011 UTC (12 years, 6 months ago) by manu
Branch: MAIN
Changes since 1.20: +1 -8 lines

mlockall is not necessary after all, once we have fixed a kernel bug involving
pagedaemon sleeping for memory

/*  $NetBSD: perfuse.c,v 1.21 2011/10/18 15:47:32 manu Exp $ */

/*-
 *  Copyright (c) 2010-2011 Emmanuel Dreyfus. All rights reserved.
 * 
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *  1. Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 * 
 *  THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 *  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 *  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 *  PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 *  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *  POSSIBILITY OF SUCH DAMAGE.
 */ 

#include <sys/types.h>
#include <sys/extattr.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/un.h>

#include <machine/vmparam.h>

#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <puffs.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define LIBPERFUSE
#include "perfuse.h"
#include "perfuse_if.h"
#include "perfuse_priv.h"

/*
 * Diagnostic flag bitmask.  It is tested directly in this file against
 * PDF_MISC (debug traces in perfuse_mount()) and PDF_PUFFS (puffs
 * operation dump in perfuse_init()), and is also referenced by the
 * DPRINTF/DERR/DWARN macros.
 */
int perfuse_diagflags = 0; /* global used in DPRINTF/DERR/DWARN */
extern char **environ;	/* handed to execve(2) in perfuse_open() */

/* Forward declarations of the file-local helpers defined below. */
static struct perfuse_state *init_state(void);
static int get_fd(const char *);


/*
 * Allocate a zero-filled per-mount state and apply the defaults:
 * unlimited write and readahead sizes, and access() disabled.
 *
 * The PERFUSE_OPTIONS environment variable, when set, is a
 * comma-separated list that may toggle the defaults:
 *	enable_access / disable_access	clear / set PS_NO_ACCESS
 *	enable_creat  / disable_creat	clear / set PS_NO_CREAT
 * Unknown option names are ignored.
 *
 * Returns the new state; allocation failure is reported through DERR().
 */
static struct perfuse_state *
init_state(void)
{
	static const char delim[] = ",";
	struct perfuse_state *state;
	char envbuf[1024];
	char *tok;
	char *save;

	/* calloc() hands back the structure already zero-filled. */
	state = calloc(1, sizeof(*state));
	if (state == NULL)
		DERR(EX_OSERR, "%s: malloc failed", __func__);

	state->ps_max_write = UINT_MAX;
	state->ps_max_readahead = UINT_MAX;

	/*
	 * Most of the time, access() is broken because the filesystem
	 * performs the check with root privileges. glusterfs will do that
	 * if the Linux-specific setfsuid() is missing, for instance.
	 */
	state->ps_flags |= PS_NO_ACCESS;

	/*
	 * This is a temporary way to toggle access and creat usage.
	 * It would be nice if that could be provided as mount options,
	 * but that will not be obvious to do.
	 */
	if (getenv_r("PERFUSE_OPTIONS", envbuf, sizeof(envbuf)) == -1)
		return state;

	tok = strtok_r(envbuf, delim, &save);
	while (tok != NULL) {
		/* The option names are mutually exclusive, so chain them. */
		if (strcmp(tok, "enable_access") == 0)
			state->ps_flags &= ~PS_NO_ACCESS;
		else if (strcmp(tok, "disable_access") == 0)
			state->ps_flags |= PS_NO_ACCESS;
		else if (strcmp(tok, "enable_creat") == 0)
			state->ps_flags &= ~PS_NO_CREAT;
		else if (strcmp(tok, "disable_creat") == 0)
			state->ps_flags |= PS_NO_CREAT;

		tok = strtok_r(NULL, delim, &save);
	}

	return state;
}


/*
 * Extract the file descriptor number from a comma-separated mount
 * option string such as "rw,fd=5,rootmode=40000".
 *
 * The first option that starts with "fd=" decides the result; empty
 * options (consecutive commas) are skipped.  The value must be a
 * non-negative decimal integer that fits in an int and must be
 * followed by a comma or by the end of the string.
 *
 * Returns the descriptor on success.  Returns -1 with errno set to
 * EINVAL when data is NULL, when no "fd=" option is present, or when
 * its value is malformed or out of range.
 */
static int
get_fd(const char *data)
{
	static const char fdopt[] = "fd=";
	const size_t fdoptlen = sizeof(fdopt) - 1;
	const char *opt;
	const char *next;
	const char *num;
	char *end;
	long val;

	if (data == NULL)
		goto bad;

	for (opt = data; *opt != '\0'; opt = next) {
		size_t optlen = strcspn(opt, ",");

		/* Step over the separator, if any, for the next round. */
		next = opt + optlen;
		if (*next == ',')
			next++;

		if (optlen < fdoptlen || strncmp(opt, fdopt, fdoptlen) != 0)
			continue;

		/*
		 * atoi() cannot report errors: "fd=abc" would silently
		 * become descriptor 0.  strtol() lets us reject that.
		 */
		num = opt + fdoptlen;
		errno = 0;
		val = strtol(num, &end, 10);
		if (end == num || (*end != ',' && *end != '\0'))
			goto bad;
		if (errno == ERANGE || val < 0 || val > INT_MAX)
			goto bad;

		return (int)val;
	}

bad:
	/*
	 * No usable file descriptor found
	 */
	errno = EINVAL;
	return -1;
}

int
perfuse_open(path, flags, mode)
	const char *path;
	int flags;
	mode_t mode;
{
	int sv[2];
	struct sockaddr_un sun;
	struct sockaddr *sa;
	char progname[] = _PATH_PERFUSED;
	char minus_i[] = "-i";
	char fdstr[16];
	char *const argv[] = { progname, minus_i, fdstr, NULL};
	uint32_t opt;
	uint32_t optlen;
	int sock_type = SOCK_SEQPACKET;

	if (strcmp(path, _PATH_FUSE) != 0)
		return open(path, flags, mode);

	/* 
	 * Try SOCK_SEQPACKET then SOCK_DGRAM if unavailable
	 */
	if ((sv[0] = socket(PF_LOCAL, SOCK_SEQPACKET, 0)) == -1) {
		sock_type = SOCK_DGRAM;
                DWARNX("SEQPACKET local sockets unavailable, using less "
		       "reliable DGRAM sockets. Expect file operation hangs.");

		if ((sv[0] = socket(PF_LOCAL, SOCK_DGRAM, 0)) == -1) {
#ifdef PERFUSE_DEBUG
			DWARN("%s: %d socket failed", __func__, __LINE__);
#endif
			return -1;
		}
	}

	/*
	 * Set a buffer lentgh large enough so that any FUSE packet
	 * will fit.
	 */
	opt = (uint32_t)FUSE_BUFSIZE;
	optlen = sizeof(opt);
	if (setsockopt(sv[0], SOL_SOCKET, SO_SNDBUF, &opt, optlen) != 0)
		DWARN("%s: setsockopt SO_SNDBUF to %d failed", __func__, opt);

	if (setsockopt(sv[0], SOL_SOCKET, SO_RCVBUF, &opt, optlen) != 0)
		DWARN("%s: setsockopt SO_RCVBUF to %d failed", __func__, opt);

	sa = (struct sockaddr *)(void *)&sun;
	sun.sun_len = sizeof(sun);
	sun.sun_family = AF_LOCAL;
	(void)strcpy(sun.sun_path, path);

	if (connect(sv[0], sa, (socklen_t)sun.sun_len) == 0)
		return sv[0];

	/*
	 * Attempt to run perfused on our own
	 * if it does not run yet; In that case
	 * we will talk using a socketpair 
	 * instead of /dev/fuse.
	 */
	if (socketpair(PF_LOCAL, sock_type, 0, sv) != 0) {
		DWARN("%s:%d: socketpair failed", __func__, __LINE__);
		return -1;
	}

	/*
	 * Set a buffer lentgh large enough so that any FUSE packet
	 * will fit.
	 */
	opt = (uint32_t)(4 * FUSE_BUFSIZE);
	optlen = sizeof(opt);
	if (setsockopt(sv[0], SOL_SOCKET, SO_SNDBUF, &opt, optlen) != 0)
		DWARN("%s: setsockopt SO_SNDBUF to %d failed", __func__, opt);

	if (setsockopt(sv[0], SOL_SOCKET, SO_RCVBUF, &opt, optlen) != 0)
		DWARN("%s: setsockopt SO_RCVBUF to %d failed", __func__, opt);

	if (setsockopt(sv[1], SOL_SOCKET, SO_SNDBUF, &opt, optlen) != 0)
		DWARN("%s: setsockopt SO_SNDBUF to %d failed", __func__, opt);

	if (setsockopt(sv[1], SOL_SOCKET, SO_RCVBUF, &opt, optlen) != 0)
		DWARN("%s: setsockopt SO_RCVBUF to %d failed", __func__, opt);

	/*
	 * Request peer credentials. This musr be done before first 
	 * frame is sent.
	 */
	opt = 1;
	optlen = sizeof(opt);
	if (setsockopt(sv[1], 0, LOCAL_CREDS, &opt, optlen) != 0)
		DWARN("%s: setsockopt LOCAL_CREDS failed", __func__);

	(void)sprintf(fdstr, "%d", sv[1]);

	switch(fork()) {
	case -1:
#ifdef PERFUSE_DEBUG
		DWARN("%s:%d: fork failed", __func__, __LINE__);
#endif
		return -1;
		/* NOTREACHED */
		break;
	case 0:
		(void)execve(argv[0], argv, environ);
#ifdef PERFUSE_DEBUG
		DWARN("%s:%d: execve failed", __func__, __LINE__);
#endif
		return -1;
		/* NOTREACHED */
		break;
	default:
		break;
	}
	
	return sv[0];
}

int
perfuse_mount(source, target, filesystemtype, mountflags, data)
	const char *source;
	const char *target;
	const char *filesystemtype;
	long mountflags;
	const void *data;
{
	int s;
	size_t len;
	struct perfuse_mount_out *pmo;
	struct sockaddr_storage ss;
	struct sockaddr_un *sun;
	struct sockaddr *sa;
	socklen_t sa_len;
	size_t sock_len;
	char *frame;
	char *cp;

#ifdef PERFUSE_DEBUG
	if (perfuse_diagflags & PDF_MISC)
		DPRINTF("%s(\"%s\", \"%s\", \"%s\", 0x%lx, \"%s\")\n",
			__func__, source, target, filesystemtype, 
			mountflags, (const char *)data);
#endif

	if ((s = get_fd(data)) == -1)
		return -1;

	/*
	 * If we are connected to /dev/fuse, we need a second
	 * socket to get replies from perfused.
	 * XXX This socket is not removed at exit time yet
	 */
	sock_len = 0;
	sa = (struct sockaddr *)(void *)&ss;
	sun = (struct sockaddr_un *)(void *)&ss;
	sa_len = sizeof(ss);
	if ((getpeername(s, sa, &sa_len) == 0) &&
	    (sa->sa_family = AF_LOCAL) &&
	    (strcmp(sun->sun_path, _PATH_FUSE) == 0)) {

		sun->sun_len = sizeof(*sun);
		sun->sun_family = AF_LOCAL;
		(void)sprintf(sun->sun_path, "%s/%s-%d",
			      _PATH_TMP, getprogname(), getpid());
		
		if (bind(s, sa, (socklen_t)sa->sa_len) != 0)
			DERR(EX_OSERR, "%s:%d bind to \"%s\" failed",
			     __func__, __LINE__, sun->sun_path);

		sock_len = strlen(sun->sun_path) + 1;
	}
		
	len = sizeof(*pmo);
	len += source ? (uint32_t)strlen(source) + 1 : 0;
	len += target ? (uint32_t)strlen(target) + 1 : 0;
	len += filesystemtype ? (uint32_t)strlen(filesystemtype) + 1 : 0;
	len += data ? (uint32_t)strlen(data) + 1 : 0;
	len += sock_len;

	if ((frame = malloc(len)) == NULL) {
#ifdef PERFUSE_DEBUG
		if (perfuse_diagflags & PDF_MISC)
			DWARN("%s:%d malloc failed", __func__, __LINE__);
#endif
		return -1;
	}

	pmo = (struct perfuse_mount_out *)(void *)frame;
	pmo->pmo_len = (uint32_t)len;
	pmo->pmo_error = 0;
	pmo->pmo_unique = (uint64_t)-1;
	(void)strcpy(pmo->pmo_magic, PERFUSE_MOUNT_MAGIC);

	pmo->pmo_source_len = source ? (uint32_t)strlen(source) + 1 : 0;
	pmo->pmo_target_len = target ? (uint32_t)strlen(target) + 1: 0;
	pmo->pmo_filesystemtype_len = 
	    filesystemtype ? (uint32_t)strlen(filesystemtype) + 1 : 0;
	pmo->pmo_mountflags = (uint32_t)mountflags;
	pmo->pmo_data_len = data ? (uint32_t)strlen(data) + 1 : 0;
	pmo->pmo_sock_len = (uint32_t)sock_len;
	
	cp = (char *)(void *)(pmo + 1);

	if (source) {
		(void)strcpy(cp, source);
		cp += pmo->pmo_source_len;
	}
	
	if (target) {
		(void)strcpy(cp, target);
		cp += pmo->pmo_target_len;
	}

	if (filesystemtype) {
		(void)strcpy(cp, filesystemtype);
		cp += pmo->pmo_filesystemtype_len;
	}
	
	if (data) {
		(void)strcpy(cp, data);
		cp += pmo->pmo_data_len;
	}

	if (sock_len != 0) {
		(void)strcpy(cp, sun->sun_path);
		cp += pmo->pmo_sock_len;
	}

	if (send(s, frame, len, MSG_NOSIGNAL) != (ssize_t)len) {
#ifdef PERFUSE_DEBUG
		DWARN("%s:%d sendto failed", __func__, __LINE__);
#endif
		return -1;
	}

	return 0;
}


/*
 * Return the current value of the per-mount ps_unique counter and
 * advance the counter by one for the next caller.
 */
uint64_t
perfuse_next_unique(pu)
	struct puffs_usermount *pu;
{
	struct perfuse_state *state = puffs_getspecific(pu);
	uint64_t unique;

	/* Post-increment semantics: hand out the value before the bump. */
	unique = state->ps_unique;
	state->ps_unique++;

	return unique;
}

struct puffs_usermount *
perfuse_init(pc, pmi)
	struct perfuse_callbacks *pc;
	struct perfuse_mount_info *pmi;
{
	struct perfuse_state *ps;
	struct puffs_usermount *pu;
	struct puffs_ops *pops;
	const char *source = _PATH_PUFFS;
	char *fstype;
	unsigned int puffs_flags;
	struct puffs_node *pn_root;
	struct puffs_pathobj *po_root;

	ps = init_state();
	ps->ps_owner_uid = pmi->pmi_uid;

	if (pmi->pmi_source) {
		if ((ps->ps_source = strdup(pmi->pmi_source)) == NULL)
			DERR(EX_OSERR, "%s: strdup failed", __func__);

		source = ps->ps_source;
	}

	if (pmi->pmi_filesystemtype) {
		size_t len;

		ps->ps_filesystemtype = strdup(pmi->pmi_filesystemtype);
		if (ps->ps_filesystemtype == NULL)
			DERR(EX_OSERR, "%s: strdup failed", __func__);

		len = sizeof("perfuse|") + strlen(ps->ps_filesystemtype) + 1;
		if ((fstype = malloc(len)) == NULL)
			DERR(EX_OSERR, "%s: malloc failed", __func__);

		(void)sprintf(fstype, "perfuse|%s", ps->ps_filesystemtype);
	} else {
		if ((fstype = strdup("perfuse")) == NULL)
			DERR(EX_OSERR, "%s: strdup failed", __func__);
	}

	if ((ps->ps_target = strdup(pmi->pmi_target)) == NULL)
		DERR(EX_OSERR, "%s: strdup failed", __func__);

	ps->ps_mountflags = pmi->pmi_mountflags;

	/*
	 * Some options are forbidden for non root users
	 */
	if (ps->ps_owner_uid != 0)
	    ps->ps_mountflags |= MNT_NOSUID|MNT_NODEV;

	PUFFSOP_INIT(pops);
	PUFFSOP_SET(pops, perfuse, fs, unmount);
	PUFFSOP_SET(pops, perfuse, fs, statvfs);
	PUFFSOP_SET(pops, perfuse, fs, sync);
	PUFFSOP_SET(pops, perfuse, node, lookup);
	PUFFSOP_SET(pops, perfuse, node, create);
	PUFFSOP_SET(pops, perfuse, node, mknod);
	PUFFSOP_SET(pops, perfuse, node, open);
	PUFFSOP_SET(pops, perfuse, node, close);
	PUFFSOP_SET(pops, perfuse, node, access);
	PUFFSOP_SET(pops, perfuse, node, getattr);
	PUFFSOP_SET(pops, perfuse, node, setattr);
	PUFFSOP_SET(pops, perfuse, node, poll);
#if 0 
	PUFFSOP_SET(pops, perfuse, node, mmap);
#endif
	PUFFSOP_SET(pops, perfuse, node, fsync);
	PUFFSOP_SET(pops, perfuse, node, seek);
	PUFFSOP_SET(pops, perfuse, node, remove);
	PUFFSOP_SET(pops, perfuse, node, link);
	PUFFSOP_SET(pops, perfuse, node, rename);
	PUFFSOP_SET(pops, perfuse, node, mkdir);
	PUFFSOP_SET(pops, perfuse, node, rmdir);
	PUFFSOP_SET(pops, perfuse, node, symlink);
	PUFFSOP_SET(pops, perfuse, node, readdir);
	PUFFSOP_SET(pops, perfuse, node, readlink);
	PUFFSOP_SET(pops, perfuse, node, reclaim);
	PUFFSOP_SET(pops, perfuse, node, inactive);
	PUFFSOP_SET(pops, perfuse, node, print);
	PUFFSOP_SET(pops, perfuse, node, advlock);
	PUFFSOP_SET(pops, perfuse, node, read);
	PUFFSOP_SET(pops, perfuse, node, write);
#ifdef PUFFS_EXTNAMELEN
	PUFFSOP_SET(pops, perfuse, node, getextattr);
	PUFFSOP_SET(pops, perfuse, node, setextattr);
	PUFFSOP_SET(pops, perfuse, node, listextattr);
	PUFFSOP_SET(pops, perfuse, node, deleteextattr);
#endif /* PUFFS_EXTNAMELEN */

	/*
	 * We used to have PUFFS_KFLAG_WTCACHE here, which uses the
	 * page cache (highly desirable to get mmap(2)), but still sends
	 * all writes to the filesystem. In fact it does not send the
	 * data written, but the pages that contain it. 
	 *
	 * There is a nasty bug hidden somewhere, possibly in libpuffs'
	 * VOP_FSYNC, which sends an asynchronous PUFFS_SETATTR that
	 * update file size. When writes are in progress, it will cause
	 * the file to be truncated and we get a zero-filled chunk at the
	 * beginning of a page. Removing PUFFS_KFLAG_WTCACHE fixes that
	 * problem. 
	 * 
	 * The other consequences are that changes will not be propagated
	 * immediatly to the filesystem, and we get a huge performance gain
	 * because much less requests are sent. A test case for the above
	 * mentioned bug got its execution time slashed by factor 50.
	 */
	puffs_flags = 0;

	if (perfuse_diagflags & PDF_PUFFS)
		puffs_flags |= PUFFS_FLAG_OPDUMP;

	if ((pu = puffs_init(pops, source, fstype, ps, puffs_flags)) == NULL)
		DERR(EX_OSERR, "%s: puffs_init failed", __func__);

	ps->ps_pu = pu;

	/*
	 * Setup filesystem root
	 */
	pn_root = perfuse_new_pn(pu, "", NULL);
	PERFUSE_NODE_DATA(pn_root)->pnd_ino = FUSE_ROOT_ID; 
	PERFUSE_NODE_DATA(pn_root)->pnd_parent = pn_root;
	puffs_setroot(pu, pn_root);
	ps->ps_fsid = pn_root->pn_va.va_fsid;
	
	po_root = puffs_getrootpathobj(pu);
	if ((po_root->po_path = strdup("/")) == NULL)
		DERRX(EX_OSERR, "perfuse_mount_start() failed");
	
	po_root->po_len = 1;
	puffs_path_buildhash(pu, po_root);

	puffs_vattr_null(&pn_root->pn_va);
	pn_root->pn_va.va_type = VDIR;
	pn_root->pn_va.va_mode = 0755;
	
	ps->ps_root = pn_root;

	/* 
	 *  Callbacks
	 */
	ps->ps_new_msg = pc->pc_new_msg;
	ps->ps_xchg_msg = pc->pc_xchg_msg;
	ps->ps_destroy_msg = pc->pc_destroy_msg;
	ps->ps_get_inhdr = pc->pc_get_inhdr;
	ps->ps_get_inpayload = pc->pc_get_inpayload;
	ps->ps_get_outhdr = pc->pc_get_outhdr;
	ps->ps_get_outpayload = pc->pc_get_outpayload;
	ps->ps_umount = pc->pc_umount;

	return pu;
} 

/*
 * Store the caller's private pointer in the ps_private slot of the
 * per-mount state attached to pu.
 */
void
perfuse_setspecific(pu, priv)
	struct puffs_usermount *pu;
	void *priv;
{
	struct perfuse_state *state = puffs_getspecific(pu);

	state->ps_private = priv;
}

/*
 * Return the private pointer held in the ps_private slot of the
 * per-mount state attached to pu.
 */
void *
perfuse_getspecific(pu)
	struct puffs_usermount *pu;
{
	struct perfuse_state *state = puffs_getspecific(pu);
	void *priv = state->ps_private;

	return priv;
}

int
perfuse_inloop(pu)
	struct puffs_usermount *pu;
{
	struct perfuse_state *ps;

	ps = puffs_getspecific(pu);

	return ps->ps_flags & PS_INLOOP;
}

int
perfuse_mainloop(pu)
	struct puffs_usermount *pu;
{
	struct perfuse_state *ps;

	ps = puffs_getspecific(pu);

	ps->ps_flags |= PS_INLOOP;
	if (puffs_mainloop(ps->ps_pu) != 0) {
		DERR(EX_OSERR, "%s: failed", __func__);
		return -1;
	}

	/* 
	 * Normal exit after unmount
	 */
	return 0;
}

/*
 * Return the pnd_ino value recorded in the perfuse node data of the
 * node identified by opc.  The pu argument is not used.
 */
/* ARGSUSED0 */
uint64_t
perfuse_get_ino(pu, opc)
	struct puffs_usermount *pu;
	puffs_cookie_t opc;
{
	uint64_t ino;

	ino = PERFUSE_NODE_DATA(opc)->pnd_ino;

	return ino;
}

int
perfuse_unmount(pu)
	struct puffs_usermount *pu;
{
	struct perfuse_state *ps;

	ps = puffs_getspecific(pu);

	return unmount(ps->ps_target, MNT_FORCE);
}