[BACK]Return to ccd.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / dev

File: [cvs.NetBSD.org] / src / sys / dev / ccd.c (download)

Revision 1.162, Fri Jan 2 19:42:06 2015 UTC (9 years, 3 months ago) by christos
Branch: MAIN
CVS Tags: nick-nhusb-base-20150606, nick-nhusb-base-20150406
Changes since 1.161: +3 -3 lines

We have three sets of DTYPE_ constants in the kernel:
	altq		Drop 		Type
	disklabel	Disk 		Type
	file		Descriptor	Type
(not to mention constants that contain the string DTYPE).
Let's make them two, by changing the disklabel one to be DisK TYPE since the
other disklabel constants seem to do that. Not many userland programs use
these constants (and the ones that they do are mostly in ifdefs). They will
be fixed shortly.

/*	$NetBSD: ccd.c,v 1.162 2015/01/02 19:42:06 christos Exp $	*/

/*-
 * Copyright (c) 1996, 1997, 1998, 1999, 2007, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: cd.c 1.6 90/11/28$
 *
 *	@(#)cd.c	8.2 (Berkeley) 11/16/93
 */

/*
 * "Concatenated" disk driver.
 *
 * Notes on concurrency:
 *
 * => sc_dvlock serializes access to the device nodes, excluding block I/O.
 *
 * => sc_iolock serializes access to (sc_flags & CCDF_INITED), disk stats,
 *    sc_stop, sc_bufq and b_resid from master buffers.
 *
 * => a combination of CCDF_INITED, sc_inflight, and sc_iolock is used to
 *    serialize I/O and configuration changes.
 *
 * => the in-core disk label does not change while the device is open.
 *
 * On memory consumption: ccd fans out I/O requests and so needs to
 * allocate memory.  If the system is desperately low on memory, we
 * single thread I/O.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ccd.c,v 1.162 2015/01/02 19:42:06 christos Exp $");

#if defined(_KERNEL_OPT)
#include "opt_compat_netbsd.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/kmem.h>
#include <sys/pool.h>
#include <sys/module.h>
#include <sys/namei.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/disklabel.h>
#include <sys/device.h>
#include <sys/disk.h>
#include <sys/syslog.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/conf.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/kauth.h>
#include <sys/kthread.h>
#include <sys/bufq.h>
#include <sys/sysctl.h>

#include <uvm/uvm_extern.h>

#include <dev/ccdvar.h>
#include <dev/dkvar.h>

#include <miscfs/specfs/specdev.h> /* for v_rdev */

/* Building with CCDDEBUG alone is enough to enable the DEBUG code below. */
#if defined(CCDDEBUG) && !defined(DEBUG)
#define DEBUG
#endif

#ifdef DEBUG
/* Bits tested against ccddebug; each enables a group of diagnostic printf()s. */
#define CCDB_FOLLOW	0x01	/* trace entry into the driver routines */
#define CCDB_INIT	0x02	/* configuration and interleave table setup */
#define CCDB_IO		0x04	/* per-component buffer details */
#define CCDB_LABEL	0x08	/* presumably disklabel handling; unused in the code visible here */
#define CCDB_VNODE	0x10	/* presumably vnode handling; unused in the code visible here */
int ccddebug = 0x00;		/* no debug output unless bits are set */
#endif

/* Extract the ccd unit number from a dev_t. */
#define	ccdunit(x)	DISKUNIT(x)

/*
 * Per-component I/O descriptor.  One of these is built by ccdbuffer()
 * for every piece of an original request that is sent to a component.
 *
 * cb_buf must remain the first member: ccdiodone() receives a pointer
 * to cb_buf and casts it straight back to a struct ccdbuf pointer.
 */
struct ccdbuf {
	struct buf	cb_buf;		/* new I/O buf */
	struct buf	*cb_obp;	/* ptr. to original I/O buf */
	struct ccd_softc *cb_sc;	/* pointer to ccd softc */
	int		cb_comp;	/* target component */
	SIMPLEQ_ENTRY(ccdbuf) cb_q;	/* fifo of component buffers */
};

/* Pool cache that struct ccdbuf component buffers are allocated from. */
static pool_cache_t ccd_cache;

/* Allocate / release one component buffer; allocation may sleep (PR_WAITOK). */
#define	CCD_GETBUF()		pool_cache_get(ccd_cache, PR_WAITOK)
#define	CCD_PUTBUF(cbp)		pool_cache_put(ccd_cache, cbp)

/* dev_t of the raw partition belonging to the same unit as `dev'. */
#define CCDLABELDEV(dev)	\
	(MAKEDISKDEV(major((dev)), ccdunit((dev)), RAW_PART))

/* called by main() at boot time */
void	ccdattach(int);
void	ccddetach(void);

/* called by biodone() at interrupt time */
static void	ccdiodone(struct buf *);

/* Internal helpers, defined later in this file. */
static void	ccdinterleave(struct ccd_softc *);
static int	ccdinit(struct ccd_softc *, char **, struct vnode **,
		    struct lwp *);
static struct ccdbuf *ccdbuffer(struct ccd_softc *, struct buf *,
		    daddr_t, void *, long);
static void	ccdgetdefaultlabel(struct ccd_softc *, struct disklabel *);
static void	ccdgetdisklabel(dev_t);
static void	ccdmakedisklabel(struct ccd_softc *);
static void	ccdstart(struct ccd_softc *);
static void	ccdthread(void *);

/* Device switch entry points. */
static dev_type_open(ccdopen);
static dev_type_close(ccdclose);
static dev_type_read(ccdread);
static dev_type_write(ccdwrite);
static dev_type_ioctl(ccdioctl);
static dev_type_strategy(ccdstrategy);
static dev_type_size(ccdsize);

/*
 * Block device switch for ccd.  Crash dumps and discard are not
 * supported (nodump / nodiscard); the entry points are flagged
 * D_MPSAFE.
 */
const struct bdevsw ccd_bdevsw = {
	.d_open = ccdopen,
	.d_close = ccdclose,
	.d_strategy = ccdstrategy,
	.d_ioctl = ccdioctl,
	.d_dump = nodump,
	.d_psize = ccdsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK | D_MPSAFE
};

/*
 * Character (raw) device switch for ccd.  Reads and writes go through
 * ccdread()/ccdwrite(); the tty, poll, mmap, kqueue and discard hooks
 * are the stock "not supported" stubs.
 */
const struct cdevsw ccd_cdevsw = {
	.d_open = ccdopen,
	.d_close = ccdclose,
	.d_read = ccdread,
	.d_write = ccdwrite,
	.d_ioctl = ccdioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK | D_MPSAFE
};

#ifdef DEBUG
/* Debug helper that prints an interleave table (defined elsewhere in this file). */
static	void printiinfo(struct ccdiinfo *);
#endif

/* List of all existing ccd softcs, linked through sc_link. */
static LIST_HEAD(, ccd_softc) ccds = LIST_HEAD_INITIALIZER(ccds);
/* Protects the ccds list and ccd_nactive. */
static kmutex_t ccd_lock;
/* Number of softcs currently on the ccds list. */
static size_t ccd_nactive = 0;

/*
 * Allocate and initialize the softc for ccd unit `unit'.
 *
 * The softc is zero-filled and then given its name, unit number, locks,
 * condition variables and disk structure.  It is not linked onto the
 * global list here; ccdget() does that.
 *
 * Returns the new softc, or NULL if the allocation failed.
 */
static struct ccd_softc *
ccdcreate(int unit) {
	struct ccd_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
	if (sc == NULL) {
#ifdef DIAGNOSTIC
		printf("%s: out of memory\n", __func__);
#endif
		return NULL;
	}
	/* Initialize per-softc structures. */
	snprintf(sc->sc_xname, sizeof(sc->sc_xname), "ccd%d", unit);
	/*
	 * Record the unit number.  ccdget() finds softcs by comparing
	 * sc_unit; left at its zero-filled value, every softc would look
	 * like unit 0 and lookups for any other unit would never match.
	 */
	sc->sc_unit = unit;
	mutex_init(&sc->sc_dvlock, MUTEX_DEFAULT, IPL_NONE);
	sc->sc_iolock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	cv_init(&sc->sc_stop, "ccdstop");
	cv_init(&sc->sc_push, "ccdthr");
	disk_init(&sc->sc_dkdev, sc->sc_xname, NULL); /* XXX */
	return sc;
}

static void
ccddestroy(struct ccd_softc *sc) {
	mutex_obj_free(sc->sc_iolock);
	mutex_exit(&sc->sc_dvlock);
	mutex_destroy(&sc->sc_dvlock);
	cv_destroy(&sc->sc_stop);
	cv_destroy(&sc->sc_push);
	disk_destroy(&sc->sc_dkdev);
	kmem_free(sc, sizeof(*sc));
}

/*
 * Look up the softc for `unit' on the global list.
 *
 * If no softc exists and `make' is non-zero, a new one is created with
 * ccdcreate(), put at the head of the list and counted in ccd_nactive.
 *
 * Returns the softc, or NULL when the unit is negative, when it does
 * not exist and `make' is zero, or when creation fails.
 *
 * NOTE(review): ccd_lock is dropped between the lookup and the insert,
 * so two concurrent callers asking for the same missing unit could
 * each create one -- confirm whether callers serialize this.
 */
static struct ccd_softc *
ccdget(int unit, int make) {
	struct ccd_softc *found;

	if (unit < 0) {
#ifdef DIAGNOSTIC
		panic("%s: unit %d!", __func__, unit);
#endif
		return NULL;
	}

	/* Search the existing units; `found' ends up NULL if none matches. */
	mutex_enter(&ccd_lock);
	LIST_FOREACH(found, &ccds, sc_link) {
		if (found->sc_unit == unit)
			break;
	}
	mutex_exit(&ccd_lock);

	if (found != NULL)
		return found;
	if (make == 0)
		return NULL;

	found = ccdcreate(unit);
	if (found == NULL)
		return NULL;

	/* Publish the freshly created unit. */
	mutex_enter(&ccd_lock);
	LIST_INSERT_HEAD(&ccds, found, sc_link);
	ccd_nactive++;
	mutex_exit(&ccd_lock);

	return found;
}

/*
 * Retire a softc: take it off the global list, adjust the active
 * count, and hand it to ccddestroy() to be torn down and freed.
 */
static void
ccdput(struct ccd_softc *sc) {
	/* Unlink and uncount under the list lock. */
	mutex_enter(&ccd_lock);
	ccd_nactive--;
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&ccd_lock);

	/* No longer reachable through the list; safe to dismantle. */
	ccddestroy(sc);
}

/*
 * Called during pseudo-device attachment.  Sets up the driver-wide
 * state only: the pool cache that component buffers come from and the
 * lock protecting the list of units.  Units themselves are created on
 * demand by ccdget(), so the count passed in is not used.
 */
void
ccdattach(int num)
{
	/* Component buffer pool. */
	ccd_cache = pool_cache_init(sizeof(struct ccdbuf), 0,
	    0, 0, "ccdbuf", NULL, IPL_BIO, NULL, NULL, NULL);

	/* Lock for the ccds list and ccd_nactive. */
	mutex_init(&ccd_lock, MUTEX_DEFAULT, IPL_NONE);
}

/*
 * Release the driver-wide state set up by ccdattach(): the component
 * buffer pool cache and the unit list lock.
 */
void
ccddetach(void)
{
	pool_cache_destroy(ccd_cache);
	mutex_destroy(&ccd_lock);
}

static int
ccdinit(struct ccd_softc *cs, char **cpaths, struct vnode **vpp,
    struct lwp *l)
{
	struct ccdcinfo *ci = NULL;
	int ix;
	struct ccdgeom *ccg = &cs->sc_geom;
	char *tmppath;
	int error, path_alloced;
	uint64_t psize, minsize;
	unsigned secsize, maxsecsize;
	struct disk_geom *dg;

#ifdef DEBUG
	if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
		printf("%s: ccdinit\n", cs->sc_xname);
#endif

	/* Allocate space for the component info. */
	cs->sc_cinfo = kmem_alloc(cs->sc_nccdisks * sizeof(*cs->sc_cinfo),
	    KM_SLEEP);
	tmppath = kmem_alloc(MAXPATHLEN, KM_SLEEP);

	cs->sc_size = 0;

	/*
	 * Verify that each component piece exists and record
	 * relevant information about it.
	 */
	maxsecsize = 0;
	minsize = 0;
	for (ix = 0, path_alloced = 0; ix < cs->sc_nccdisks; ix++) {
		ci = &cs->sc_cinfo[ix];
		ci->ci_vp = vpp[ix];

		/*
		 * Copy in the pathname of the component.
		 */
		memset(tmppath, 0, MAXPATHLEN);	/* sanity */
		error = copyinstr(cpaths[ix], tmppath,
		    MAXPATHLEN, &ci->ci_pathlen);
		if (ci->ci_pathlen == 0)
			error = EINVAL;
		if (error) {
#ifdef DEBUG
			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
				printf("%s: can't copy path, error = %d\n",
				    cs->sc_xname, error);
#endif
			goto out;
		}
		ci->ci_path = kmem_alloc(ci->ci_pathlen, KM_SLEEP);
		memcpy(ci->ci_path, tmppath, ci->ci_pathlen);
		path_alloced++;

		/*
		 * XXX: Cache the component's dev_t.
		 */
		ci->ci_dev = vpp[ix]->v_rdev;

		/*
		 * Get partition information for the component.
		 */
		error = getdisksize(vpp[ix], &psize, &secsize);
		if (error) {
#ifdef DEBUG
			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
				 printf("%s: %s: disksize failed, error = %d\n",
				     cs->sc_xname, ci->ci_path, error);
#endif
			goto out;
		}

		/*
		 * Calculate the size, truncating to an interleave
		 * boundary if necessary.
		 */
		maxsecsize = secsize > maxsecsize ? secsize : maxsecsize;
		if (cs->sc_ileave > 1)
			psize -= psize % cs->sc_ileave;

		if (psize == 0) {
#ifdef DEBUG
			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
				printf("%s: %s: size == 0\n",
				    cs->sc_xname, ci->ci_path);
#endif
			error = ENODEV;
			goto out;
		}

		if (minsize == 0 || psize < minsize)
			minsize = psize;
		ci->ci_size = psize;
		cs->sc_size += psize;
	}

	/*
	 * Don't allow the interleave to be smaller than
	 * the biggest component sector.
	 */
	if ((cs->sc_ileave > 0) &&
	    (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
#ifdef DEBUG
		if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
			printf("%s: interleave must be at least %d\n",
			    cs->sc_xname, (maxsecsize / DEV_BSIZE));
#endif
		error = EINVAL;
		goto out;
	}

	/*
	 * If uniform interleave is desired set all sizes to that of
	 * the smallest component.
	 */
	if (cs->sc_flags & CCDF_UNIFORM) {
		for (ci = cs->sc_cinfo;
		     ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++)
			ci->ci_size = minsize;

		cs->sc_size = cs->sc_nccdisks * minsize;
	}

	/*
	 * Construct the interleave table.
	 */
	ccdinterleave(cs);

	/*
	 * Create pseudo-geometry based on 1MB cylinders.  It's
	 * pretty close.
	 */
	ccg->ccg_secsize = DEV_BSIZE;
	ccg->ccg_ntracks = 1;
	ccg->ccg_nsectors = 1024 * (1024 / ccg->ccg_secsize);
	ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors;

        dg = &cs->sc_dkdev.dk_geom;
        memset(dg, 0, sizeof(*dg));
	dg->dg_secperunit = cs->sc_size;
	dg->dg_secsize = ccg->ccg_secsize;
	dg->dg_nsectors = ccg->ccg_nsectors;
	dg->dg_ntracks = ccg->ccg_ntracks;
	dg->dg_ncylinders = ccg->ccg_ncylinders;
	
	if (cs->sc_ileave > 0)
	        aprint_normal("%s: Interleaving %d component%s "
	            "(%d block interleave)\n", cs->sc_xname,
        	    cs->sc_nccdisks, (cs->sc_nccdisks != 0 ? "s" : ""),
        	    cs->sc_ileave);
	else
	        aprint_normal("%s: Concatenating %d component%s\n",
	            cs->sc_xname,
        	    cs->sc_nccdisks, (cs->sc_nccdisks != 0 ? "s" : ""));
	for (ix = 0; ix < cs->sc_nccdisks; ix++) {
		ci = &cs->sc_cinfo[ix];
		aprint_normal("%s: %s (%ju blocks)\n", cs->sc_xname,
		    ci->ci_path, (uintmax_t)ci->ci_size);
	}
	aprint_normal("%s: total %ju blocks\n", cs->sc_xname, cs->sc_size);

	/*
	 * Create thread to handle deferred I/O.
	 */
	cs->sc_zap = false;
	error = kthread_create(PRI_BIO, KTHREAD_MPSAFE, NULL, ccdthread,
	    cs, &cs->sc_thread, "%s", cs->sc_xname);
	if (error) {
		printf("ccdinit: can't create thread: %d\n", error);
		goto out;
	}

	/*
	 * Only now that everything is set up can we enable the device.
	 */
	mutex_enter(cs->sc_iolock);
	cs->sc_flags |= CCDF_INITED;
	mutex_exit(cs->sc_iolock);
	kmem_free(tmppath, MAXPATHLEN);
	return (0);

 out:
	for (ix = 0; ix < path_alloced; ix++) {
		kmem_free(cs->sc_cinfo[ix].ci_path,
		    cs->sc_cinfo[ix].ci_pathlen);
	}
	kmem_free(cs->sc_cinfo, cs->sc_nccdisks * sizeof(struct ccdcinfo));
	kmem_free(tmppath, MAXPATHLEN);
	return (error);
}

static void
ccdinterleave(struct ccd_softc *cs)
{
	struct ccdcinfo *ci, *smallci;
	struct ccdiinfo *ii;
	daddr_t bn, lbn;
	int ix;
	u_long size;

#ifdef DEBUG
	if (ccddebug & CCDB_INIT)
		printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave);
#endif
	/*
	 * Allocate an interleave table.
	 * Chances are this is too big, but we don't care.
	 */
	size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo);
	cs->sc_itable = kmem_zalloc(size, KM_SLEEP);

	/*
	 * Trivial case: no interleave (actually interleave of disk size).
	 * Each table entry represents a single component in its entirety.
	 */
	if (cs->sc_ileave == 0) {
		bn = 0;
		ii = cs->sc_itable;

		for (ix = 0; ix < cs->sc_nccdisks; ix++) {
			/* Allocate space for ii_index. */
			ii->ii_indexsz = sizeof(int);
			ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP);
			ii->ii_ndisk = 1;
			ii->ii_startblk = bn;
			ii->ii_startoff = 0;
			ii->ii_index[0] = ix;
			bn += cs->sc_cinfo[ix].ci_size;
			ii++;
		}
		ii->ii_ndisk = 0;
#ifdef DEBUG
		if (ccddebug & CCDB_INIT)
			printiinfo(cs->sc_itable);
#endif
		return;
	}

	/*
	 * The following isn't fast or pretty; it doesn't have to be.
	 */
	size = 0;
	bn = lbn = 0;
	for (ii = cs->sc_itable; ; ii++) {
		/* Allocate space for ii_index. */
		ii->ii_indexsz = sizeof(int) * cs->sc_nccdisks;
		ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP);

		/*
		 * Locate the smallest of the remaining components
		 */
		smallci = NULL;
		for (ci = cs->sc_cinfo;
		     ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++)
			if (ci->ci_size > size &&
			    (smallci == NULL ||
			     ci->ci_size < smallci->ci_size))
				smallci = ci;

		/*
		 * Nobody left, all done
		 */
		if (smallci == NULL) {
			ii->ii_ndisk = 0;
			break;
		}

		/*
		 * Record starting logical block and component offset
		 */
		ii->ii_startblk = bn / cs->sc_ileave;
		ii->ii_startoff = lbn;

		/*
		 * Determine how many disks take part in this interleave
		 * and record their indices.
		 */
		ix = 0;
		for (ci = cs->sc_cinfo;
		     ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++)
			if (ci->ci_size >= smallci->ci_size)
				ii->ii_index[ix++] = ci - cs->sc_cinfo;
		ii->ii_ndisk = ix;
		bn += ix * (smallci->ci_size - size);
		lbn = smallci->ci_size / cs->sc_ileave;
		size = smallci->ci_size;
	}
#ifdef DEBUG
	if (ccddebug & CCDB_INIT)
		printiinfo(cs->sc_itable);
#endif
}

/* ARGSUSED */
/*
 * Open entry point (block and character).
 *
 * Finds the softc for the unit, creating it if necessary.  If the unit
 * is configured, has no valid label yet and nothing else has it open,
 * the disklabel is read in.  Non-raw partitions can only be opened on a
 * configured unit and must exist in the label.  On success the
 * partition is recorded in the open masks.
 *
 * Returns 0, or ENXIO if no softc could be obtained or the partition
 * is not available.
 */
static int
ccdopen(dev_t dev, int flags, int fmt, struct lwp *l)
{
	const int unit = ccdunit(dev);
	const int part = DISKPART(dev);
	const int pmask = (1 << part);
	struct ccd_softc *cs;
	struct disklabel *lp;
	int error = 0;

#ifdef DEBUG
	if (ccddebug & CCDB_FOLLOW)
		printf("ccdopen(0x%"PRIx64", 0x%x)\n", dev, flags);
#endif
	cs = ccdget(unit, 1);
	if (cs == NULL)
		return ENXIO;

	mutex_enter(&cs->sc_dvlock);

	lp = cs->sc_dkdev.dk_label;

	/*
	 * First open of a configured unit whose label is not yet valid:
	 * nobody else is looking at the in-core label, so load it now.
	 */
	if (cs->sc_dkdev.dk_openmask == 0 &&
	    (cs->sc_flags & (CCDF_INITED|CCDF_VLABEL)) == CCDF_INITED)
		ccdgetdisklabel(dev);

	/* Anything but the raw partition has to exist in the label. */
	if (part != RAW_PART) {
		const bool inited = (cs->sc_flags & CCDF_INITED) != 0;

		if (!inited ||
		    part >= lp->d_npartitions ||
		    lp->d_partitions[part].p_fstype == FS_UNUSED) {
			error = ENXIO;
			goto done;
		}
	}

	/* Note the open so the unit cannot be unconfigured underneath us. */
	if (fmt == S_IFCHR)
		cs->sc_dkdev.dk_copenmask |= pmask;
	else if (fmt == S_IFBLK)
		cs->sc_dkdev.dk_bopenmask |= pmask;
	cs->sc_dkdev.dk_openmask =
	    cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask;

 done:
	mutex_exit(&cs->sc_dvlock);
	return (error);
}

/* ARGSUSED */
/*
 * Close entry point (block and character).
 *
 * Clears the partition from the matching open mask.  When the last
 * open goes away and the label is not being kept (CCDF_KLABEL clear),
 * the in-core label is marked invalid so it is re-read on next open.
 *
 * Returns 0, or ENXIO if the unit does not exist.
 */
static int
ccdclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	const int unit = ccdunit(dev);
	const int pmask = (1 << DISKPART(dev));
	struct ccd_softc *cs;

#ifdef DEBUG
	if (ccddebug & CCDB_FOLLOW)
		printf("ccdclose(0x%"PRIx64", 0x%x)\n", dev, flags);
#endif

	cs = ccdget(unit, 0);
	if (cs == NULL)
		return ENXIO;

	mutex_enter(&cs->sc_dvlock);

	/* ...that much closer to allowing unconfiguration... */
	if (fmt == S_IFCHR)
		cs->sc_dkdev.dk_copenmask &= ~pmask;
	else if (fmt == S_IFBLK)
		cs->sc_dkdev.dk_bopenmask &= ~pmask;
	cs->sc_dkdev.dk_openmask =
	    cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask;

	/* Last close: forget the label unless asked to keep it. */
	if (cs->sc_dkdev.dk_openmask == 0 &&
	    (cs->sc_flags & CCDF_KLABEL) == 0)
		cs->sc_flags &= ~CCDF_VLABEL;

	mutex_exit(&cs->sc_dvlock);
	return (0);
}

static bool
ccdbackoff(struct ccd_softc *cs)
{

	/* XXX Arbitrary, should be a uvm call. */
	return uvmexp.free < (uvmexp.freemin >> 1) &&
	    disk_isbusy(&cs->sc_dkdev);
}

/*
 * Per-unit kernel thread that issues requests ccdstrategy() left on the
 * queue.  It sleeps on sc_push while the queue is empty, polls with a
 * one-tick timeout while ccdbackoff() says to hold off, and otherwise
 * hands the next request to ccdstart().  It exits once sc_zap is set.
 *
 * sc_iolock is held while the state is examined; ccdstart() returns
 * with it released, so it is re-taken after each dispatch.
 */
static void
ccdthread(void *cookie)
{
	struct ccd_softc *cs = cookie;

#ifdef DEBUG
	if (ccddebug & CCDB_FOLLOW)
		printf("ccdthread: hello\n");
#endif

	mutex_enter(cs->sc_iolock);
	for (;;) {
		if (__predict_false(cs->sc_zap))
			break;

		if (bufq_peek(cs->sc_bufq) == NULL) {
			/* Nothing to do. */
			cv_wait(&cs->sc_push, cs->sc_iolock);
		} else if (ccdbackoff(cs)) {
			/* Wait for memory to become available. */
			(void)cv_timedwait(&cs->sc_push, cs->sc_iolock, 1);
		} else {
#ifdef DEBUG
			if (ccddebug & CCDB_FOLLOW)
				printf("ccdthread: dispatching I/O\n");
#endif
			/* Drops sc_iolock; take it back for the next pass. */
			ccdstart(cs);
			mutex_enter(cs->sc_iolock);
		}
	}
	cs->sc_thread = NULL;
	mutex_exit(cs->sc_iolock);
#ifdef DEBUG
	if (ccddebug & CCDB_FOLLOW)
		printf("ccdthread: goodbye\n");
#endif
	kthread_exit(0);
}

/*
 * Strategy entry point: queue a buffer for I/O on the ccd.
 *
 * The request is put on sc_bufq.  Normally it is started right away via
 * ccdstart(); if ccdbackoff() reports that memory is short it is left
 * queued for ccdthread() to pick up.
 *
 * Every path completes the buffer: a request for a unit that does not
 * exist or is not configured is finished with ENXIO via biodone(), so
 * whoever issued it is never left waiting.
 */
static void
ccdstrategy(struct buf *bp)
{
	int unit = ccdunit(bp->b_dev);
	struct ccd_softc *cs;

	if ((cs = ccdget(unit, 0)) == NULL) {
		/* No such unit: fail the request instead of dropping it. */
		bp->b_error = ENXIO;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return;
	}

	/* Must be open or reading label. */
	KASSERT(cs->sc_dkdev.dk_openmask != 0 ||
	    (cs->sc_flags & CCDF_RLABEL) != 0);

	mutex_enter(cs->sc_iolock);
	/* Synchronize with device init/uninit. */
	if (__predict_false((cs->sc_flags & CCDF_INITED) == 0)) {
		mutex_exit(cs->sc_iolock);
#ifdef DEBUG
		if (ccddebug & CCDB_FOLLOW)
			printf("ccdstrategy: unit %d: not inited\n", unit);
#endif
		bp->b_error = ENXIO;
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return;
	}

	/* Defer to thread if system is low on memory. */
	bufq_put(cs->sc_bufq, bp);
	if (__predict_false(ccdbackoff(cs))) {
		mutex_exit(cs->sc_iolock);
#ifdef DEBUG
		if (ccddebug & CCDB_FOLLOW)
			printf("ccdstrategy: holding off on I/O\n");
#endif
		return;
	}
	/* ccdstart() releases sc_iolock for us. */
	ccdstart(cs);
}

/*
 * Take the next request off sc_bufq and issue it to the components.
 *
 * Must be called with sc_iolock held and a request on the queue; the
 * lock is released before returning on every path.
 *
 * A zero-length request, or one rejected by the bounds check, is
 * completed immediately at "done".  Otherwise the request is cut into
 * component buffers by ccdbuffer() and each piece is handed to
 * VOP_STRATEGY(); completion is then driven by ccdiodone().
 */
static void
ccdstart(struct ccd_softc *cs)
{
	daddr_t blkno;
	int wlabel;
	struct disklabel *lp;
	long bcount, rcount;
	struct ccdbuf *cbp;
	char *addr;
	daddr_t bn;
	vnode_t *vp;
	buf_t *bp;

	KASSERT(mutex_owned(cs->sc_iolock));

	/* Marked busy up front; undone at "done" or in ccdiodone(). */
	disk_busy(&cs->sc_dkdev);
	bp = bufq_get(cs->sc_bufq);
	KASSERT(bp != NULL);

#ifdef DEBUG
	if (ccddebug & CCDB_FOLLOW)
		printf("ccdstart(%s, %p)\n", cs->sc_xname, bp);
#endif

	/* If it's a nil transfer, wake up the top half now. */
	if (bp->b_bcount == 0)
		goto done;

	lp = cs->sc_dkdev.dk_label;

	/*
	 * Do bounds checking and adjust transfer.  If there's an
	 * error, the bounds check will flag that for us.  Convert
	 * the partition relative block number to an absolute.
	 */
	blkno = bp->b_blkno;
	wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING);
	/* The raw partition is not checked against the label. */
	if (DISKPART(bp->b_dev) != RAW_PART) {
		if (bounds_check_with_label(&cs->sc_dkdev, bp, wlabel) <= 0)
			goto done;
		blkno += lp->d_partitions[DISKPART(bp->b_dev)].p_offset;
	}
	/* Component I/O is issued without sc_iolock held. */
	mutex_exit(cs->sc_iolock);
	bp->b_rawblkno = blkno;

	/* Allocate the component buffers and start I/O! */
	/* b_resid starts at the full count; ccdiodone() subtracts each piece. */
	bp->b_resid = bp->b_bcount;
	bn = bp->b_rawblkno;
	addr = bp->b_data;
	for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) {
		cbp = ccdbuffer(cs, bp, bn, addr, bcount);
		/* ccdbuffer() may cover less than was asked for. */
		rcount = cbp->cb_buf.b_bcount;
		bn += btodb(rcount);
		addr += rcount;
		vp = cbp->cb_buf.b_vp;
		/* Writes are counted in the component vnode's v_numoutput. */
		if ((cbp->cb_buf.b_flags & B_READ) == 0) {
			mutex_enter(vp->v_interlock);
			vp->v_numoutput++;
			mutex_exit(vp->v_interlock);
		}
		(void)VOP_STRATEGY(vp, &cbp->cb_buf);
	}
	return;

 done:
	/* Nothing was transferred: undo disk_busy() and wake any waiters. */
	disk_unbusy(&cs->sc_dkdev, 0, 0);
	cv_broadcast(&cs->sc_stop);
	cv_broadcast(&cs->sc_push);
	mutex_exit(cs->sc_iolock);
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}

/*
 * Build a component buffer header.
 *
 * cs     - the ccd the request is for.
 * bp     - the original request.
 * bn     - block number, relative to the whole ccd, where this piece starts.
 * addr   - data address for this piece.
 * bcount - bytes of the original request still to be issued.
 *
 * Maps bn to a component and a block on that component, then returns a
 * freshly allocated ccdbuf set up for that I/O.  The piece stops at the
 * end of the component (concatenated) or of the interleave stripe
 * (interleaved), so its b_bcount may be less than bcount; the caller
 * loops until the whole request is covered.
 */
static struct ccdbuf *
ccdbuffer(struct ccd_softc *cs, struct buf *bp, daddr_t bn, void *addr,
    long bcount)
{
	struct ccdcinfo *ci;
	struct ccdbuf *cbp;
	daddr_t cbn, cboff;
	u_int64_t cbc;
	int ccdisk;

#ifdef DEBUG
	if (ccddebug & CCDB_IO)
		printf("ccdbuffer(%p, %p, %" PRId64 ", %p, %ld)\n",
		       cs, bp, bn, addr, bcount);
#endif
	/*
	 * Determine which component bn falls in.
	 */
	cbn = bn;
	cboff = 0;

	/*
	 * Serially concatenated
	 */
	if (cs->sc_ileave == 0) {
		daddr_t sblk;

		/* Walk the components until the one containing cbn is found. */
		sblk = 0;
		for (ccdisk = 0, ci = &cs->sc_cinfo[ccdisk];
		    cbn >= sblk + ci->ci_size;
		    ccdisk++, ci = &cs->sc_cinfo[ccdisk])
			sblk += ci->ci_size;
		/* Make cbn relative to the start of that component. */
		cbn -= sblk;
	}
	/*
	 * Interleaved
	 */
	else {
		struct ccdiinfo *ii;
		int off;

		/* Split bn into a stripe number and an offset within the stripe. */
		cboff = cbn % cs->sc_ileave;
		cbn /= cs->sc_ileave;
		/* Find the first table entry starting beyond cbn, then step back. */
		for (ii = cs->sc_itable; ii->ii_ndisk; ii++)
			if (ii->ii_startblk > cbn)
				break;
		ii--;
		off = cbn - ii->ii_startblk;
		if (ii->ii_ndisk == 1) {
			ccdisk = ii->ii_index[0];
			cbn = ii->ii_startoff + off;
		} else {
			/* Stripes rotate across the disks taking part in this entry. */
			ccdisk = ii->ii_index[off % ii->ii_ndisk];
			cbn = ii->ii_startoff + off / ii->ii_ndisk;
		}
		/* Back from stripes to blocks. */
		cbn *= cs->sc_ileave;
		ci = &cs->sc_cinfo[ccdisk];
	}

	/*
	 * Fill in the component buf structure.
	 */
	cbp = CCD_GETBUF();
	KASSERT(cbp != NULL);
	buf_init(&cbp->cb_buf);
	cbp->cb_buf.b_flags = bp->b_flags;
	cbp->cb_buf.b_oflags = bp->b_oflags;
	cbp->cb_buf.b_cflags = bp->b_cflags;
	cbp->cb_buf.b_iodone = ccdiodone;
	cbp->cb_buf.b_proc = bp->b_proc;
	cbp->cb_buf.b_dev = ci->ci_dev;
	cbp->cb_buf.b_blkno = cbn + cboff;
	cbp->cb_buf.b_data = addr;
	cbp->cb_buf.b_vp = ci->ci_vp;
	cbp->cb_buf.b_objlock = ci->ci_vp->v_interlock;
	/* Bytes available before the component / stripe boundary. */
	if (cs->sc_ileave == 0)
		cbc = dbtob((u_int64_t)(ci->ci_size - cbn));
	else
		cbc = dbtob((u_int64_t)(cs->sc_ileave - cboff));
	cbp->cb_buf.b_bcount = cbc < bcount ? cbc : bcount;

	/*
	 * context for ccdiodone
	 */
	cbp->cb_obp = bp;
	cbp->cb_sc = cs;
	cbp->cb_comp = ccdisk;

	BIO_COPYPRIO(&cbp->cb_buf, bp);

#ifdef DEBUG
	if (ccddebug & CCDB_IO)
		printf(" dev 0x%"PRIx64"(u%lu): cbp %p bn %" PRId64 " addr %p"
		       " bcnt %d\n",
		    ci->ci_dev, (unsigned long) (ci-cs->sc_cinfo), cbp,
		    cbp->cb_buf.b_blkno, cbp->cb_buf.b_data,
		    cbp->cb_buf.b_bcount);
#endif

	return (cbp);
}

/*
 * Called at interrupt time.
 * Completion handler for one component buffer.  Any error is copied to
 * the original request, the component buffer is released, and the
 * bytes it covered are subtracted from the original's b_resid.  Once
 * b_resid reaches zero the original request is finished: the disk is
 * marked unbusy, waiters are woken and biodone() is called on it.
 */
static void
ccdiodone(struct buf *vbp)
{
	/* vbp is the cb_buf member at the start of a struct ccdbuf. */
	struct ccdbuf *cbp = (struct ccdbuf *) vbp;
	struct buf *bp = cbp->cb_obp;
	struct ccd_softc *cs = cbp->cb_sc;
	int nbytes;

#ifdef DEBUG
	if (ccddebug & CCDB_FOLLOW)
		printf("ccdiodone(%p)\n", cbp);
	if (ccddebug & CCDB_IO) {
		printf("ccdiodone: bp %p bcount %d resid %d\n",
		       bp, bp->b_bcount, bp->b_resid);
		printf(" dev 0x%"PRIx64"(u%d), cbp %p bn %" PRId64 " addr %p"
		       " bcnt %d\n",
		       cbp->cb_buf.b_dev, cbp->cb_comp, cbp,
		       cbp->cb_buf.b_blkno, cbp->cb_buf.b_data,
		       cbp->cb_buf.b_bcount);
	}
#endif

	/* Propagate a component failure to the original request. */
	if (cbp->cb_buf.b_error != 0) {
		bp->b_error = cbp->cb_buf.b_error;
		printf("%s: error %d on component %d\n",
		       cs->sc_xname, bp->b_error, cbp->cb_comp);
	}

	/* Remember how much this piece covered, then let go of it. */
	nbytes = cbp->cb_buf.b_bcount;
	buf_destroy(&cbp->cb_buf);
	CCD_PUTBUF(cbp);

	mutex_enter(cs->sc_iolock);
	bp->b_resid -= nbytes;
	if (bp->b_resid < 0)
		panic("ccdiodone: count");

	/* Other pieces are still outstanding; nothing more to do yet. */
	if (bp->b_resid != 0) {
		mutex_exit(cs->sc_iolock);
		return;
	}

	/*
	 * Request is done for better or worse, wakeup the top half.
	 * A failed request reports nothing transferred.
	 */
	if (bp->b_error != 0)
		bp->b_resid = bp->b_bcount;
	disk_unbusy(&cs->sc_dkdev, (bp->b_bcount - bp->b_resid),
	    (bp->b_flags & B_READ));
	if (!disk_isbusy(&cs->sc_dkdev)) {
		/* Idle now: kick the thread if work is queued. */
		if (bufq_peek(cs->sc_bufq) != NULL)
			cv_broadcast(&cs->sc_push);
		cv_broadcast(&cs->sc_stop);
	}
	mutex_exit(cs->sc_iolock);
	biodone(bp);
}

/* ARGSUSED */
/*
 * Character-device read entry point.
 *
 * Returns ENOENT if the unit does not exist (the same error ccdwrite()
 * reports; returning 0 here would look like a successful zero-byte
 * read), ENXIO if it exists but is not configured, and otherwise the
 * result of physio() driving ccdstrategy().
 */
static int
ccdread(dev_t dev, struct uio *uio, int flags)
{
	int unit = ccdunit(dev);
	struct ccd_softc *cs;

#ifdef DEBUG
	if (ccddebug & CCDB_FOLLOW)
		printf("ccdread(0x%"PRIx64", %p)\n", dev, uio);
#endif
	if ((cs = ccdget(unit, 0)) == NULL)
		return ENOENT;

	/* Unlocked advisory check, ccdstrategy check is synchronous. */
	if ((cs->sc_flags & CCDF_INITED) == 0)
		return (ENXIO);

	return (physio(ccdstrategy, NULL, dev, B_READ, minphys, uio));
}

/* ARGSUSED */
/*
 * Character-device write entry point.
 *
 * Returns ENOENT if the unit does not exist, ENXIO if it exists but is
 * not configured, and otherwise the result of physio() driving
 * ccdstrategy().
 */
static int
ccdwrite(dev_t dev, struct uio *uio, int flags)
{
	const int unit = ccdunit(dev);
	struct ccd_softc *cs;

#ifdef DEBUG
	if (ccddebug & CCDB_FOLLOW)
		printf("ccdwrite(0x%"PRIx64", %p)\n", dev, uio);
#endif
	cs = ccdget(unit, 0);
	if (cs == NULL)
		return ENOENT;

	/*
	 * Advisory only: the flag is read without sc_iolock here, and
	 * ccdstrategy() repeats the test under the lock.
	 */
	if (!(cs->sc_flags & CCDF_INITED))
		return (ENXIO);

	return (physio(ccdstrategy, NULL, dev, B_WRITE, minphys, uio));
}

static int
ccdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int unit = ccdunit(dev);
	int i, j, lookedup = 0, error = 0;
	int part, pmask, make;
	struct ccd_softc *cs;
	struct ccd_ioctl *ccio = (struct ccd_ioctl *)data;
	kauth_cred_t uc;
	char **cpp;
	struct pathbuf *pb;
	struct vnode **vpp;
#ifdef __HAVE_OLD_DISKLABEL
	struct disklabel newlabel;
#endif

	switch (cmd) {
#if defined(COMPAT_60) && !defined(_LP64)
	case CCDIOCSET_60:
#endif
	case CCDIOCSET:
		make = 1;
		break;
	default:
		make = 0;
		break;
	}

	if ((cs = ccdget(unit, make)) == NULL)
		return ENOENT;
	uc = kauth_cred_get();

/*
 * Compat code must not be called if on a platform where
 * sizeof (size_t) == sizeof (uint64_t) as CCDIOCSET will
 * be the same as CCDIOCSET_60
 */
#if defined(COMPAT_60) && !defined(_LP64)
	switch (cmd) {
	case CCDIOCSET_60: {
		struct ccd_ioctl ccionew;
       		struct ccd_ioctl_60 *ccio60 =
       		    (struct ccd_ioctl_60 *)data;
		ccionew.ccio_disks = ccio->ccio_disks;
		ccionew.ccio_ndisks = ccio->ccio_ndisks;
		ccionew.ccio_ileave = ccio->ccio_ileave;
		ccionew.ccio_flags = ccio->ccio_flags;
		ccionew.ccio_unit = ccio->ccio_unit;
		error = ccdioctl(dev, CCDIOCSET, &ccionew, flag, l);
		if (!error) {
			/* Copy data back, adjust types if necessary */
			ccio60->ccio_disks = ccionew.ccio_disks;
			ccio60->ccio_ndisks = ccionew.ccio_ndisks;
			ccio60->ccio_ileave = ccionew.ccio_ileave;
			ccio60->ccio_flags = ccionew.ccio_flags;
			ccio60->ccio_unit = ccionew.ccio_unit;
			ccio60->ccio_size = (size_t)ccionew.ccio_size;
		}
		return error;
		}
		break;

	case CCDIOCCLR_60:
		/*
		 * ccio_size member not used, so existing struct OK
		 * drop through to existing non-compat version
		 */
		cmd = CCDIOCCLR;
		break;
	}
#endif /* COMPAT_60 && !_LP64*/

	/* Must be open for writes for these commands... */
	switch (cmd) {
	case CCDIOCSET:
	case CCDIOCCLR:
	case DIOCSDINFO:
	case DIOCWDINFO:
	case DIOCCACHESYNC:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCMWEDGES:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCSDINFO:
	case ODIOCWDINFO:
#endif
	case DIOCKLABEL:
	case DIOCWLABEL:
		if ((flag & FWRITE) == 0)
			return (EBADF);
	}

	mutex_enter(&cs->sc_dvlock);

	/* Must be initialized for these... */
	switch (cmd) {
	case CCDIOCCLR:
	case DIOCGDINFO:
	case DIOCCACHESYNC:
	case DIOCAWEDGE:
	case DIOCDWEDGE:
	case DIOCLWEDGES:
	case DIOCMWEDGES:
	case DIOCSDINFO:
	case DIOCWDINFO:
	case DIOCGPART:
	case DIOCWLABEL:
	case DIOCKLABEL:
	case DIOCGDEFLABEL:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDINFO:
	case ODIOCSDINFO:
	case ODIOCWDINFO:
	case ODIOCGDEFLABEL:
#endif
		if ((cs->sc_flags & CCDF_INITED) == 0) {
			error = ENXIO;
			goto out;
		}
	}

	error = disk_ioctl(&cs->sc_dkdev, dev, cmd, data, flag, l); 
	if (error != EPASSTHROUGH)
		goto out;

	error = 0;
	switch (cmd) {
	case CCDIOCSET:
		if (cs->sc_flags & CCDF_INITED) {
			error = EBUSY;
			goto out;
		}

		/* Validate the flags. */
		if ((ccio->ccio_flags & CCDF_USERMASK) != ccio->ccio_flags) {
			error = EINVAL;
			goto out;
		}

		if (ccio->ccio_ndisks > CCD_MAXNDISKS ||
		    ccio->ccio_ndisks == 0) {
			error = EINVAL;
			goto out;
		}

		/* Fill in some important bits. */
		cs->sc_ileave = ccio->ccio_ileave;
		cs->sc_nccdisks = ccio->ccio_ndisks;
		cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK;

		/*
		 * Allocate space for and copy in the array of
		 * component pathnames and device numbers.
		 */
		cpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*cpp), KM_SLEEP);
		vpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*vpp), KM_SLEEP);
		error = copyin(ccio->ccio_disks, cpp,
		    ccio->ccio_ndisks * sizeof(*cpp));
		if (error) {
			kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp));
			kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp));
			goto out;
		}

#ifdef DEBUG
		if (ccddebug & CCDB_INIT)
			for (i = 0; i < ccio->ccio_ndisks; ++i)
				printf("ccdioctl: component %d: %p\n",
				    i, cpp[i]);
#endif

		for (i = 0; i < ccio->ccio_ndisks; ++i) {
#ifdef DEBUG
			if (ccddebug & CCDB_INIT)
				printf("ccdioctl: lookedup = %d\n", lookedup);
#endif
			error = pathbuf_copyin(cpp[i], &pb);
			if (error == 0) {
				error = dk_lookup(pb, l, &vpp[i]);
			}
			pathbuf_destroy(pb);
			if (error != 0) {
				for (j = 0; j < lookedup; ++j)
					(void)vn_close(vpp[j], FREAD|FWRITE,
					    uc);
				kmem_free(vpp, ccio->ccio_ndisks *
				    sizeof(*vpp));
				kmem_free(cpp, ccio->ccio_ndisks *
				    sizeof(*cpp));
				goto out;
			}
			++lookedup;
		}

		/* Attach the disk. */
		disk_attach(&cs->sc_dkdev);
		bufq_alloc(&cs->sc_bufq, "fcfs", 0);

		/*
		 * Initialize the ccd.  Fills in the softc for us.
		 */
		if ((error = ccdinit(cs, cpp, vpp, l)) != 0) {
			for (j = 0; j < lookedup; ++j)
				(void)vn_close(vpp[j], FREAD|FWRITE,
				    uc);
			kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp));
			kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp));
			disk_detach(&cs->sc_dkdev);
			bufq_free(cs->sc_bufq);
			goto out;
		}

		/* We can free the temporary variables now. */
		kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp));
		kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp));

		/*
		 * The ccd has been successfully initialized, so
		 * we can place it into the array.  Don't try to
		 * read the disklabel until the disk has been attached,
		 * because space for the disklabel is allocated
		 * in disk_attach();
		 */
		ccio->ccio_unit = unit;
		ccio->ccio_size = cs->sc_size;

		/* Try and read the disklabel. */
		ccdgetdisklabel(dev);
		disk_set_info(NULL, &cs->sc_dkdev, NULL);

		/* discover wedges */
		mutex_exit(&cs->sc_dvlock);
		dkwedge_discover(&cs->sc_dkdev);
		return 0;

	case CCDIOCCLR:
		/*
		 * Don't unconfigure if any other partitions are open
		 * or if both the character and block flavors of this
		 * partition are open.
		 */
		part = DISKPART(dev);
		pmask = (1 << part);
		if ((cs->sc_dkdev.dk_openmask & ~pmask) ||
		    ((cs->sc_dkdev.dk_bopenmask & pmask) &&
		    (cs->sc_dkdev.dk_copenmask & pmask))) {
			error = EBUSY;
			goto out;
		}

		/* Delete all of our wedges. */
		dkwedge_delall(&cs->sc_dkdev);

		/* Stop new I/O, wait for in-flight I/O to complete. */
		mutex_enter(cs->sc_iolock);
		cs->sc_flags &= ~(CCDF_INITED|CCDF_VLABEL);
		cs->sc_zap = true;
		while (disk_isbusy(&cs->sc_dkdev) ||
		    bufq_peek(cs->sc_bufq) != NULL ||
		    cs->sc_thread != NULL) {
			cv_broadcast(&cs->sc_push);
			(void)cv_timedwait(&cs->sc_stop, cs->sc_iolock, hz);
		}
		mutex_exit(cs->sc_iolock);

		/*
		 * Free ccd_softc information and clear entry.
		 */

		/* Close the components and free their pathnames. */
		for (i = 0; i < cs->sc_nccdisks; ++i) {
			/*
			 * XXX: this close could potentially fail and
			 * cause Bad Things.  Maybe we need to force
			 * the close to happen?
			 */
#ifdef DEBUG
			if (ccddebug & CCDB_VNODE)
				vprint("CCDIOCCLR: vnode info",
				    cs->sc_cinfo[i].ci_vp);
#endif
			(void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE,
			    uc);
			kmem_free(cs->sc_cinfo[i].ci_path,
			    cs->sc_cinfo[i].ci_pathlen);
		}

		/* Free interleave index. */
		for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) {
			kmem_free(cs->sc_itable[i].ii_index,
			    cs->sc_itable[i].ii_indexsz);
		}

		/* Free component info and interleave table. */
		kmem_free(cs->sc_cinfo, cs->sc_nccdisks *
		    sizeof(struct ccdcinfo));
		kmem_free(cs->sc_itable, (cs->sc_nccdisks + 1) *
		    sizeof(struct ccdiinfo));

		aprint_normal("%s: detached\n", cs->sc_xname);

		/* Detach the disk. */
		disk_detach(&cs->sc_dkdev);
		bufq_free(cs->sc_bufq);
		ccdput(cs);
		/* Don't break, otherwise cs is read again. */
		return 0;

	case DIOCCACHESYNC:
		/*
		 * We pass this call down to all components and report
		 * the first error we encounter.
		 */
		for (error = 0, i = 0; i < cs->sc_nccdisks; i++) {
			j = VOP_IOCTL(cs->sc_cinfo[i].ci_vp, cmd, data,
				      flag, uc);
			if (j != 0 && error == 0)
				error = j;
		}
		break;

	case DIOCWDINFO:
	case DIOCSDINFO:
#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCWDINFO:
	case ODIOCSDINFO:
#endif
	{
		struct disklabel *lp;
#ifdef __HAVE_OLD_DISKLABEL
		if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
			memset(&newlabel, 0, sizeof newlabel);
			memcpy(&newlabel, data, sizeof (struct olddisklabel));
			lp = &newlabel;
		} else
#endif
		lp = (struct disklabel *)data;

		cs->sc_flags |= CCDF_LABELLING;

		error = setdisklabel(cs->sc_dkdev.dk_label,
		    lp, 0, cs->sc_dkdev.dk_cpulabel);
		if (error == 0) {
			if (cmd == DIOCWDINFO
#ifdef __HAVE_OLD_DISKLABEL
			    || cmd == ODIOCWDINFO
#endif
			   )
				error = writedisklabel(CCDLABELDEV(dev),
				    ccdstrategy, cs->sc_dkdev.dk_label,
				    cs->sc_dkdev.dk_cpulabel);
		}

		cs->sc_flags &= ~CCDF_LABELLING;
		break;
	}

	case DIOCKLABEL:
		if (*(int *)data != 0)
			cs->sc_flags |= CCDF_KLABEL;
		else
			cs->sc_flags &= ~CCDF_KLABEL;
		break;

	case DIOCWLABEL:
		if (*(int *)data != 0)
			cs->sc_flags |= CCDF_WLABEL;
		else
			cs->sc_flags &= ~CCDF_WLABEL;
		break;

	case DIOCGDEFLABEL:
		ccdgetdefaultlabel(cs, (struct disklabel *)data);
		break;

#ifdef __HAVE_OLD_DISKLABEL
	case ODIOCGDEFLABEL:
		ccdgetdefaultlabel(cs, &newlabel);
		if (newlabel.d_npartitions > OLDMAXPARTITIONS)
			return ENOTTY;
		memcpy(data, &newlabel, sizeof (struct olddisklabel));
		break;
#endif

	default:
		error = ENOTTY;
	}

 out:
	mutex_exit(&cs->sc_dvlock);
	return (error);
}

/*
 * Report the size of the partition named by dev, in DEV_BSIZE units.
 *
 * Returns -1 if the unit does not exist or is not configured, if the
 * partition had to be opened/closed here and that failed, or if the
 * partition is not of type FS_SWAP.
 */
static int
ccdsize(dev_t dev)
{
	struct ccd_softc *cs;
	struct disklabel *lp;
	struct partition *pp;
	const int part = DISKPART(dev);
	bool wasopen;
	int size;

	cs = ccdget(ccdunit(dev), 0);
	if (cs == NULL || (cs->sc_flags & CCDF_INITED) == 0)
		return -1;

	/* Remember whether somebody already holds this partition open. */
	wasopen = (cs->sc_dkdev.dk_openmask & (1 << part)) != 0;
	lp = cs->sc_dkdev.dk_label;

	/* If nobody does, open it ourselves for the duration of the query. */
	if (!wasopen && ccdopen(dev, 0, S_IFBLK, curlwp) != 0)
		return -1;

	/* Only swap partitions report a size. */
	pp = &lp->d_partitions[part];
	size = -1;
	if (pp->p_fstype == FS_SWAP)
		size = pp->p_size * (lp->d_secsize / DEV_BSIZE);

	/* Undo our temporary open. */
	if (!wasopen && ccdclose(dev, 0, S_IFBLK, curlwp) != 0)
		return -1;

	return size;
}

/*
 * Fill *lp with a default ("fictitious") disklabel describing the ccd:
 * geometry is taken from cs->sc_geom, the unit size from cs->sc_size
 * (clamped to UINT32_MAX), and a single raw partition covering the
 * whole unit is created.
 *
 * lp may be the softc's in-core label or any caller-supplied buffer;
 * the checksum is always computed over the label that was just built.
 */
static void
ccdgetdefaultlabel(struct ccd_softc *cs, struct disklabel *lp)
{
	struct ccdgeom *ccg = &cs->sc_geom;

	memset(lp, 0, sizeof(*lp));

	/* d_secperunit cannot represent more than UINT32_MAX sectors. */
	if (cs->sc_size > UINT32_MAX)
		lp->d_secperunit = UINT32_MAX;
	else
		lp->d_secperunit = cs->sc_size;
	lp->d_secsize = ccg->ccg_secsize;
	lp->d_nsectors = ccg->ccg_nsectors;
	lp->d_ntracks = ccg->ccg_ntracks;
	lp->d_ncylinders = ccg->ccg_ncylinders;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;

	strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename));
	lp->d_type = DKTYPE_CCD;
	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
	lp->d_rpm = 3600;
	lp->d_interleave = 1;
	lp->d_flags = 0;

	/* One raw partition spanning the entire unit. */
	lp->d_partitions[RAW_PART].p_offset = 0;
	lp->d_partitions[RAW_PART].p_size = lp->d_secperunit;
	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
	lp->d_npartitions = RAW_PART + 1;

	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	/*
	 * Checksum the label we just filled in, not the softc's in-core
	 * label: the two differ when the caller passed its own buffer.
	 */
	lp->d_checksum = dkcksum(lp);
}

/*
 * Read the disklabel from the ccd.  If one is not present, fake one
 * up.
 *
 * The in-core label is first reset to the default label, then (unless
 * CCDF_NOLABEL is set) replaced by whatever readdisklabel() finds.  A
 * label that was read successfully is sanity-checked against the
 * current size of the ccd; mismatches only produce warnings.
 *
 * Must be called with cs->sc_dvlock held.  Silently returns if the
 * unit does not exist.
 */
static void
ccdgetdisklabel(dev_t dev)
{
	int unit = ccdunit(dev);
	struct ccd_softc *cs;
	const char *errstring;
	struct disklabel *lp;
	struct cpu_disklabel *clp;

	if ((cs = ccdget(unit, 0)) == NULL)
		return;
	lp = cs->sc_dkdev.dk_label;
	clp = cs->sc_dkdev.dk_cpulabel;
	KASSERT(mutex_owned(&cs->sc_dvlock));

	memset(clp, 0, sizeof(*clp));

	ccdgetdefaultlabel(cs, lp);

	/*
	 * Call the generic disklabel extraction routine.
	 */
	cs->sc_flags |= CCDF_RLABEL;
	if ((cs->sc_flags & CCDF_NOLABEL) != 0)
		errstring = "CCDF_NOLABEL set; ignoring on-disk label";
	else
		errstring = readdisklabel(CCDLABELDEV(dev), ccdstrategy,
		    cs->sc_dkdev.dk_label, cs->sc_dkdev.dk_cpulabel);
	if (errstring)
		ccdmakedisklabel(cs);
	else {
		int i;
		struct partition *pp;

		/*
		 * Sanity check whether the found disklabel is valid.
		 *
		 * This is necessary since the total size of the ccd may
		 * vary when the interleave is changed even though exactly
		 * the same components are used, and an old disklabel may
		 * be used if one is found.
		 */
		if (lp->d_secperunit < UINT32_MAX ?
			lp->d_secperunit != cs->sc_size :
			lp->d_secperunit > cs->sc_size)
			printf("WARNING: %s: "
			    "total sector size in disklabel (%ju) != "
			    "the size of ccd (%ju)\n", cs->sc_xname,
			    (uintmax_t)lp->d_secperunit,
			    (uintmax_t)cs->sc_size);
		for (i = 0; i < lp->d_npartitions; i++) {
			pp = &lp->d_partitions[i];
			/*
			 * Widen before adding: the partition fields are
			 * 32-bit, so offset + size could otherwise wrap
			 * and make an oversized partition look valid.
			 */
			if ((uint64_t)pp->p_offset + pp->p_size > cs->sc_size)
				printf("WARNING: %s: end of partition `%c' "
				    "exceeds the size of ccd (%ju)\n",
				    cs->sc_xname, 'a' + i, (uintmax_t)cs->sc_size);
		}
	}

#ifdef DEBUG
	/* It's actually extremely common to have unlabeled ccds. */
	if (ccddebug & CCDB_LABEL)
		if (errstring != NULL)
			printf("%s: %s\n", cs->sc_xname, errstring);
#endif

	/* In-core label now valid. */
	cs->sc_flags = (cs->sc_flags | CCDF_VLABEL) & ~CCDF_RLABEL;
}

/*
 * Take care of things one might want to take care of in the event
 * that a disklabel isn't present.
 */
static void
ccdmakedisklabel(struct ccd_softc *cs)
{
	struct disklabel *lp = cs->sc_dkdev.dk_label;

	/*
	 * For historical reasons, if there's no disklabel present
	 * the raw partition must be marked FS_BSDFFS.
	 */
	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;

	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));

	lp->d_checksum = dkcksum(lp);
}

#ifdef DEBUG
/*
 * Debug helper: dump an interleave table.  The table is walked until
 * an entry with ii_ndisk == 0 is reached; for each entry the disk
 * count, start block, start offset and the index array are printed.
 */
static void
printiinfo(struct ccdiinfo *ii)
{
	int entry = 0;

	while (ii->ii_ndisk) {
		printf(" itab[%d]: #dk %d sblk %" PRId64 " soff %" PRId64,
		    entry, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff);
		for (int k = 0; k < ii->ii_ndisk; k++)
			printf(" %d", ii->ii_index[k]);
		printf("\n");

		entry++;
		ii++;
	}
}
#endif

MODULE(MODULE_CLASS_DRIVER, ccd, "dk_subr");

/*
 * Module control entry point.
 *
 * MODULE_CMD_INIT: when built as a module, attach the driver and
 *	register its block/character device switches.
 * MODULE_CMD_FINI: when built as a module, refuse with EBUSY while
 *	ccd_nactive is non-zero; otherwise unregister the device
 *	switches and detach the driver.
 * Anything else (including MODULE_CMD_STAT): ENOTTY.
 *
 * When not built as a module, INIT and FINI do nothing and return 0.
 */
static int
ccd_modcmd(modcmd_t cmd, void *arg)
{
#ifdef _MODULE
	int bmajor = -1, cmajor = -1;
	bool busy;
#endif
	int error = 0;

	if (cmd == MODULE_CMD_INIT) {
#ifdef _MODULE
		ccdattach(0);

		error = devsw_attach("ccd", &ccd_bdevsw, &bmajor,
		    &ccd_cdevsw, &cmajor);
#endif
		return error;
	}

	if (cmd == MODULE_CMD_FINI) {
#ifdef _MODULE
		/* Sample the active count under the lock, act on it after. */
		mutex_enter(&ccd_lock);
		busy = (ccd_nactive != 0);
		mutex_exit(&ccd_lock);

		if (busy)
			return EBUSY;

		error = devsw_detach(&ccd_bdevsw, &ccd_cdevsw);
		ccddetach();
#endif
		return error;
	}

	/* MODULE_CMD_STAT and every other command are not supported. */
	return ENOTTY;
}

/*
 * sysctl handler for the "units" node: returns an array of int holding
 * the unit number of every ccd currently on the ccds list.
 *
 * The list is walked twice under ccd_lock, once to count the entries
 * and once to fill the array; the lock is dropped in between because
 * the allocation may sleep.  If the list grows in between, the extra
 * units are dropped; if it shrinks, the unused tail stays zero.
 *
 * Returns the result of sysctl_lookup(), or ENOMEM if the temporary
 * array could not be allocated.
 */
static int
ccd_units_sysctl(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	struct ccd_softc *sc;
	int error, i, nccd, *units;
	size_t size;

	/* Pass 1: count the units. */
	nccd = 0;
	mutex_enter(&ccd_lock);
	LIST_FOREACH(sc, &ccds, sc_link)
		nccd++;
	mutex_exit(&ccd_lock);

	if (nccd != 0) {
		size = nccd * sizeof(*units);
		units = kmem_zalloc(size, KM_SLEEP);
		if (units == NULL)
			return ENOMEM;

		/* Pass 2: record each unit number in its own slot. */
		i = 0;
		mutex_enter(&ccd_lock);
		LIST_FOREACH(sc, &ccds, sc_link) {
			if (i >= nccd)
				break;
			units[i++] = sc->sc_unit;
		}
		mutex_exit(&ccd_lock);
	} else {
		units = NULL;
		size = 0;
	}

	/* Hand the snapshot to sysctl_lookup() through a node copy. */
	node = *rnode;
	node.sysctl_data = units;
	node.sysctl_size = size;

	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (units)
		kmem_free(units, size);
	return error;
}

/*
 * sysctl handler for the "info" node: the caller passes a unit number
 * (an int) as the "new" value and receives a struct ccddiskinfo
 * describing that unit.
 *
 * Returns EINVAL if no unit number of the right size was supplied,
 * ENOENT if no ccd with that unit number is on the ccds list, and
 * otherwise the result of sysctl_lookup().
 */
static int
ccd_info_sysctl(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	struct ccddiskinfo ccd;
	struct ccd_softc *sc;
	bool found = false;
	int unit;

	if (newp == NULL || newlen != sizeof(int))
		return EINVAL;

	unit = *(const int *)newp;
	/*
	 * The "new" value was only the unit selector; clear it before
	 * calling sysctl_lookup() (presumably so the lookup is treated
	 * as a plain read).
	 */
	newp = NULL;
	newlen = 0;

	/*
	 * The structure is copied out wholesale, so zero it first: any
	 * padding between or after the members must not carry stale
	 * kernel stack contents to the caller.
	 */
	memset(&ccd, 0, sizeof(ccd));

	mutex_enter(&ccd_lock);
	LIST_FOREACH(sc, &ccds, sc_link) {
		if (sc->sc_unit == unit) {
			ccd.ccd_ileave = sc->sc_ileave;
			ccd.ccd_size = sc->sc_size;
			ccd.ccd_ndisks = sc->sc_nccdisks;
			ccd.ccd_flags = sc->sc_flags;
			found = true;
			break;
		}
	}
	mutex_exit(&ccd_lock);

	if (!found)
		return ENOENT;

	node = *rnode;
	node.sysctl_data = &ccd;
	node.sysctl_size = sizeof(ccd);

	return sysctl_lookup(SYSCTLFN_CALL(&node));
}

static int
ccd_components_sysctl(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int error, unit;
	size_t size;
	char *names, *p, *ep;
	struct ccd_softc *sc;

	if (newp == NULL || newlen != sizeof(int))
		return EINVAL;

	size = 0;
	unit = *(const int *)newp;
	newp = NULL;
	newlen = 0;
	mutex_enter(&ccd_lock);
	LIST_FOREACH(sc, &ccds, sc_link)
		if (sc->sc_unit == unit) {
			for (size_t i = 0; i < sc->sc_nccdisks; i++)
				size += strlen(sc->sc_cinfo[i].ci_path) + 1;
			break;
		}
	mutex_exit(&ccd_lock);

	if (size == 0)
		return ENOENT;
	names = kmem_zalloc(size, KM_SLEEP); 
	if (names == NULL)
		return ENOMEM;

	p = names;
	ep = names + size;
	mutex_enter(&ccd_lock);
	LIST_FOREACH(sc, &ccds, sc_link)
		if (sc->sc_unit == unit) {
			for (size_t i = 0; i < sc->sc_nccdisks; i++) {
				char *d = sc->sc_cinfo[i].ci_path;
				while (p < ep && (*p++ = *d++) != '\0') 
					continue;
			}
			break;
		}
	mutex_exit(&ccd_lock);

	node = *rnode;
	node.sysctl_data = names;
	node.sysctl_size = ep - names;

	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	kmem_free(names, size);
	return error;
}

/*
 * Create the "ccd" sysctl node under CTL_KERN and its three children:
 *   units      - handled by ccd_units_sysctl
 *   info       - handled by ccd_info_sysctl
 *   components - handled by ccd_components_sysctl
 * All node numbers are assigned dynamically (CTL_CREATE).
 */
SYSCTL_SETUP(sysctl_kern_ccd_setup, "sysctl kern.ccd subtree setup")
{
	const struct sysctlnode *node = NULL;

	/* Parent node; on success its address is stored in 'node'. */
	sysctl_createv(clog, 0, NULL, &node,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "ccd",
	    SYSCTL_DESCR("ConCatenated Disk state"),
	    NULL, 0, NULL, 0,
	    CTL_KERN, CTL_CREATE, CTL_EOL);

	/* Without a parent there is nothing to hang the children on. */
	if (node == NULL)
		return;

	/* Read-only: takes no input from the caller. */
	sysctl_createv(clog, 0, &node, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READONLY,
	    CTLTYPE_STRUCT, "units",
	    SYSCTL_DESCR("List of ccd unit numbers"),
	    ccd_units_sysctl, 0, NULL, 0,
	    CTL_CREATE, CTL_EOL);
	/*
	 * The next two are read-write because their handlers take the
	 * unit number to query from the "new" value.
	 */
	sysctl_createv(clog, 0, &node, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_STRUCT, "info",
	    SYSCTL_DESCR("Information about a CCD unit"),
	    ccd_info_sysctl, 0, NULL, 0,
	    CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &node, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
	    CTLTYPE_STRUCT, "components",
	    SYSCTL_DESCR("Information about CCD components"),
	    ccd_components_sysctl, 0, NULL, 0,
	    CTL_CREATE, CTL_EOL);
}