[BACK]Return to rf_netbsdkintf.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / dev / raidframe

Annotation of src/sys/dev/raidframe/rf_netbsdkintf.c, Revision 1.370

1.370   ! christos    1: /*     $NetBSD: rf_netbsdkintf.c,v 1.369 2019/02/06 02:49:50 oster Exp $       */
1.281     rmind       2:
1.1       oster       3: /*-
1.295     erh         4:  * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
1.1       oster       5:  * All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to The NetBSD Foundation
                      8:  * by Greg Oster; Jason R. Thorpe.
                      9:  *
                     10:  * Redistribution and use in source and binary forms, with or without
                     11:  * modification, are permitted provided that the following conditions
                     12:  * are met:
                     13:  * 1. Redistributions of source code must retain the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer.
                     15:  * 2. Redistributions in binary form must reproduce the above copyright
                     16:  *    notice, this list of conditions and the following disclaimer in the
                     17:  *    documentation and/or other materials provided with the distribution.
                     18:  *
                     19:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     20:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     21:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     22:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     23:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     24:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     25:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     26:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     27:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     28:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     29:  * POSSIBILITY OF SUCH DAMAGE.
                     30:  */
                     31:
                     32: /*
1.281     rmind      33:  * Copyright (c) 1988 University of Utah.
1.1       oster      34:  * Copyright (c) 1990, 1993
                     35:  *      The Regents of the University of California.  All rights reserved.
                     36:  *
                     37:  * This code is derived from software contributed to Berkeley by
                     38:  * the Systems Programming Group of the University of Utah Computer
                     39:  * Science Department.
                     40:  *
                     41:  * Redistribution and use in source and binary forms, with or without
                     42:  * modification, are permitted provided that the following conditions
                     43:  * are met:
                     44:  * 1. Redistributions of source code must retain the above copyright
                     45:  *    notice, this list of conditions and the following disclaimer.
                     46:  * 2. Redistributions in binary form must reproduce the above copyright
                     47:  *    notice, this list of conditions and the following disclaimer in the
                     48:  *    documentation and/or other materials provided with the distribution.
1.162     agc        49:  * 3. Neither the name of the University nor the names of its contributors
                     50:  *    may be used to endorse or promote products derived from this software
                     51:  *    without specific prior written permission.
                     52:  *
                     53:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     54:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     55:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     56:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     57:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     58:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     59:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     60:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     61:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     62:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     63:  * SUCH DAMAGE.
                     64:  *
                     65:  * from: Utah $Hdr: cd.c 1.6 90/11/28$
                     66:  *
                     67:  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
                     68:  */
                     69:
                     70: /*
1.1       oster      71:  * Copyright (c) 1995 Carnegie-Mellon University.
                     72:  * All rights reserved.
                     73:  *
                     74:  * Authors: Mark Holland, Jim Zelenka
                     75:  *
                     76:  * Permission to use, copy, modify and distribute this software and
                     77:  * its documentation is hereby granted, provided that both the copyright
                     78:  * notice and this permission notice appear in all copies of the
                     79:  * software, derivative works or modified versions, and any portions
                     80:  * thereof, and that both notices appear in supporting documentation.
                     81:  *
                     82:  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
                     83:  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
                     84:  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
                     85:  *
                     86:  * Carnegie Mellon requests users of this software to return to
                     87:  *
                     88:  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
                     89:  *  School of Computer Science
                     90:  *  Carnegie Mellon University
                     91:  *  Pittsburgh PA 15213-3890
                     92:  *
                     93:  * any improvements or extensions that they make and grant Carnegie the
                     94:  * rights to redistribute these changes.
                     95:  */
                     96:
                     97: /***********************************************************
                     98:  *
                     99:  * rf_kintf.c -- the kernel interface routines for RAIDframe
                    100:  *
                    101:  ***********************************************************/
1.112     lukem     102:
                    103: #include <sys/cdefs.h>
1.370   ! christos  104: __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.369 2019/02/06 02:49:50 oster Exp $");
1.251     ad        105:
                    106: #ifdef _KERNEL_OPT
                    107: #include "opt_raid_autoconfig.h"
1.363     mrg       108: #include "opt_compat_netbsd32.h"
1.251     ad        109: #endif
1.1       oster     110:
1.113     lukem     111: #include <sys/param.h>
1.1       oster     112: #include <sys/errno.h>
                    113: #include <sys/pool.h>
1.152     thorpej   114: #include <sys/proc.h>
1.1       oster     115: #include <sys/queue.h>
                    116: #include <sys/disk.h>
                    117: #include <sys/device.h>
                    118: #include <sys/stat.h>
                    119: #include <sys/ioctl.h>
                    120: #include <sys/fcntl.h>
                    121: #include <sys/systm.h>
                    122: #include <sys/vnode.h>
                    123: #include <sys/disklabel.h>
                    124: #include <sys/conf.h>
                    125: #include <sys/buf.h>
1.182     yamt      126: #include <sys/bufq.h>
1.65      oster     127: #include <sys/reboot.h>
1.208     elad      128: #include <sys/kauth.h>
1.327     pgoyette  129: #include <sys/module.h>
1.358     pgoyette  130: #include <sys/compat_stub.h>
1.8       oster     131:
1.234     oster     132: #include <prop/proplib.h>
                    133:
1.110     oster     134: #include <dev/raidframe/raidframevar.h>
                    135: #include <dev/raidframe/raidframeio.h>
1.269     jld       136: #include <dev/raidframe/rf_paritymap.h>
1.251     ad        137:
1.1       oster     138: #include "rf_raid.h"
1.44      oster     139: #include "rf_copyback.h"
1.1       oster     140: #include "rf_dag.h"
                    141: #include "rf_dagflags.h"
1.99      oster     142: #include "rf_desc.h"
1.1       oster     143: #include "rf_diskqueue.h"
                    144: #include "rf_etimer.h"
                    145: #include "rf_general.h"
                    146: #include "rf_kintf.h"
                    147: #include "rf_options.h"
                    148: #include "rf_driver.h"
                    149: #include "rf_parityscan.h"
                    150: #include "rf_threadstuff.h"
                    151:
1.325     christos  152: #include "ioconf.h"
                    153:
/*
 * Debug output helpers.
 *
 * db1_printf((fmt, ...)) takes a fully parenthesised printf() argument
 * list.  With DEBUG defined it prints only while rf_kdebug_level is
 * positive; without DEBUG it expands to an empty statement.  Both forms
 * are wrapped in do { } while (0) so the macro always behaves as exactly
 * one statement: it cannot capture a following "else" and it stays valid
 * when used as the unbraced body of an if/else.
 */
#ifdef DEBUG
int     rf_kdebug_level = 0;
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else                          /* DEBUG */
#define db1_printf(a) do { } while (0)
#endif                         /* DEBUG */

/*
 * DPRINTF(fmt, ...) forwards to printf() when DEBUG_ROOT is defined and
 * expands to nothing otherwise.  At least one argument must follow the
 * format string, since __VA_ARGS__ is pasted after a comma.
 */
#ifdef DEBUG_ROOT
#define DPRINTF(a, ...) printf(a, __VA_ARGS__)
#else
#define DPRINTF(a, ...)
#endif
                    166:
1.249     oster     167: #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1.289     mrg       168: static rf_declare_mutex2(rf_sparet_wait_mutex);
1.287     mrg       169: static rf_declare_cond2(rf_sparet_wait_cv);
                    170: static rf_declare_cond2(rf_sparet_resp_cv);
1.1       oster     171:
1.10      oster     172: static RF_SparetWait_t *rf_sparet_wait_queue;  /* requests to install a
                    173:                                                 * spare table */
                    174: static RF_SparetWait_t *rf_sparet_resp_queue;  /* responses from
                    175:                                                 * installation process */
1.249     oster     176: #endif
1.153     thorpej   177:
                    178: MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
1.10      oster     179:
1.1       oster     180: /* prototypes */
1.187     christos  181: static void KernelWakeupFunc(struct buf *);
                    182: static void InitBP(struct buf *, struct vnode *, unsigned,
1.225     christos  183:     dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
1.187     christos  184:     void *, int, struct proc *);
1.300     christos  185: static void raidinit(struct raid_softc *);
1.335     mlelstv   186: static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
1.348     jdolecek  187: static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);
1.1       oster     188:
1.261     dyoung    189: static int raid_match(device_t, cfdata_t, void *);
                    190: static void raid_attach(device_t, device_t, void *);
                    191: static int raid_detach(device_t, int);
1.130     gehenna   192:
1.269     jld       193: static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
                    194:     daddr_t, daddr_t);
                    195: static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
                    196:     daddr_t, daddr_t, int);
                    197:
1.276     mrg       198: static int raidwrite_component_label(unsigned,
                    199:     dev_t, struct vnode *, RF_ComponentLabel_t *);
                    200: static int raidread_component_label(unsigned,
                    201:     dev_t, struct vnode *, RF_ComponentLabel_t *);
1.269     jld       202:
1.335     mlelstv   203: static int raid_diskstart(device_t, struct buf *bp);
                    204: static int raid_dumpblocks(device_t, void *, daddr_t, int);
                    205: static int raid_lastclose(device_t);
1.269     jld       206:
1.324     mrg       207: static dev_type_open(raidopen);
                    208: static dev_type_close(raidclose);
                    209: static dev_type_read(raidread);
                    210: static dev_type_write(raidwrite);
                    211: static dev_type_ioctl(raidioctl);
                    212: static dev_type_strategy(raidstrategy);
                    213: static dev_type_dump(raiddump);
                    214: static dev_type_size(raidsize);
1.130     gehenna   215:
                    216: const struct bdevsw raid_bdevsw = {
1.305     dholland  217:        .d_open = raidopen,
                    218:        .d_close = raidclose,
                    219:        .d_strategy = raidstrategy,
                    220:        .d_ioctl = raidioctl,
                    221:        .d_dump = raiddump,
                    222:        .d_psize = raidsize,
1.311     dholland  223:        .d_discard = nodiscard,
1.305     dholland  224:        .d_flag = D_DISK
1.130     gehenna   225: };
                    226:
                    227: const struct cdevsw raid_cdevsw = {
1.305     dholland  228:        .d_open = raidopen,
                    229:        .d_close = raidclose,
                    230:        .d_read = raidread,
                    231:        .d_write = raidwrite,
                    232:        .d_ioctl = raidioctl,
                    233:        .d_stop = nostop,
                    234:        .d_tty = notty,
                    235:        .d_poll = nopoll,
                    236:        .d_mmap = nommap,
                    237:        .d_kqfilter = nokqfilter,
1.312     dholland  238:        .d_discard = nodiscard,
1.305     dholland  239:        .d_flag = D_DISK
1.130     gehenna   240: };
1.1       oster     241:
1.323     mlelstv   242: static struct dkdriver rf_dkdriver = {
1.335     mlelstv   243:        .d_open = raidopen,
                    244:        .d_close = raidclose,
1.323     mlelstv   245:        .d_strategy = raidstrategy,
1.335     mlelstv   246:        .d_diskstart = raid_diskstart,
                    247:        .d_dumpblocks = raid_dumpblocks,
                    248:        .d_lastclose = raid_lastclose,
1.323     mlelstv   249:        .d_minphys = minphys
                    250: };
1.235     oster     251:
1.1       oster     252: #define        raidunit(x)     DISKUNIT(x)
1.335     mlelstv   253: #define        raidsoftc(dev)  (((struct raid_softc *)device_private(dev))->sc_r.softc)
1.1       oster     254:
1.202     oster     255: extern struct cfdriver raid_cd;
1.266     dyoung    256: CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
                    257:     raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
                    258:     DVF_DETACH_SHUTDOWN);
1.202     oster     259:
1.353     mrg       260: /* Internal representation of a rf_recon_req */
                    261: struct rf_recon_req_internal {
                    262:        RF_RowCol_t col;
                    263:        RF_ReconReqFlags_t flags;
                    264:        void   *raidPtr;
                    265: };
                    266:
1.186     perry     267: /*
                    268:  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
                    269:  * Be aware that large numbers can allow the driver to consume a lot of
1.28      oster     270:  * kernel memory, especially on writes, and in degraded mode reads.
1.186     perry     271:  *
                    272:  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
                    273:  * a single 64K write will typically require 64K for the old data,
                    274:  * 64K for the old parity, and 64K for the new parity, for a total
1.28      oster     275:  * of 192K (if the parity buffer is not re-used immediately).
1.110     oster     276:  * Even if it is used immediately, that's still 128K, which when multiplied
1.28      oster     277:  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
1.186     perry     278:  *
1.28      oster     279:  * Now in degraded mode, for example, a 64K read on the above setup may
1.186     perry     280:  * require data reconstruction, which will require *all* of the 4 remaining
1.28      oster     281:  * disks to participate -- 4 * 32K/disk == 128K again.
1.20      oster     282:  */
                    283:
                    284: #ifndef RAIDOUTSTANDING
1.28      oster     285: #define RAIDOUTSTANDING   6
1.20      oster     286: #endif
                    287:
1.1       oster     288: #define RAIDLABELDEV(dev)      \
                    289:        (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
                    290:
                    291: /* declared here, and made public, for the benefit of KVM stuff.. */
1.9       oster     292:
1.104     oster     293: static int raidlock(struct raid_softc *);
                    294: static void raidunlock(struct raid_softc *);
1.1       oster     295:
1.266     dyoung    296: static int raid_detach_unlocked(struct raid_softc *);
                    297:
1.104     oster     298: static void rf_markalldirty(RF_Raid_t *);
1.304     christos  299: static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
1.48      oster     300:
1.353     mrg       301: void rf_ReconThread(struct rf_recon_req_internal *);
1.104     oster     302: void rf_RewriteParityThread(RF_Raid_t *raidPtr);
                    303: void rf_CopybackThread(RF_Raid_t *raidPtr);
1.353     mrg       304: void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
1.261     dyoung    305: int rf_autoconfig(device_t);
1.142     thorpej   306: void rf_buildroothack(RF_ConfigSet_t *);
1.104     oster     307:
                    308: RF_AutoConfig_t *rf_find_raid_components(void);
                    309: RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
                    310: static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
1.292     oster     311: int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
1.104     oster     312: void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
                    313: int rf_set_autoconfig(RF_Raid_t *, int);
                    314: int rf_set_rootpartition(RF_Raid_t *, int);
                    315: void rf_release_all_vps(RF_ConfigSet_t *);
                    316: void rf_cleanup_config_set(RF_ConfigSet_t *);
                    317: int rf_have_enough_components(RF_ConfigSet_t *);
1.300     christos  318: struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
1.278     mrg       319: static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
1.48      oster     320:
1.295     erh       321: /*
                    322:  * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
                    323:  * Note that this is overridden by having RAID_AUTOCONFIG as an option
                    324:  * in the kernel config file.
                    325:  */
                    326: #ifdef RAID_AUTOCONFIG
                    327: int raidautoconfig = 1;
                    328: #else
                    329: int raidautoconfig = 0;
                    330: #endif
                    331: static bool raidautoconfigdone = false;
1.37      oster     332:
1.177     oster     333: struct RF_Pools_s rf_pools;
                    334:
1.300     christos  335: static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
                    336: static kmutex_t raid_lock;
1.1       oster     337:
1.300     christos  338: static struct raid_softc *
                    339: raidcreate(int unit) {
                    340:        struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
                    341:        sc->sc_unit = unit;
1.327     pgoyette  342:        cv_init(&sc->sc_cv, "raidunit");
                    343:        mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
1.300     christos  344:        return sc;
                    345: }
1.1       oster     346:
1.300     christos  347: static void
                    348: raiddestroy(struct raid_softc *sc) {
1.327     pgoyette  349:        cv_destroy(&sc->sc_cv);
                    350:        mutex_destroy(&sc->sc_mutex);
1.300     christos  351:        kmem_free(sc, sizeof(*sc));
                    352: }
1.50      oster     353:
1.300     christos  354: static struct raid_softc *
1.327     pgoyette  355: raidget(int unit, bool create) {
1.300     christos  356:        struct raid_softc *sc;
                    357:        if (unit < 0) {
                    358: #ifdef DIAGNOSTIC
                    359:                panic("%s: unit %d!", __func__, unit);
                    360: #endif
                    361:                return NULL;
                    362:        }
                    363:        mutex_enter(&raid_lock);
                    364:        LIST_FOREACH(sc, &raids, sc_link) {
                    365:                if (sc->sc_unit == unit) {
                    366:                        mutex_exit(&raid_lock);
                    367:                        return sc;
                    368:                }
                    369:        }
                    370:        mutex_exit(&raid_lock);
1.327     pgoyette  371:        if (!create)
                    372:                return NULL;
1.300     christos  373:        if ((sc = raidcreate(unit)) == NULL)
                    374:                return NULL;
                    375:        mutex_enter(&raid_lock);
                    376:        LIST_INSERT_HEAD(&raids, sc, sc_link);
                    377:        mutex_exit(&raid_lock);
                    378:        return sc;
                    379: }
                    380:
                    381: static void
                    382: raidput(struct raid_softc *sc) {
                    383:        mutex_enter(&raid_lock);
                    384:        LIST_REMOVE(sc, sc_link);
                    385:        mutex_exit(&raid_lock);
                    386:        raiddestroy(sc);
                    387: }
1.1       oster     388:
/*
 * Attach entry point, kept as an empty stub: `num' is ignored.
 */
void
raidattach(int num)
{
	/*
	 * Nothing to do here.  Attaching the device and the related
	 * initialization are now carried out during module
	 * initialization.
	 */
}
                    398:
                    399: int
1.261     dyoung    400: rf_autoconfig(device_t self)
1.142     thorpej   401: {
                    402:        RF_AutoConfig_t *ac_list;
                    403:        RF_ConfigSet_t *config_sets;
                    404:
1.295     erh       405:        if (!raidautoconfig || raidautoconfigdone == true)
1.142     thorpej   406:                return (0);
                    407:
                    408:        /* XXX This code can only be run once. */
1.295     erh       409:        raidautoconfigdone = true;
1.142     thorpej   410:
1.307     christos  411: #ifdef __HAVE_CPU_BOOTCONF
                    412:        /*
                    413:         * 0. find the boot device if needed first so we can use it later
                    414:         * this needs to be done before we autoconfigure any raid sets,
                    415:         * because if we use wedges we are not going to be able to open
                    416:         * the boot device later
                    417:         */
                    418:        if (booted_device == NULL)
                    419:                cpu_bootconf();
                    420: #endif
1.48      oster     421:        /* 1. locate all RAID components on the system */
1.258     ad        422:        aprint_debug("Searching for RAID components...\n");
1.48      oster     423:        ac_list = rf_find_raid_components();
                    424:
1.142     thorpej   425:        /* 2. Sort them into their respective sets. */
1.48      oster     426:        config_sets = rf_create_auto_sets(ac_list);
                    427:
1.142     thorpej   428:        /*
1.299     oster     429:         * 3. Evaluate each set and configure the valid ones.
1.142     thorpej   430:         * This gets done in rf_buildroothack().
                    431:         */
                    432:        rf_buildroothack(config_sets);
1.48      oster     433:
1.213     christos  434:        return 1;
1.48      oster     435: }
                    436:
1.367     christos  437: int
                    438: rf_inited(const struct raid_softc *rs) {
                    439:        return (rs->sc_flags & RAIDF_INITED) != 0;
                    440: }
                    441:
1.368     oster     442: RF_Raid_t *
                    443: rf_get_raid(struct raid_softc *rs) {
                    444:        return &rs->sc_r;
                    445: }
                    446:
1.367     christos  447: int
                    448: rf_get_unit(const struct raid_softc *rs) {
                    449:        return rs->sc_unit;
                    450: }
                    451:
1.306     christos  452: static int
1.307     christos  453: rf_containsboot(RF_Raid_t *r, device_t bdv) {
1.359     bad       454:        const char *bootname;
                    455:        size_t len;
                    456:
                    457:        /* if bdv is NULL, the set can't contain it. exit early. */
                    458:        if (bdv == NULL)
                    459:                return 0;
                    460:
                    461:        bootname = device_xname(bdv);
                    462:        len = strlen(bootname);
1.306     christos  463:
                    464:        for (int col = 0; col < r->numCol; col++) {
1.307     christos  465:                const char *devname = r->Disks[col].devname;
1.306     christos  466:                devname += sizeof("/dev/") - 1;
1.307     christos  467:                if (strncmp(devname, "dk", 2) == 0) {
                    468:                        const char *parent =
                    469:                            dkwedge_get_parent_name(r->Disks[col].dev);
                    470:                        if (parent != NULL)
                    471:                                devname = parent;
                    472:                }
1.306     christos  473:                if (strncmp(devname, bootname, len) == 0) {
                    474:                        struct raid_softc *sc = r->softc;
                    475:                        aprint_debug("raid%d includes boot device %s\n",
                    476:                            sc->sc_unit, devname);
                    477:                        return 1;
                    478:                }
                    479:        }
                    480:        return 0;
                    481: }
                    482:
1.48      oster     483: void
1.142     thorpej   484: rf_buildroothack(RF_ConfigSet_t *config_sets)
1.48      oster     485: {
                    486:        RF_ConfigSet_t *cset;
                    487:        RF_ConfigSet_t *next_cset;
1.51      oster     488:        int num_root;
1.300     christos  489:        struct raid_softc *sc, *rsc;
1.335     mlelstv   490:        struct dk_softc *dksc;
1.48      oster     491:
1.300     christos  492:        sc = rsc = NULL;
1.51      oster     493:        num_root = 0;
1.48      oster     494:        cset = config_sets;
1.271     dyoung    495:        while (cset != NULL) {
1.48      oster     496:                next_cset = cset->next;
1.186     perry     497:                if (rf_have_enough_components(cset) &&
1.300     christos  498:                    cset->ac->clabel->autoconfigure == 1) {
                    499:                        sc = rf_auto_config_set(cset);
                    500:                        if (sc != NULL) {
1.359     bad       501:                                aprint_debug("raid%d: configured ok, rootable %d\n",
                    502:                                    sc->sc_unit, cset->rootable);
1.51      oster     503:                                if (cset->rootable) {
1.300     christos  504:                                        rsc = sc;
1.51      oster     505:                                        num_root++;
                    506:                                }
                    507:                        } else {
                    508:                                /* The autoconfig didn't work :( */
1.300     christos  509:                                aprint_debug("Autoconfig failed\n");
1.51      oster     510:                                rf_release_all_vps(cset);
1.48      oster     511:                        }
                    512:                } else {
1.186     perry     513:                        /* we're not autoconfiguring this set...
1.48      oster     514:                           release the associated resources */
1.49      oster     515:                        rf_release_all_vps(cset);
1.48      oster     516:                }
                    517:                /* cleanup */
1.49      oster     518:                rf_cleanup_config_set(cset);
1.48      oster     519:                cset = next_cset;
                    520:        }
1.335     mlelstv   521:        dksc = &rsc->sc_dksc;
1.122     oster     522:
1.223     oster     523:        /* if the user has specified what the root device should be
                    524:           then we don't touch booted_device or boothowto... */
                    525:
1.359     bad       526:        if (rootspec != NULL) {
                    527:                DPRINTF("%s: rootspec %s\n", __func__, rootspec);
1.223     oster     528:                return;
1.359     bad       529:        }
1.223     oster     530:
1.122     oster     531:        /* we found something bootable... */
                    532:
1.310     christos  533:        /*
                    534:         * XXX: The following code assumes that the root raid
                    535:         * is the first ('a') partition. This is about the best
                    536:         * we can do with a BSD disklabel, but we might be able
                    537:         * to do better with a GPT label, by setting a specified
                    538:         * attribute to indicate the root partition. We can then
                    539:         * stash the partition number in the r->root_partition
                    540:         * high bits (the bottom 2 bits are already used). For
                    541:         * now we just set booted_partition to 0 when we override
                    542:         * root.
                    543:         */
1.122     oster     544:        if (num_root == 1) {
1.306     christos  545:                device_t candidate_root;
1.335     mlelstv   546:                if (dksc->sc_dkdev.dk_nwedges != 0) {
1.297     christos  547:                        char cname[sizeof(cset->ac->devname)];
1.344     christos  548:                        /* XXX: assume partition 'a' first */
1.297     christos  549:                        snprintf(cname, sizeof(cname), "%s%c",
1.335     mlelstv   550:                            device_xname(dksc->sc_dev), 'a');
1.306     christos  551:                        candidate_root = dkwedge_find_by_wname(cname);
1.344     christos  552:                        DPRINTF("%s: candidate wedge root=%s\n", __func__,
                    553:                            cname);
                    554:                        if (candidate_root == NULL) {
                    555:                                /*
                    556:                                 * If that is not found, because we don't use
                    557:                                 * disklabel, return the first dk child
                    558:                                 * XXX: we can skip the 'a' check above
                    559:                                 * and always do this...
                    560:                                 */
                    561:                                size_t i = 0;
                    562:                                candidate_root = dkwedge_find_by_parent(
                    563:                                    device_xname(dksc->sc_dev), &i);
                    564:                        }
                    565:                        DPRINTF("%s: candidate wedge root=%p\n", __func__,
                    566:                            candidate_root);
1.297     christos  567:                } else
1.335     mlelstv   568:                        candidate_root = dksc->sc_dev;
1.344     christos  569:                DPRINTF("%s: candidate root=%p\n", __func__, candidate_root);
                    570:                DPRINTF("%s: booted_device=%p root_partition=%d "
1.359     bad       571:                        "contains_boot=%d",
                    572:                    __func__, booted_device, rsc->sc_r.root_partition,
                    573:                           rf_containsboot(&rsc->sc_r, booted_device));
                    574:                /* XXX the check for booted_device == NULL can probably be
                    575:                 * dropped, now that rf_containsboot handles that case.
                    576:                 */
1.308     christos  577:                if (booted_device == NULL ||
                    578:                    rsc->sc_r.root_partition == 1 ||
1.310     christos  579:                    rf_containsboot(&rsc->sc_r, booted_device)) {
1.308     christos  580:                        booted_device = candidate_root;
1.351     christos  581:                        booted_method = "raidframe/single";
1.310     christos  582:                        booted_partition = 0;   /* XXX assume 'a' */
                    583:                }
1.122     oster     584:        } else if (num_root > 1) {
1.344     christos  585:                DPRINTF("%s: many roots=%d, %p\n", __func__, num_root,
                    586:                    booted_device);
1.226     oster     587:
                    588:                /*
                    589:                 * Maybe the MD code can help. If it cannot, then
                    590:                 * setroot() will discover that we have no
                    591:                 * booted_device and will ask the user if nothing was
                    592:                 * hardwired in the kernel config file
                    593:                 */
                    594:                if (booted_device == NULL)
                    595:                        return;
                    596:
                    597:                num_root = 0;
1.300     christos  598:                mutex_enter(&raid_lock);
                    599:                LIST_FOREACH(sc, &raids, sc_link) {
                    600:                        RF_Raid_t *r = &sc->sc_r;
                    601:                        if (r->valid == 0)
1.226     oster     602:                                continue;
                    603:
1.300     christos  604:                        if (r->root_partition == 0)
1.226     oster     605:                                continue;
                    606:
1.306     christos  607:                        if (rf_containsboot(r, booted_device)) {
1.226     oster     608:                                num_root++;
1.300     christos  609:                                rsc = sc;
1.335     mlelstv   610:                                dksc = &rsc->sc_dksc;
1.226     oster     611:                        }
                    612:                }
1.300     christos  613:                mutex_exit(&raid_lock);
1.295     erh       614:
1.226     oster     615:                if (num_root == 1) {
1.335     mlelstv   616:                        booted_device = dksc->sc_dev;
1.351     christos  617:                        booted_method = "raidframe/multi";
1.310     christos  618:                        booted_partition = 0;   /* XXX assume 'a' */
1.226     oster     619:                } else {
                    620:                        /* we can't guess.. require the user to answer... */
                    621:                        boothowto |= RB_ASKNAME;
                    622:                }
1.51      oster     623:        }
1.1       oster     624: }
                    625:
1.324     mrg       626: static int
1.169     oster     627: raidsize(dev_t dev)
1.1       oster     628: {
                    629:        struct raid_softc *rs;
1.335     mlelstv   630:        struct dk_softc *dksc;
                    631:        unsigned int unit;
1.1       oster     632:
                    633:        unit = raidunit(dev);
1.327     pgoyette  634:        if ((rs = raidget(unit, false)) == NULL)
1.336     mlelstv   635:                return -1;
1.335     mlelstv   636:        dksc = &rs->sc_dksc;
                    637:
1.1       oster     638:        if ((rs->sc_flags & RAIDF_INITED) == 0)
1.336     mlelstv   639:                return -1;
1.1       oster     640:
1.335     mlelstv   641:        return dk_size(dksc, dev);
                    642: }
1.1       oster     643:
1.335     mlelstv   644: static int
                    645: raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
                    646: {
                    647:        unsigned int unit;
                    648:        struct raid_softc *rs;
                    649:        struct dk_softc *dksc;
1.1       oster     650:
1.335     mlelstv   651:        unit = raidunit(dev);
                    652:        if ((rs = raidget(unit, false)) == NULL)
                    653:                return ENXIO;
                    654:        dksc = &rs->sc_dksc;
1.1       oster     655:
1.335     mlelstv   656:        if ((rs->sc_flags & RAIDF_INITED) == 0)
                    657:                return ENODEV;
1.1       oster     658:
1.336     mlelstv   659:         /*
                    660:            Note that blkno is relative to this particular partition.
                    661:            By adding RF_PROTECTED_SECTORS, we get a value that
                    662:           is relative to the partition used for the underlying component.
                    663:         */
                    664:        blkno += RF_PROTECTED_SECTORS;
                    665:
1.335     mlelstv   666:        return dk_dump(dksc, dev, blkno, va, size);
1.1       oster     667: }
                    668:
1.324     mrg       669: static int
1.335     mlelstv   670: raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
1.1       oster     671: {
1.335     mlelstv   672:        struct raid_softc *rs = raidsoftc(dev);
1.231     oster     673:        const struct bdevsw *bdev;
                    674:        RF_Raid_t *raidPtr;
1.335     mlelstv   675:        int     c, sparecol, j, scol, dumpto;
1.231     oster     676:        int     error = 0;
                    677:
1.300     christos  678:        raidPtr = &rs->sc_r;
1.231     oster     679:
                    680:        /* we only support dumping to RAID 1 sets */
                    681:        if (raidPtr->Layout.numDataCol != 1 ||
                    682:            raidPtr->Layout.numParityCol != 1)
                    683:                return EINVAL;
                    684:
                    685:        if ((error = raidlock(rs)) != 0)
                    686:                return error;
                    687:
                    688:        /* figure out what device is alive.. */
                    689:
                    690:        /*
                    691:           Look for a component to dump to.  The preference for the
                    692:           component to dump to is as follows:
                    693:           1) the master
                    694:           2) a used_spare of the master
                    695:           3) the slave
                    696:           4) a used_spare of the slave
                    697:        */
                    698:
                    699:        dumpto = -1;
                    700:        for (c = 0; c < raidPtr->numCol; c++) {
                    701:                if (raidPtr->Disks[c].status == rf_ds_optimal) {
                    702:                        /* this might be the one */
                    703:                        dumpto = c;
                    704:                        break;
                    705:                }
                    706:        }
                    707:
                    708:        /*
                    709:           At this point we have possibly selected a live master or a
                    710:           live slave.  We now check to see if there is a spared
                    711:           master (or a spared slave), if we didn't find a live master
                    712:           or a live slave.
                    713:        */
                    714:
                    715:        for (c = 0; c < raidPtr->numSpare; c++) {
                    716:                sparecol = raidPtr->numCol + c;
                    717:                if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
                    718:                        /* How about this one? */
                    719:                        scol = -1;
                    720:                        for(j=0;j<raidPtr->numCol;j++) {
                    721:                                if (raidPtr->Disks[j].spareCol == sparecol) {
                    722:                                        scol = j;
                    723:                                        break;
                    724:                                }
                    725:                        }
                    726:                        if (scol == 0) {
                    727:                                /*
                    728:                                   We must have found a spared master!
                    729:                                   We'll take that over anything else
                    730:                                   found so far.  (We couldn't have
                    731:                                   found a real master before, since
                    732:                                   this is a used spare, and it's
                    733:                                   saying that it's replacing the
                    734:                                   master.)  On reboot (with
                    735:                                   autoconfiguration turned on)
                    736:                                   sparecol will become the 1st
                    737:                                   component (component0) of this set.
                    738:                                */
                    739:                                dumpto = sparecol;
                    740:                                break;
                    741:                        } else if (scol != -1) {
                    742:                                /*
                    743:                                   Must be a spared slave.  We'll dump
                    744:                                   to that if we haven't found anything
                    745:                                   else so far.
                    746:                                */
                    747:                                if (dumpto == -1)
                    748:                                        dumpto = sparecol;
                    749:                        }
                    750:                }
                    751:        }
                    752:
                    753:        if (dumpto == -1) {
                    754:                /* we couldn't find any live components to dump to!?!?
                    755:                 */
                    756:                error = EINVAL;
                    757:                goto out;
                    758:        }
                    759:
                    760:        bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
1.342     mlelstv   761:        if (bdev == NULL) {
                    762:                error = ENXIO;
                    763:                goto out;
                    764:        }
1.231     oster     765:
                    766:        error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
1.336     mlelstv   767:                                blkno, va, nblk * raidPtr->bytesPerSector);
1.231     oster     768:
                    769: out:
                    770:        raidunlock(rs);
                    771:
                    772:        return error;
1.1       oster     773: }
1.324     mrg       774:
1.1       oster     775: /* ARGSUSED */
1.324     mrg       776: static int
1.222     christos  777: raidopen(dev_t dev, int flags, int fmt,
                    778:     struct lwp *l)
1.1       oster     779: {
1.9       oster     780:        int     unit = raidunit(dev);
1.1       oster     781:        struct raid_softc *rs;
1.335     mlelstv   782:        struct dk_softc *dksc;
                    783:        int     error = 0;
1.9       oster     784:        int     part, pmask;
                    785:
1.327     pgoyette  786:        if ((rs = raidget(unit, true)) == NULL)
1.300     christos  787:                return ENXIO;
1.1       oster     788:        if ((error = raidlock(rs)) != 0)
1.9       oster     789:                return (error);
1.266     dyoung    790:
                    791:        if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
                    792:                error = EBUSY;
                    793:                goto bad;
                    794:        }
                    795:
1.335     mlelstv   796:        dksc = &rs->sc_dksc;
1.1       oster     797:
                    798:        part = DISKPART(dev);
                    799:        pmask = (1 << part);
                    800:
1.335     mlelstv   801:        if (!DK_BUSY(dksc, pmask) &&
1.13      oster     802:            ((rs->sc_flags & RAIDF_INITED) != 0)) {
                    803:                /* First one... mark things as dirty... Note that we *MUST*
                    804:                 have done a configure before this.  I DO NOT WANT TO BE
                    805:                 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
                    806:                 THAT THEY BELONG TOGETHER!!!!! */
                    807:                /* XXX should check to see if we're only open for reading
                    808:                   here... If so, we needn't do this, but then need some
                    809:                   other way of keeping track of what's happened.. */
                    810:
1.300     christos  811:                rf_markalldirty(&rs->sc_r);
1.13      oster     812:        }
                    813:
1.335     mlelstv   814:        if ((rs->sc_flags & RAIDF_INITED) != 0)
                    815:                error = dk_open(dksc, dev, flags, fmt, l);
1.1       oster     816:
1.213     christos  817: bad:
1.1       oster     818:        raidunlock(rs);
                    819:
1.9       oster     820:        return (error);
1.1       oster     821:
                    822:
                    823: }
1.324     mrg       824:
1.335     mlelstv   825: static int
                    826: raid_lastclose(device_t self)
                    827: {
                    828:        struct raid_softc *rs = raidsoftc(self);
                    829:
                    830:        /* Last one... device is not unconfigured yet.
                    831:           Device shutdown has taken care of setting the
                    832:           clean bits if RAIDF_INITED is not set.
                    833:           Mark things as clean... */
                    834:
                    835:        rf_update_component_labels(&rs->sc_r,
                    836:            RF_FINAL_COMPONENT_UPDATE);
                    837:
                    838:        /* pass to unlocked code */
                    839:        if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
                    840:                rs->sc_flags |= RAIDF_DETACH;
                    841:
                    842:        return 0;
                    843: }
                    844:
1.1       oster     845: /* ARGSUSED */
1.324     mrg       846: static int
1.222     christos  847: raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
1.1       oster     848: {
1.9       oster     849:        int     unit = raidunit(dev);
1.1       oster     850:        struct raid_softc *rs;
1.335     mlelstv   851:        struct dk_softc *dksc;
                    852:        cfdata_t cf;
                    853:        int     error = 0, do_detach = 0, do_put = 0;
1.1       oster     854:
1.327     pgoyette  855:        if ((rs = raidget(unit, false)) == NULL)
1.300     christos  856:                return ENXIO;
1.335     mlelstv   857:        dksc = &rs->sc_dksc;
1.1       oster     858:
                    859:        if ((error = raidlock(rs)) != 0)
                    860:                return (error);
                    861:
1.335     mlelstv   862:        if ((rs->sc_flags & RAIDF_INITED) != 0) {
                    863:                error = dk_close(dksc, dev, flags, fmt, l);
                    864:                if ((rs->sc_flags & RAIDF_DETACH) != 0)
                    865:                        do_detach = 1;
                    866:        } else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
                    867:                do_put = 1;
1.1       oster     868:
1.335     mlelstv   869:        raidunlock(rs);
1.1       oster     870:
1.335     mlelstv   871:        if (do_detach) {
                    872:                /* free the pseudo device attach bits */
                    873:                cf = device_cfdata(dksc->sc_dev);
                    874:                error = config_detach(dksc->sc_dev, 0);
                    875:                if (error == 0)
                    876:                        free(cf, M_RAIDFRAME);
                    877:        } else if (do_put) {
                    878:                raidput(rs);
1.1       oster     879:        }
1.186     perry     880:
1.335     mlelstv   881:        return (error);
1.147     oster     882:
1.335     mlelstv   883: }
1.327     pgoyette  884:
1.335     mlelstv   885: static void
                    886: raid_wakeup(RF_Raid_t *raidPtr)
                    887: {
                    888:        rf_lock_mutex2(raidPtr->iodone_lock);
                    889:        rf_signal_cond2(raidPtr->iodone_cv);
                    890:        rf_unlock_mutex2(raidPtr->iodone_lock);
1.1       oster     891: }
                    892:
1.324     mrg       893: static void
1.169     oster     894: raidstrategy(struct buf *bp)
1.1       oster     895: {
1.335     mlelstv   896:        unsigned int unit;
                    897:        struct raid_softc *rs;
                    898:        struct dk_softc *dksc;
1.1       oster     899:        RF_Raid_t *raidPtr;
                    900:
1.335     mlelstv   901:        unit = raidunit(bp->b_dev);
1.327     pgoyette  902:        if ((rs = raidget(unit, false)) == NULL) {
1.30      oster     903:                bp->b_error = ENXIO;
1.335     mlelstv   904:                goto fail;
1.30      oster     905:        }
1.300     christos  906:        if ((rs->sc_flags & RAIDF_INITED) == 0) {
                    907:                bp->b_error = ENXIO;
1.335     mlelstv   908:                goto fail;
1.1       oster     909:        }
1.335     mlelstv   910:        dksc = &rs->sc_dksc;
1.300     christos  911:        raidPtr = &rs->sc_r;
1.335     mlelstv   912:
                    913:        /* Queue IO only */
                    914:        if (dk_strategy_defer(dksc, bp))
1.196     yamt      915:                goto done;
1.1       oster     916:
1.335     mlelstv   917:        /* schedule the IO to happen at the next convenient time */
                    918:        raid_wakeup(raidPtr);
                    919:
                    920: done:
                    921:        return;
                    922:
                    923: fail:
                    924:        bp->b_resid = bp->b_bcount;
                    925:        biodone(bp);
                    926: }
                    927:
                    928: static int
                    929: raid_diskstart(device_t dev, struct buf *bp)
                    930: {
                    931:        struct raid_softc *rs = raidsoftc(dev);
                    932:        RF_Raid_t *raidPtr;
1.1       oster     933:
1.335     mlelstv   934:        raidPtr = &rs->sc_r;
                    935:        if (!raidPtr->valid) {
                    936:                db1_printf(("raid is not valid..\n"));
                    937:                return ENODEV;
1.196     yamt      938:        }
1.285     mrg       939:
1.335     mlelstv   940:        /* XXX */
                    941:        bp->b_resid = 0;
                    942:
                    943:        return raiddoaccess(raidPtr, bp);
                    944: }
1.1       oster     945:
1.335     mlelstv   946: void
                    947: raiddone(RF_Raid_t *raidPtr, struct buf *bp)
                    948: {
                    949:        struct raid_softc *rs;
                    950:        struct dk_softc *dksc;
1.34      oster     951:
1.335     mlelstv   952:        rs = raidPtr->softc;
                    953:        dksc = &rs->sc_dksc;
1.34      oster     954:
1.335     mlelstv   955:        dk_done(dksc, bp);
1.34      oster     956:
1.335     mlelstv   957:        rf_lock_mutex2(raidPtr->mutex);
                    958:        raidPtr->openings++;
                    959:        rf_unlock_mutex2(raidPtr->mutex);
1.196     yamt      960:
1.335     mlelstv   961:        /* schedule more IO */
                    962:        raid_wakeup(raidPtr);
1.1       oster     963: }
1.324     mrg       964:
1.1       oster     965: /* ARGSUSED */
1.324     mrg       966: static int
1.222     christos  967: raidread(dev_t dev, struct uio *uio, int flags)
1.1       oster     968: {
1.9       oster     969:        int     unit = raidunit(dev);
1.1       oster     970:        struct raid_softc *rs;
                    971:
1.327     pgoyette  972:        if ((rs = raidget(unit, false)) == NULL)
1.300     christos  973:                return ENXIO;
1.1       oster     974:
                    975:        if ((rs->sc_flags & RAIDF_INITED) == 0)
                    976:                return (ENXIO);
                    977:
                    978:        return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
                    979:
                    980: }
1.324     mrg       981:
1.1       oster     982: /* ARGSUSED */
1.324     mrg       983: static int
1.222     christos  984: raidwrite(dev_t dev, struct uio *uio, int flags)
1.1       oster     985: {
1.9       oster     986:        int     unit = raidunit(dev);
1.1       oster     987:        struct raid_softc *rs;
                    988:
1.327     pgoyette  989:        if ((rs = raidget(unit, false)) == NULL)
1.300     christos  990:                return ENXIO;
1.1       oster     991:
                    992:        if ((rs->sc_flags & RAIDF_INITED) == 0)
                    993:                return (ENXIO);
1.147     oster     994:
1.1       oster     995:        return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
                    996:
                    997: }
                    998:
1.266     dyoung    999: static int
                   1000: raid_detach_unlocked(struct raid_softc *rs)
                   1001: {
1.335     mlelstv  1002:        struct dk_softc *dksc = &rs->sc_dksc;
                   1003:        RF_Raid_t *raidPtr;
1.266     dyoung   1004:        int error;
                   1005:
1.300     christos 1006:        raidPtr = &rs->sc_r;
1.266     dyoung   1007:
1.337     mlelstv  1008:        if (DK_BUSY(dksc, 0) ||
                   1009:            raidPtr->recon_in_progress != 0 ||
                   1010:            raidPtr->parity_rewrite_in_progress != 0 ||
                   1011:            raidPtr->copyback_in_progress != 0)
1.266     dyoung   1012:                return EBUSY;
                   1013:
                   1014:        if ((rs->sc_flags & RAIDF_INITED) == 0)
1.333     mlelstv  1015:                return 0;
                   1016:
                   1017:        rs->sc_flags &= ~RAIDF_SHUTDOWN;
                   1018:
                   1019:        if ((error = rf_Shutdown(raidPtr)) != 0)
1.266     dyoung   1020:                return error;
                   1021:
1.335     mlelstv  1022:        rs->sc_flags &= ~RAIDF_INITED;
                   1023:
                   1024:        /* Kill off any queued buffers */
                   1025:        dk_drain(dksc);
                   1026:        bufq_free(dksc->sc_bufq);
                   1027:
1.266     dyoung   1028:        /* Detach the disk. */
1.335     mlelstv  1029:        dkwedge_delall(&dksc->sc_dkdev);
                   1030:        disk_detach(&dksc->sc_dkdev);
                   1031:        disk_destroy(&dksc->sc_dkdev);
                   1032:        dk_detach(dksc);
1.333     mlelstv  1033:
1.266     dyoung   1034:        return 0;
                   1035: }
                   1036:
1.366     christos 1037: static bool
                   1038: rf_must_be_initialized(const struct raid_softc *rs, u_long cmd)
                   1039: {
                   1040:        switch (cmd) {
                   1041:        case RAIDFRAME_ADD_HOT_SPARE:
                   1042:        case RAIDFRAME_CHECK_COPYBACK_STATUS:
                   1043:        case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
                   1044:        case RAIDFRAME_CHECK_PARITY:
                   1045:        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
                   1046:        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
                   1047:        case RAIDFRAME_CHECK_RECON_STATUS:
                   1048:        case RAIDFRAME_CHECK_RECON_STATUS_EXT:
                   1049:        case RAIDFRAME_COPYBACK:
                   1050:        case RAIDFRAME_DELETE_COMPONENT:
                   1051:        case RAIDFRAME_FAIL_DISK:
                   1052:        case RAIDFRAME_GET_ACCTOTALS:
                   1053:        case RAIDFRAME_GET_COMPONENT_LABEL:
                   1054:        case RAIDFRAME_GET_INFO:
                   1055:        case RAIDFRAME_GET_SIZE:
                   1056:        case RAIDFRAME_INCORPORATE_HOT_SPARE:
                   1057:        case RAIDFRAME_INIT_LABELS:
                   1058:        case RAIDFRAME_KEEP_ACCTOTALS:
                   1059:        case RAIDFRAME_PARITYMAP_GET_DISABLE:
                   1060:        case RAIDFRAME_PARITYMAP_SET_DISABLE:
                   1061:        case RAIDFRAME_PARITYMAP_SET_PARAMS:
                   1062:        case RAIDFRAME_PARITYMAP_STATUS:
                   1063:        case RAIDFRAME_REBUILD_IN_PLACE:
                   1064:        case RAIDFRAME_REMOVE_HOT_SPARE:
                   1065:        case RAIDFRAME_RESET_ACCTOTALS:
                   1066:        case RAIDFRAME_REWRITEPARITY:
                   1067:        case RAIDFRAME_SET_AUTOCONFIG:
                   1068:        case RAIDFRAME_SET_COMPONENT_LABEL:
                   1069:        case RAIDFRAME_SET_ROOT:
1.369     oster    1070:                return (rs->sc_flags & RAIDF_INITED) == 0;
1.366     christos 1071:        }
                   1072:        return false;
                   1073: }
                   1074:
                   1075: int
                   1076: rf_fail_disk(RF_Raid_t *raidPtr, struct rf_recon_req *rr)
                   1077: {
                   1078:        struct rf_recon_req_internal *rrint;
                   1079:
                   1080:        if (raidPtr->Layout.map->faultsTolerated == 0) {
                   1081:                /* Can't do this on a RAID 0!! */
                   1082:                return EINVAL;
                   1083:        }
                   1084:
                   1085:        if (rr->col < 0 || rr->col >= raidPtr->numCol) {
                   1086:                /* bad column */
                   1087:                return EINVAL;
                   1088:        }
                   1089:
                   1090:        rf_lock_mutex2(raidPtr->mutex);
                   1091:        if (raidPtr->status == rf_rs_reconstructing) {
                   1092:                /* you can't fail a disk while we're reconstructing! */
                   1093:                /* XXX wrong for RAID6 */
                   1094:                goto out;
                   1095:        }
                   1096:        if ((raidPtr->Disks[rr->col].status == rf_ds_optimal) &&
                   1097:            (raidPtr->numFailures > 0)) {
                   1098:                /* some other component has failed.  Let's not make
                   1099:                   things worse. XXX wrong for RAID6 */
                   1100:                goto out;
                   1101:        }
                   1102:        if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
                   1103:                /* Can't fail a spared disk! */
                   1104:                goto out;
                   1105:        }
                   1106:        rf_unlock_mutex2(raidPtr->mutex);
                   1107:
                   1108:        /* make a copy of the recon request so that we don't rely on
                   1109:         * the user's buffer */
                   1110:        RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
                   1111:        if (rrint == NULL)
                   1112:                return(ENOMEM);
                   1113:        rrint->col = rr->col;
                   1114:        rrint->flags = rr->flags;
                   1115:        rrint->raidPtr = raidPtr;
                   1116:
                   1117:        return RF_CREATE_THREAD(raidPtr->recon_thread, rf_ReconThread,
                   1118:            rrint, "raid_recon");
                   1119: out:
                   1120:        rf_unlock_mutex2(raidPtr->mutex);
                   1121:        return EINVAL;
                   1122: }
                   1123:
1.324     mrg      1124: static int
1.367     christos 1125: rf_copyinspecificbuf(RF_Config_t *k_cfg)
                   1126: {
                   1127:        /* allocate a buffer for the layout-specific data, and copy it in */
                   1128:        if (k_cfg->layoutSpecificSize == 0)
                   1129:                return 0;
                   1130:
                   1131:        if (k_cfg->layoutSpecificSize > 10000) {
                   1132:            /* sanity check */
                   1133:            return EINVAL;
                   1134:        }
                   1135:
                   1136:        u_char *specific_buf;
                   1137:        RF_Malloc(specific_buf, k_cfg->layoutSpecificSize, (u_char *));
                   1138:        if (specific_buf == NULL)
                   1139:                return ENOMEM;
                   1140:
                   1141:        int retcode = copyin(k_cfg->layoutSpecific, specific_buf,
                   1142:            k_cfg->layoutSpecificSize);
                   1143:        if (retcode) {
                   1144:                RF_Free(specific_buf, k_cfg->layoutSpecificSize);
                   1145:                db1_printf(("%s: retcode=%d copyin.2\n", __func__, retcode));
                   1146:                return retcode;
                   1147:        }
                   1148:
                   1149:        k_cfg->layoutSpecific = specific_buf;
                   1150:        return 0;
                   1151: }
                   1152:
                   1153: static int
                   1154: rf_getConfiguration(struct raid_softc *rs, void *data, RF_Config_t **k_cfg)
                   1155: {
                   1156:        if (rs->sc_r.valid) {
                   1157:                /* There is a valid RAID set running on this unit! */
                   1158:                printf("raid%d: Device already configured!\n", rs->sc_unit);
                   1159:                return EINVAL;
                   1160:        }
                   1161:
                   1162:        /* copy-in the configuration information */
                   1163:        /* data points to a pointer to the configuration structure */
                   1164:        RF_Malloc(*k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
                   1165:        if (*k_cfg == NULL) {
                   1166:                return ENOMEM;
                   1167:        }
                   1168:        int retcode = copyin(data, k_cfg, sizeof(RF_Config_t));
                   1169:        if (retcode == 0)
                   1170:                return 0;
                   1171:        RF_Free(*k_cfg, sizeof(RF_Config_t));
                   1172:        db1_printf(("%s: retcode=%d copyin.1\n", __func__, retcode));
                   1173:        rs->sc_flags |= RAIDF_SHUTDOWN;
                   1174:        return retcode;
                   1175: }
                   1176:
                   1177: int
                   1178: rf_construct(struct raid_softc *rs, RF_Config_t *k_cfg)
                   1179: {
                   1180:        int retcode;
                   1181:        RF_Raid_t *raidPtr = &rs->sc_r;
                   1182:
                   1183:        rs->sc_flags &= ~RAIDF_SHUTDOWN;
                   1184:
                   1185:        if ((retcode = rf_copyinspecificbuf(k_cfg)) != 0)
                   1186:                goto out;
                   1187:
                   1188:        /* should do some kind of sanity check on the configuration.
                   1189:         * Store the sum of all the bytes in the last byte? */
                   1190:
                   1191:        /* configure the system */
                   1192:
                   1193:        /*
                   1194:         * Clear the entire RAID descriptor, just to make sure
                   1195:         *  there is no stale data left in the case of a
                   1196:         *  reconfiguration
                   1197:         */
                   1198:        memset(raidPtr, 0, sizeof(*raidPtr));
                   1199:        raidPtr->softc = rs;
                   1200:        raidPtr->raidid = rs->sc_unit;
                   1201:
                   1202:        retcode = rf_Configure(raidPtr, k_cfg, NULL);
                   1203:
                   1204:        if (retcode == 0) {
                   1205:                /* allow this many simultaneous IO's to
                   1206:                   this RAID device */
                   1207:                raidPtr->openings = RAIDOUTSTANDING;
                   1208:
                   1209:                raidinit(rs);
                   1210:                raid_wakeup(raidPtr);
                   1211:                rf_markalldirty(raidPtr);
                   1212:        }
                   1213:
                   1214:        /* free the buffers.  No return code here. */
                   1215:        if (k_cfg->layoutSpecificSize) {
                   1216:                RF_Free(k_cfg->layoutSpecific, k_cfg->layoutSpecificSize);
                   1217:        }
                   1218: out:
                   1219:        RF_Free(k_cfg, sizeof(RF_Config_t));
                   1220:        if (retcode) {
                   1221:                /*
                   1222:                 * If configuration failed, set sc_flags so that we
                   1223:                 * will detach the device when we close it.
                   1224:                 */
                   1225:                rs->sc_flags |= RAIDF_SHUTDOWN;
                   1226:        }
                   1227:        return retcode;
                   1228: }
                   1229:
                   1230: #if RF_DISABLED
                   1231: static int
                   1232: rf_set_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
                   1233: {
                   1234:
                   1235:        /* XXX check the label for valid stuff... */
                   1236:        /* Note that some things *should not* get modified --
                   1237:           the user should be re-initing the labels instead of
                   1238:           trying to patch things.
                   1239:           */
                   1240: #ifdef DEBUG
                   1241:        int raidid = raidPtr->raidid;
                   1242:        printf("raid%d: Got component label:\n", raidid);
                   1243:        printf("raid%d: Version: %d\n", raidid, clabel->version);
                   1244:        printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
                   1245:        printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
                   1246:        printf("raid%d: Column: %d\n", raidid, clabel->column);
                   1247:        printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
                   1248:        printf("raid%d: Clean: %d\n", raidid, clabel->clean);
                   1249:        printf("raid%d: Status: %d\n", raidid, clabel->status);
                   1250: #endif /* DEBUG */
                   1251:        clabel->row = 0;
                   1252:        int column = clabel->column;
                   1253:
                   1254:        if ((column < 0) || (column >= raidPtr->numCol)) {
                   1255:                return(EINVAL);
                   1256:        }
                   1257:
                   1258:        /* XXX this isn't allowed to do anything for now :-) */
                   1259:
                   1260:        /* XXX and before it is, we need to fill in the rest
                   1261:           of the fields!?!?!?! */
                   1262:        memcpy(raidget_component_label(raidPtr, column),
                   1263:            clabel, sizeof(*clabel));
                   1264:        raidflush_component_label(raidPtr, column);
                   1265:        return 0;
                   1266: }
                   1267: #endif
                   1268:
                   1269: static int
                   1270: rf_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
                   1271: {
                   1272:        /*
                   1273:           we only want the serial number from
                   1274:           the above.  We get all the rest of the information
                   1275:           from the config that was used to create this RAID
                   1276:           set.
                   1277:           */
                   1278:
                   1279:        raidPtr->serial_number = clabel->serial_number;
                   1280:
                   1281:        for (int column = 0; column < raidPtr->numCol; column++) {
                   1282:                RF_RaidDisk_t *diskPtr = &raidPtr->Disks[column];
                   1283:                if (RF_DEAD_DISK(diskPtr->status))
                   1284:                        continue;
                   1285:                RF_ComponentLabel_t *ci_label = raidget_component_label(
                   1286:                    raidPtr, column);
                   1287:                /* Zeroing this is important. */
                   1288:                memset(ci_label, 0, sizeof(*ci_label));
                   1289:                raid_init_component_label(raidPtr, ci_label);
                   1290:                ci_label->serial_number = raidPtr->serial_number;
                   1291:                ci_label->row = 0; /* we dont' pretend to support more */
                   1292:                rf_component_label_set_partitionsize(ci_label,
                   1293:                    diskPtr->partitionSize);
                   1294:                ci_label->column = column;
                   1295:                raidflush_component_label(raidPtr, column);
                   1296:                /* XXXjld what about the spares? */
                   1297:        }
                   1298:
                   1299:        return 0;
                   1300: }
                   1301:
                   1302: static int
                   1303: rf_rebuild_in_place(RF_Raid_t *raidPtr, RF_SingleComponent_t *componentPtr)
                   1304: {
                   1305:
                   1306:        if (raidPtr->Layout.map->faultsTolerated == 0) {
                   1307:                /* Can't do this on a RAID 0!! */
                   1308:                return EINVAL;
                   1309:        }
                   1310:
                   1311:        if (raidPtr->recon_in_progress == 1) {
                   1312:                /* a reconstruct is already in progress! */
                   1313:                return EINVAL;
                   1314:        }
                   1315:
                   1316:        RF_SingleComponent_t component;
                   1317:        memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
                   1318:        component.row = 0; /* we don't support any more */
                   1319:        int column = component.column;
                   1320:
                   1321:        if ((column < 0) || (column >= raidPtr->numCol)) {
                   1322:                return EINVAL;
                   1323:        }
                   1324:
                   1325:        rf_lock_mutex2(raidPtr->mutex);
                   1326:        if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
                   1327:            (raidPtr->numFailures > 0)) {
                   1328:                /* XXX 0 above shouldn't be constant!!! */
                   1329:                /* some component other than this has failed.
                   1330:                   Let's not make things worse than they already
                   1331:                   are... */
                   1332:                printf("raid%d: Unable to reconstruct to disk at:\n",
                   1333:                       raidPtr->raidid);
                   1334:                printf("raid%d:     Col: %d   Too many failures.\n",
                   1335:                       raidPtr->raidid, column);
                   1336:                rf_unlock_mutex2(raidPtr->mutex);
                   1337:                return EINVAL;
                   1338:        }
                   1339:
                   1340:        if (raidPtr->Disks[column].status == rf_ds_reconstructing) {
                   1341:                printf("raid%d: Unable to reconstruct to disk at:\n",
                   1342:                       raidPtr->raidid);
                   1343:                printf("raid%d:    Col: %d   "
                   1344:                    "Reconstruction already occurring!\n",
                   1345:                    raidPtr->raidid, column);
                   1346:
                   1347:                rf_unlock_mutex2(raidPtr->mutex);
                   1348:                return EINVAL;
                   1349:        }
                   1350:
                   1351:        if (raidPtr->Disks[column].status == rf_ds_spared) {
                   1352:                rf_unlock_mutex2(raidPtr->mutex);
                   1353:                return EINVAL;
                   1354:        }
                   1355:
                   1356:        rf_unlock_mutex2(raidPtr->mutex);
                   1357:
                   1358:        struct rf_recon_req_internal *rrint;
                   1359:        RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
                   1360:        if (rrint == NULL)
                   1361:                return ENOMEM;
                   1362:
                   1363:        rrint->col = column;
                   1364:        rrint->raidPtr = raidPtr;
                   1365:
                   1366:        return RF_CREATE_THREAD(raidPtr->recon_thread,
                   1367:            rf_ReconstructInPlaceThread, rrint, "raid_reconip");
                   1368: }
                   1369:
                   1370: static int
                   1371: rf_check_recon_status(RF_Raid_t *raidPtr, int *data)
                   1372: {
                   1373:        /*
                   1374:         * This makes no sense on a RAID 0, or if we are not reconstructing
                   1375:         * so tell the user it's done.
                   1376:         */
                   1377:        if (raidPtr->Layout.map->faultsTolerated == 0 ||
                   1378:            raidPtr->status != rf_rs_reconstructing) {
                   1379:                *data = 100;
                   1380:                return 0;
                   1381:        }
                   1382:        if (raidPtr->reconControl->numRUsTotal == 0) {
                   1383:                *data = 0;
                   1384:                return 0;
                   1385:        }
                   1386:        *data = (raidPtr->reconControl->numRUsComplete * 100
                   1387:            / raidPtr->reconControl->numRUsTotal);
                   1388:        return 0;
                   1389: }
                   1390:
                   1391: static int
1.225     christos 1392: raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1.1       oster    1393: {
1.9       oster    1394:        int     unit = raidunit(dev);
1.335     mlelstv  1395:        int     part, pmask;
1.1       oster    1396:        struct raid_softc *rs;
1.335     mlelstv  1397:        struct dk_softc *dksc;
1.367     christos 1398:        RF_Config_t *k_cfg;
1.42      oster    1399:        RF_Raid_t *raidPtr;
1.41      oster    1400:        RF_AccTotals_t *totals;
1.367     christos 1401:        RF_SingleComponent_t component;
1.370   ! christos 1402:        RF_DeviceConfig_t *d_cfg;
1.11      oster    1403:        int retcode = 0;
                   1404:        int column;
1.48      oster    1405:        RF_ComponentLabel_t *clabel;
1.12      oster    1406:        RF_SingleComponent_t *sparePtr,*componentPtr;
1.353     mrg      1407:        int d;
1.1       oster    1408:
1.327     pgoyette 1409:        if ((rs = raidget(unit, false)) == NULL)
1.300     christos 1410:                return ENXIO;
1.366     christos 1411:
1.335     mlelstv  1412:        dksc = &rs->sc_dksc;
1.300     christos 1413:        raidPtr = &rs->sc_r;
1.1       oster    1414:
1.276     mrg      1415:        db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1.366     christos 1416:            (int) DISKPART(dev), (int) unit, cmd));
1.1       oster    1417:
                   1418:        /* Must be initialized for these... */
1.366     christos 1419:        if (rf_must_be_initialized(rs, cmd))
                   1420:                return ENXIO;
1.9       oster    1421:
1.358     pgoyette 1422:        switch (cmd) {
1.1       oster    1423:                /* configure the system */
                   1424:        case RAIDFRAME_CONFIGURE:
1.367     christos 1425:                if ((retcode = rf_getConfiguration(rs, data, &k_cfg)) != 0)
                   1426:                        return retcode;
                   1427:                return rf_construct(rs, k_cfg);
1.9       oster    1428:
                   1429:                /* shutdown the system */
1.1       oster    1430:        case RAIDFRAME_SHUTDOWN:
1.9       oster    1431:
1.266     dyoung   1432:                part = DISKPART(dev);
                   1433:                pmask = (1 << part);
                   1434:
1.367     christos 1435:                if ((retcode = raidlock(rs)) != 0)
                   1436:                        return retcode;
1.1       oster    1437:
1.337     mlelstv  1438:                if (DK_BUSY(dksc, pmask) ||
                   1439:                    raidPtr->recon_in_progress != 0 ||
                   1440:                    raidPtr->parity_rewrite_in_progress != 0 ||
                   1441:                    raidPtr->copyback_in_progress != 0)
1.266     dyoung   1442:                        retcode = EBUSY;
                   1443:                else {
1.335     mlelstv  1444:                        /* detach and free on close */
1.266     dyoung   1445:                        rs->sc_flags |= RAIDF_SHUTDOWN;
                   1446:                        retcode = 0;
1.9       oster    1447:                }
1.11      oster    1448:
1.266     dyoung   1449:                raidunlock(rs);
1.1       oster    1450:
1.367     christos 1451:                return retcode;
1.11      oster    1452:        case RAIDFRAME_GET_COMPONENT_LABEL:
1.353     mrg      1453:                return rf_get_component_label(raidPtr, data);
1.11      oster    1454:
1.367     christos 1455: #if RF_DISABLED
1.11      oster    1456:        case RAIDFRAME_SET_COMPONENT_LABEL:
1.367     christos 1457:                return rf_set_component_label(raidPtr, data);
                   1458: #endif
1.11      oster    1459:
1.367     christos 1460:        case RAIDFRAME_INIT_LABELS:
                   1461:                return rf_init_component_label(raidPtr, data);
1.12      oster    1462:
1.48      oster    1463:        case RAIDFRAME_SET_AUTOCONFIG:
1.78      minoura  1464:                d = rf_set_autoconfig(raidPtr, *(int *) data);
1.186     perry    1465:                printf("raid%d: New autoconfig value is: %d\n",
1.123     oster    1466:                       raidPtr->raidid, d);
1.78      minoura  1467:                *(int *) data = d;
1.367     christos 1468:                return retcode;
1.48      oster    1469:
                   1470:        case RAIDFRAME_SET_ROOT:
1.78      minoura  1471:                d = rf_set_rootpartition(raidPtr, *(int *) data);
1.186     perry    1472:                printf("raid%d: New rootpartition value is: %d\n",
1.123     oster    1473:                       raidPtr->raidid, d);
1.78      minoura  1474:                *(int *) data = d;
1.367     christos 1475:                return retcode;
1.9       oster    1476:
1.1       oster    1477:                /* initialize all parity */
                   1478:        case RAIDFRAME_REWRITEPARITY:
                   1479:
1.42      oster    1480:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.17      oster    1481:                        /* Parity for RAID 0 is trivially correct */
1.42      oster    1482:                        raidPtr->parity_good = RF_RAID_CLEAN;
1.367     christos 1483:                        return 0;
1.17      oster    1484:                }
1.186     perry    1485:
1.42      oster    1486:                if (raidPtr->parity_rewrite_in_progress == 1) {
1.37      oster    1487:                        /* Re-write is already in progress! */
1.367     christos 1488:                        return EINVAL;
1.37      oster    1489:                }
1.27      oster    1490:
1.367     christos 1491:                return RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
                   1492:                    rf_RewriteParityThread, raidPtr,"raid_parity");
1.11      oster    1493:
                   1494:        case RAIDFRAME_ADD_HOT_SPARE:
1.12      oster    1495:                sparePtr = (RF_SingleComponent_t *) data;
1.367     christos 1496:                memcpy(&component, sparePtr, sizeof(RF_SingleComponent_t));
                   1497:                return rf_add_hot_spare(raidPtr, &component);
1.11      oster    1498:
                   1499:        case RAIDFRAME_REMOVE_HOT_SPARE:
1.367     christos 1500:                return retcode;
1.73      oster    1501:
                   1502:        case RAIDFRAME_DELETE_COMPONENT:
                   1503:                componentPtr = (RF_SingleComponent_t *)data;
1.367     christos 1504:                memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
                   1505:                return rf_delete_component(raidPtr, &component);
1.73      oster    1506:
                   1507:        case RAIDFRAME_INCORPORATE_HOT_SPARE:
                   1508:                componentPtr = (RF_SingleComponent_t *)data;
1.367     christos 1509:                memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
                   1510:                return rf_incorporate_hot_spare(raidPtr, &component);
1.11      oster    1511:
1.12      oster    1512:        case RAIDFRAME_REBUILD_IN_PLACE:
1.367     christos 1513:                return rf_rebuild_in_place(raidPtr, data);
1.24      oster    1514:
1.366     christos 1515:        case RAIDFRAME_GET_INFO:
1.41      oster    1516:                RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
                   1517:                          (RF_DeviceConfig_t *));
                   1518:                if (d_cfg == NULL)
1.366     christos 1519:                        return ENOMEM;
1.353     mrg      1520:                retcode = rf_get_info(raidPtr, d_cfg);
                   1521:                if (retcode == 0) {
1.370   ! christos 1522:                        retcode = copyout(d_cfg, data, sizeof(*d_cfg));
1.41      oster    1523:                }
                   1524:                RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1.366     christos 1525:                return retcode;
1.9       oster    1526:
1.22      oster    1527:        case RAIDFRAME_CHECK_PARITY:
1.42      oster    1528:                *(int *) data = raidPtr->parity_good;
1.367     christos 1529:                return 0;
1.41      oster    1530:
1.269     jld      1531:        case RAIDFRAME_PARITYMAP_STATUS:
1.273     jld      1532:                if (rf_paritymap_ineligible(raidPtr))
                   1533:                        return EINVAL;
1.367     christos 1534:                rf_paritymap_status(raidPtr->parity_map, data);
1.269     jld      1535:                return 0;
                   1536:
                   1537:        case RAIDFRAME_PARITYMAP_SET_PARAMS:
1.273     jld      1538:                if (rf_paritymap_ineligible(raidPtr))
                   1539:                        return EINVAL;
1.269     jld      1540:                if (raidPtr->parity_map == NULL)
                   1541:                        return ENOENT; /* ??? */
1.367     christos 1542:                if (rf_paritymap_set_params(raidPtr->parity_map, data, 1) != 0)
1.269     jld      1543:                        return EINVAL;
                   1544:                return 0;
                   1545:
                   1546:        case RAIDFRAME_PARITYMAP_GET_DISABLE:
1.273     jld      1547:                if (rf_paritymap_ineligible(raidPtr))
                   1548:                        return EINVAL;
1.269     jld      1549:                *(int *) data = rf_paritymap_get_disable(raidPtr);
                   1550:                return 0;
                   1551:
                   1552:        case RAIDFRAME_PARITYMAP_SET_DISABLE:
1.273     jld      1553:                if (rf_paritymap_ineligible(raidPtr))
                   1554:                        return EINVAL;
1.269     jld      1555:                rf_paritymap_set_disable(raidPtr, *(int *)data);
                   1556:                /* XXX should errors be passed up? */
                   1557:                return 0;
                   1558:
1.1       oster    1559:        case RAIDFRAME_RESET_ACCTOTALS:
1.108     thorpej  1560:                memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1.367     christos 1561:                return 0;
1.9       oster    1562:
1.1       oster    1563:        case RAIDFRAME_GET_ACCTOTALS:
1.41      oster    1564:                totals = (RF_AccTotals_t *) data;
1.42      oster    1565:                *totals = raidPtr->acc_totals;
1.366     christos 1566:                return 0;
1.9       oster    1567:
1.1       oster    1568:        case RAIDFRAME_KEEP_ACCTOTALS:
1.42      oster    1569:                raidPtr->keep_acc_totals = *(int *)data;
1.366     christos 1570:                return 0;
1.9       oster    1571:
1.1       oster    1572:        case RAIDFRAME_GET_SIZE:
1.42      oster    1573:                *(int *) data = raidPtr->totalSectors;
1.366     christos 1574:                return 0;
1.1       oster    1575:
                   1576:        case RAIDFRAME_FAIL_DISK:
1.366     christos 1577:                return rf_fail_disk(raidPtr, data);
1.9       oster    1578:
                   1579:                /* invoke a copyback operation after recon on whatever disk
                   1580:                 * needs it, if any */
                   1581:        case RAIDFRAME_COPYBACK:
1.24      oster    1582:
1.42      oster    1583:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.24      oster    1584:                        /* This makes no sense on a RAID 0!! */
1.367     christos 1585:                        return EINVAL;
1.24      oster    1586:                }
                   1587:
1.42      oster    1588:                if (raidPtr->copyback_in_progress == 1) {
1.37      oster    1589:                        /* Copyback is already in progress! */
1.367     christos 1590:                        return EINVAL;
1.37      oster    1591:                }
1.27      oster    1592:
1.367     christos 1593:                return RF_CREATE_THREAD(raidPtr->copyback_thread,
                   1594:                    rf_CopybackThread, raidPtr, "raid_copyback");
1.9       oster    1595:
1.1       oster    1596:                /* return the percentage completion of reconstruction */
1.37      oster    1597:        case RAIDFRAME_CHECK_RECON_STATUS:
1.367     christos 1598:                return rf_check_recon_status(raidPtr, data);
                   1599:
1.83      oster    1600:        case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1.353     mrg      1601:                rf_check_recon_status_ext(raidPtr, data);
1.367     christos 1602:                return 0;
1.9       oster    1603:
1.37      oster    1604:        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1.42      oster    1605:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.80      oster    1606:                        /* This makes no sense on a RAID 0, so tell the
                   1607:                           user it's done. */
                   1608:                        *(int *) data = 100;
1.367     christos 1609:                        return 0;
1.37      oster    1610:                }
1.42      oster    1611:                if (raidPtr->parity_rewrite_in_progress == 1) {
1.186     perry    1612:                        *(int *) data = 100 *
                   1613:                                raidPtr->parity_rewrite_stripes_done /
1.83      oster    1614:                                raidPtr->Layout.numStripe;
1.37      oster    1615:                } else {
                   1616:                        *(int *) data = 100;
                   1617:                }
1.367     christos 1618:                return 0;
1.37      oster    1619:
1.83      oster    1620:        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1.353     mrg      1621:                rf_check_parityrewrite_status_ext(raidPtr, data);
1.367     christos 1622:                return 0;
1.83      oster    1623:
1.37      oster    1624:        case RAIDFRAME_CHECK_COPYBACK_STATUS:
1.42      oster    1625:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.37      oster    1626:                        /* This makes no sense on a RAID 0 */
1.83      oster    1627:                        *(int *) data = 100;
1.367     christos 1628:                        return 0;
1.37      oster    1629:                }
1.42      oster    1630:                if (raidPtr->copyback_in_progress == 1) {
                   1631:                        *(int *) data = 100 * raidPtr->copyback_stripes_done /
                   1632:                                raidPtr->Layout.numStripe;
1.37      oster    1633:                } else {
                   1634:                        *(int *) data = 100;
                   1635:                }
1.367     christos 1636:                return 0;
1.37      oster    1637:
1.83      oster    1638:        case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1.353     mrg      1639:                rf_check_copyback_status_ext(raidPtr, data);
                   1640:                return 0;
1.37      oster    1641:
1.341     christos 1642:        case RAIDFRAME_SET_LAST_UNIT:
                   1643:                for (column = 0; column < raidPtr->numCol; column++)
                   1644:                        if (raidPtr->Disks[column].status != rf_ds_optimal)
                   1645:                                return EBUSY;
                   1646:
                   1647:                for (column = 0; column < raidPtr->numCol; column++) {
                   1648:                        clabel = raidget_component_label(raidPtr, column);
                   1649:                        clabel->last_unit = *(int *)data;
                   1650:                        raidflush_component_label(raidPtr, column);
                   1651:                }
                   1652:                rs->sc_cflags |= RAIDF_UNIT_CHANGED;
                   1653:                return 0;
                   1654:
1.9       oster    1655:                /* the sparetable daemon calls this to wait for the kernel to
                   1656:                 * need a spare table. this ioctl does not return until a
                   1657:                 * spare table is needed. XXX -- calling mpsleep here in the
                   1658:                 * ioctl code is almost certainly wrong and evil. -- XXX XXX
                   1659:                 * -- I should either compute the spare table in the kernel,
                   1660:                 * or have a different -- XXX XXX -- interface (a different
1.42      oster    1661:                 * character device) for delivering the table     -- XXX */
1.367     christos 1662: #if RF_DISABLED
1.1       oster    1663:        case RAIDFRAME_SPARET_WAIT:
1.287     mrg      1664:                rf_lock_mutex2(rf_sparet_wait_mutex);
1.9       oster    1665:                while (!rf_sparet_wait_queue)
1.287     mrg      1666:                        rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1.367     christos 1667:                RF_SparetWait_t *waitreq = rf_sparet_wait_queue;
1.1       oster    1668:                rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1.287     mrg      1669:                rf_unlock_mutex2(rf_sparet_wait_mutex);
1.9       oster    1670:
1.42      oster    1671:                /* structure assignment */
1.186     perry    1672:                *((RF_SparetWait_t *) data) = *waitreq;
1.9       oster    1673:
1.1       oster    1674:                RF_Free(waitreq, sizeof(*waitreq));
1.367     christos 1675:                return 0;
1.9       oster    1676:
                   1677:                /* wakes up a process waiting on SPARET_WAIT and puts an error
                   1678:                 * code in it that will cause the daemon to exit */
1.1       oster    1679:        case RAIDFRAME_ABORT_SPARET_WAIT:
                   1680:                RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
                   1681:                waitreq->fcol = -1;
1.287     mrg      1682:                rf_lock_mutex2(rf_sparet_wait_mutex);
1.1       oster    1683:                waitreq->next = rf_sparet_wait_queue;
                   1684:                rf_sparet_wait_queue = waitreq;
1.367     christos 1685:                rf_broadcast_cond2(rf_sparet_wait_cv);
1.287     mrg      1686:                rf_unlock_mutex2(rf_sparet_wait_mutex);
1.367     christos 1687:                return 0;
1.1       oster    1688:
1.9       oster    1689:                /* used by the spare table daemon to deliver a spare table
                   1690:                 * into the kernel */
1.1       oster    1691:        case RAIDFRAME_SEND_SPARET:
1.9       oster    1692:
1.1       oster    1693:                /* install the spare table */
1.42      oster    1694:                retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1.9       oster    1695:
                   1696:                /* respond to the requestor.  the return status of the spare
                   1697:                 * table installation is passed in the "fcol" field */
1.1       oster    1698:                RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
                   1699:                waitreq->fcol = retcode;
1.287     mrg      1700:                rf_lock_mutex2(rf_sparet_wait_mutex);
1.1       oster    1701:                waitreq->next = rf_sparet_resp_queue;
                   1702:                rf_sparet_resp_queue = waitreq;
1.287     mrg      1703:                rf_broadcast_cond2(rf_sparet_resp_cv);
                   1704:                rf_unlock_mutex2(rf_sparet_wait_mutex);
1.9       oster    1705:
1.367     christos 1706:                return retcode;
                   1707: #endif
                   1708:        default:
                   1709: #ifdef _LP64
                   1710:                if ((l->l_proc->p_flag & PK_32) != 0) {
                   1711:                        module_autoload("compat_netbsd32_raid",
                   1712:                            MODULE_CLASS_EXEC);
                   1713:                        MODULE_CALL_HOOK(raidframe_netbsd32_ioctl_hook,
                   1714:                            (rs, cmd, data), enosys(), retcode);
                   1715:                        if (retcode != EPASSTHROUGH)
                   1716:                                return retcode;
                   1717:                }
1.1       oster    1718: #endif
1.367     christos 1719:                module_autoload("compat_raid_80", MODULE_CLASS_EXEC);
                   1720:                MODULE_CALL_HOOK(raidframe_ioctl_80_hook,
                   1721:                    (rs, cmd, data), enosys(), retcode);
                   1722:                if (retcode != EPASSTHROUGH)
                   1723:                        return retcode;
1.1       oster    1724:
1.367     christos 1725:                module_autoload("compat_raid_50", MODULE_CLASS_EXEC);
                   1726:                MODULE_CALL_HOOK(raidframe_ioctl_50_hook,
                   1727:                    (rs, cmd, data), enosys(), retcode);
                   1728:                if (retcode != EPASSTHROUGH)
                   1729:                        return retcode;
1.36      oster    1730:                break; /* fall through to the os-specific code below */
1.1       oster    1731:
                   1732:        }
1.9       oster    1733:
1.42      oster    1734:        if (!raidPtr->valid)
1.9       oster    1735:                return (EINVAL);
                   1736:
1.1       oster    1737:        /*
                   1738:         * Add support for "regular" device ioctls here.
                   1739:         */
1.263     haad     1740:
1.1       oster    1741:        switch (cmd) {
1.348     jdolecek 1742:        case DIOCGCACHE:
                   1743:                retcode = rf_get_component_caches(raidPtr, (int *)data);
                   1744:                break;
                   1745:
1.252     oster    1746:        case DIOCCACHESYNC:
1.346     jdolecek 1747:                retcode = rf_sync_component_caches(raidPtr);
1.347     jdolecek 1748:                break;
1.298     buhrow   1749:
1.1       oster    1750:        default:
1.346     jdolecek 1751:                retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
1.347     jdolecek 1752:                break;
1.1       oster    1753:        }
1.346     jdolecek 1754:
1.9       oster    1755:        return (retcode);
1.1       oster    1756:
                   1757: }
                   1758:
                   1759:
1.9       oster    1760: /* raidinit -- complete the rest of the initialization for the
1.1       oster    1761:    RAIDframe device.

                            Builds the "raid<N>" name from raidPtr->raidid, attaches a
                            pseudo-device for it, initializes and attaches the dk/disk
                            layers, sets the geometry, allocates the buffer queue, sets
                            RAIDF_INITED and finally calls dkwedge_discover().

                            If config_attach_pseudo() fails, a message is printed, the
                            cfdata is freed and the function returns without setting
                            RAIDF_INITED.  */
                   1762:
                   1763:
1.59      oster    1764: static void
1.300     christos 1765: raidinit(struct raid_softc *rs)
1.1       oster    1766: {
1.262     cegger   1767:        cfdata_t cf;
1.335     mlelstv  1768:        unsigned int unit;
                   1769:        struct dk_softc *dksc = &rs->sc_dksc;
1.300     christos 1770:        RF_Raid_t *raidPtr = &rs->sc_r;
1.335     mlelstv  1771:        device_t dev;
1.1       oster    1772:
1.59      oster    1773:        unit = raidPtr->raidid;
1.1       oster    1774:
1.179     itojun   1775:        /* XXX doesn't check bounds: the snprintf() result is ignored,
                                 * so a truncated name would go unnoticed. */
1.335     mlelstv  1776:        snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);
1.1       oster    1777:
1.217     oster    1778:        /* attach the pseudo device; cf is heap-allocated here and is
                                 * released in this function only if the attach fails */
                   1779:        cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
                   1780:        cf->cf_name = raid_cd.cd_name;
                   1781:        cf->cf_atname = raid_cd.cd_name;
                   1782:        cf->cf_unit = unit;
                   1783:        cf->cf_fstate = FSTATE_STAR;
                   1784:
1.335     mlelstv  1785:        dev = config_attach_pseudo(cf);
                   1786:        if (dev == NULL) {
1.217     oster    1787:                printf("raid%d: config_attach_pseudo failed\n",
1.270     christos 1788:                    raidPtr->raidid);
1.265     pooka    1789:                free(cf, M_RAIDFRAME);
                   1790:                return;
1.217     oster    1791:        }
                   1792:
1.335     mlelstv  1793:        /* provide a backpointer to the real softc */
                   1794:        raidsoftc(dev) = rs;
                   1795:
1.1       oster    1796:        /* disk_attach actually creates space for the CPU disklabel, among
1.9       oster    1797:         * other things, so it's critical to call this *BEFORE* we try putzing
                   1798:         * with disklabels. */
1.335     mlelstv  1799:        dk_init(dksc, dev, DKTYPE_RAID);
                   1800:        disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);
1.1       oster    1801:
                   1802:        /* XXX There may be a weird interaction here between this, and
1.9       oster    1803:         * protectedSectors, as used in RAIDframe.  */
1.11      oster    1804:
1.9       oster    1805:        rs->sc_size = raidPtr->totalSectors;
1.234     oster    1806:
1.335     mlelstv  1807:        /* Attach dk and disk subsystems */
                   1808:        dk_attach(dksc);
                   1809:        disk_attach(&dksc->sc_dkdev);
1.318     mlelstv  1810:        rf_set_geometry(rs, raidPtr);
                   1811:
1.335     mlelstv  1812:        bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);
                   1813:
                   1814:        /* mark unit as usable */
                   1815:        rs->sc_flags |= RAIDF_INITED;
1.234     oster    1816:
1.335     mlelstv  1817:        dkwedge_discover(&dksc->sc_dkdev);
1.1       oster    1818: }
1.335     mlelstv  1819:
1.150     oster    1820: #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1.1       oster    1821: /* wake up the daemon & tell it to get us a spare table
                   1822:  * XXX
1.9       oster    1823:  * the entries in the queues should be tagged with the raidPtr
1.186     perry    1824:  * so that in the extremely rare case that two recons happen at once,
1.11      oster    1825:  * we know for which device we're requesting a spare table
1.1       oster    1826:  * XXX
1.186     perry    1827:  *
1.39      oster    1828:  * XXX This code is not currently used. GO
                          *
                          * req is pushed onto rf_sparet_wait_queue and waiters on
                          * rf_sparet_wait_cv are woken.  The function then waits until
                          * rf_sparet_resp_queue is non-empty, pops its head, and returns that
                          * entry's fcol field after freeing the entry.  The req passed in is
                          * not removed from the wait queue here.
1.1       oster    1829:  */
1.186     perry    1830: int
1.169     oster    1831: rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
1.9       oster    1832: {
                   1833:        int     retcode;
                   1834:
1.287     mrg      1835:        rf_lock_mutex2(rf_sparet_wait_mutex);
1.9       oster    1836:        req->next = rf_sparet_wait_queue;
                   1837:        rf_sparet_wait_queue = req;
1.289     mrg      1838:        rf_broadcast_cond2(rf_sparet_wait_cv);
1.9       oster    1839:
                   1840:        /* mpsleep unlocks the mutex (historical note: the wait is now
                                 * rf_wait_cond2, which is handed the same mutex and presumably
                                 * releases it while sleeping -- TODO confirm) */
                   1841:        while (!rf_sparet_resp_queue) {
1.289     mrg      1842:                rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
1.9       oster    1843:        }
                                /* from here on req refers to the response entry, not the
                                 * caller's request */
                   1844:        req = rf_sparet_resp_queue;
                   1845:        rf_sparet_resp_queue = req->next;
1.287     mrg      1846:        rf_unlock_mutex2(rf_sparet_wait_mutex);
1.9       oster    1847:
                   1848:        retcode = req->fcol;
                   1849:        RF_Free(req, sizeof(*req));     /* this is not the same req as we
                   1850:                                         * alloc'd */
                   1851:        return (retcode);
1.1       oster    1852: }
1.150     oster    1853: #endif
1.39      oster    1854:
1.186     perry    1855: /* a wrapper around rf_DoAccess that extracts appropriate info from the
1.11      oster    1856:  * bp & passes it down.
1.1       oster    1857:  * any calls originating in the kernel must use non-blocking I/O
                   1858:  * do some extra sanity checking to return "appropriate" error values for
                   1859:  * certain conditions (to make some standard utilities work)
1.186     perry    1860:  *
1.34      oster    1861:  * Formerly known as: rf_DoAccessKernel
                          *
                          * NOTE(review): the description above predates the current body.
                          * As written, raidstart() (1) updates the component labels and
                          * decrements numNewFailures if any new failures were recorded, and
                          * (2) calls dk_start() provided RAIDF_INITED is set; otherwise it
                          * prints a "not ready" message and returns.  The rf_DoAccess() call
                          * is made from raiddoaccess().
1.1       oster    1862:  */
1.34      oster    1863: void
1.169     oster    1864: raidstart(RF_Raid_t *raidPtr)
1.1       oster    1865: {
                   1866:        struct raid_softc *rs;
1.335     mlelstv  1867:        struct dk_softc *dksc;
1.1       oster    1868:
1.300     christos 1869:        rs = raidPtr->softc;
1.335     mlelstv  1870:        dksc = &rs->sc_dksc;
1.56      oster    1871:        /* quick check to see if anything has died recently */
1.291     mrg      1872:        rf_lock_mutex2(raidPtr->mutex);
1.56      oster    1873:        if (raidPtr->numNewFailures > 0) {
                                        /* the mutex is dropped across the label update and
                                         * retaken before the counter is adjusted */
1.291     mrg      1874:                rf_unlock_mutex2(raidPtr->mutex);
1.186     perry    1875:                rf_update_component_labels(raidPtr,
1.91      oster    1876:                                           RF_NORMAL_COMPONENT_UPDATE);
1.291     mrg      1877:                rf_lock_mutex2(raidPtr->mutex);
1.56      oster    1878:                raidPtr->numNewFailures--;
                   1879:        }
1.335     mlelstv  1880:        rf_unlock_mutex2(raidPtr->mutex);
1.56      oster    1881:
1.335     mlelstv  1882:        if ((rs->sc_flags & RAIDF_INITED) == 0) {
                   1883:                printf("raid%d: raidstart not ready\n", raidPtr->raidid);
                   1884:                return;
                   1885:        }
1.34      oster    1886:
1.335     mlelstv  1887:        dk_start(dksc, NULL);
                   1888: }
1.34      oster    1889:
                         /*
                          * raiddoaccess -- validate a buf against the RAID set and pass it
                          * to rf_DoAccess().
                          *
                          * Returns EAGAIN when raidPtr->openings is 0, and ENOSPC when the
                          * request ends past raidPtr->totalSectors, the sector arithmetic
                          * wraps, or b_bcount has bits set in raidPtr->sectorMask.  Otherwise
                          * one opening is consumed and the result of rf_DoAccess() is
                          * returned.
                          *
                          * NOTE(review): openings is tested and decremented in two separate
                          * critical sections; nothing visible in this function keeps the
                          * count from changing in between -- confirm that callers serialize.
                          */
1.335     mlelstv  1890: static int
                   1891: raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
                   1892: {
                   1893:        RF_SectorCount_t num_blocks, pb, sum;
                   1894:        RF_RaidAddr_t raid_addr;
                   1895:        daddr_t blocknum;
                   1896:        int     do_async;
                   1897:        int rc;
1.186     perry    1898:
1.335     mlelstv  1899:        rf_lock_mutex2(raidPtr->mutex);
                   1900:        if (raidPtr->openings == 0) {
                   1901:                rf_unlock_mutex2(raidPtr->mutex);
                   1902:                return EAGAIN;
                   1903:        }
                   1904:        rf_unlock_mutex2(raidPtr->mutex);
1.186     perry    1905:
1.335     mlelstv  1906:        blocknum = bp->b_rawblkno;
1.186     perry    1907:
1.335     mlelstv  1908:        db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
                   1909:                    (int) blocknum));
1.1       oster    1910:
1.335     mlelstv  1911:        db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
                   1912:        db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1.1       oster    1913:
1.335     mlelstv  1914:        /* *THIS* is where we adjust what block we're going to...
                   1915:         * but DO NOT TOUCH bp->b_blkno!!! */
                   1916:        raid_addr = blocknum;
                   1917:
                                /* num_blocks counts whole sectors; pb is 1 when b_bcount has
                                 * bits set in sectorMask (presumably a trailing partial
                                 * sector), else 0 */
                   1918:        num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
                   1919:        pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
                   1920:        sum = raid_addr + num_blocks + pb;
                                /* the "1 ||" makes this condition always true */
                   1921:        if (1 || rf_debugKernelAccess) {
                   1922:                db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
                   1923:                            (int) raid_addr, (int) sum, (int) num_blocks,
                   1924:                            (int) pb, (int) bp->b_resid));
                   1925:        }
                                /* reject requests that run past the end of the set; the
                                 * "sum < ..." tests catch wrap-around in the addition */
                   1926:        if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
                   1927:            || (sum < num_blocks) || (sum < pb)) {
                   1928:                rc = ENOSPC;
                   1929:                goto done;
                   1930:        }
                   1931:        /*
                   1932:         * XXX rf_DoAccess() should do this, not just DoAccessKernel()
                   1933:         */
1.186     perry    1934:
1.335     mlelstv  1935:        if (bp->b_bcount & raidPtr->sectorMask) {
                   1936:                rc = ENOSPC;
                   1937:                goto done;
                   1938:        }
                   1939:        db1_printf(("Calling DoAccess..\n"));
1.99      oster    1940:
1.20      oster    1941:
                                /* consume one opening for this request */
1.335     mlelstv  1942:        rf_lock_mutex2(raidPtr->mutex);
                   1943:        raidPtr->openings--;
1.291     mrg      1944:        rf_unlock_mutex2(raidPtr->mutex);
1.20      oster    1945:
1.335     mlelstv  1946:        /*
                   1947:         * Everything is async.
                   1948:         */
                   1949:        do_async = 1;
1.20      oster    1950:
1.335     mlelstv  1951:        /* don't ever condition on bp->b_flags & B_WRITE.
                   1952:         * always condition on B_READ instead */
1.7       explorer 1953:
1.335     mlelstv  1954:        rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
                   1955:                         RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
                   1956:                         do_async, raid_addr, num_blocks,
                   1957:                         bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
                   1958:
                   1959: done:
                   1960:        return rc;
                   1961: }
1.7       explorer 1962:
1.1       oster    1963: /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry.
                          *
                          * req->queue is set to queue, then the request is handled by type:
                          *  - RF_IO_TYPE_NOP: counts the request as outstanding and calls
                          *    KernelWakeupFunc() on its buf directly;
                          *  - RF_IO_TYPE_READ / RF_IO_TYPE_WRITE: fills in the buf with
                          *    InitBP(), updates the queue bookkeeping and passes the buf to
                          *    bdev_strategy() with the queue mutex released around the call;
                          *  - anything else panics.
                          * Always returns 0.
                          */
                   1964:
1.186     perry    1965: int
1.169     oster    1966: rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
1.1       oster    1967: {
                                /* op is only used by the READ/WRITE case; every non-read type
                                 * maps to B_WRITE */
1.9       oster    1968:        int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1.1       oster    1969:        struct buf *bp;
1.9       oster    1970:
1.1       oster    1971:        req->queue = queue;
                   1972:        bp = req->bp;
                   1973:
                   1974:        switch (req->type) {
1.9       oster    1975:        case RF_IO_TYPE_NOP:    /* used primarily to unlock a locked queue */
1.1       oster    1976:                /* XXX need to do something extra here.. */
1.9       oster    1977:                /* I'm leaving this in, as I've never actually seen it used,
                   1978:                 * and I'd like folks to report it... GO */
1.1       oster    1979:                printf(("WAKEUP CALLED\n"));
                   1980:                queue->numOutstanding++;
                   1981:
1.197     oster    1982:                bp->b_flags = 0;
1.207     simonb   1983:                bp->b_private = req;
1.1       oster    1984:
                                        /* no device I/O is issued: the completion callback is
                                         * invoked synchronously */
1.194     oster    1985:                KernelWakeupFunc(bp);
1.1       oster    1986:                break;
1.9       oster    1987:
1.1       oster    1988:        case RF_IO_TYPE_READ:
                   1989:        case RF_IO_TYPE_WRITE:
1.175     oster    1990: #if RF_ACC_TRACE > 0
1.1       oster    1991:                if (req->tracerec) {
                   1992:                        RF_ETIMER_START(req->tracerec->timer);
                   1993:                }
1.175     oster    1994: #endif
                                        /* KernelWakeupFunc becomes the buf's completion
                                         * callback, with req as its private argument */
1.194     oster    1995:                InitBP(bp, queue->rf_cinfo->ci_vp,
1.197     oster    1996:                    op, queue->rf_cinfo->ci_dev,
1.9       oster    1997:                    req->sectorOffset, req->numSector,
                   1998:                    req->buf, KernelWakeupFunc, (void *) req,
                   1999:                    queue->raidPtr->logBytesPerSector, req->b_proc);
1.1       oster    2000:
                   2001:                if (rf_debugKernelAccess) {
1.9       oster    2002:                        db1_printf(("dispatch: bp->b_blkno = %ld\n",
                   2003:                                (long) bp->b_blkno));
1.1       oster    2004:                }
                   2005:                queue->numOutstanding++;
                   2006:                queue->last_deq_sector = req->sectorOffset;
1.9       oster    2007:                /* acc wouldn't have been let in if there were any pending
                   2008:                 * reqs at any other priority */
1.1       oster    2009:                queue->curPriority = req->priority;
                   2010:
1.166     oster    2011:                db1_printf(("Going for %c to unit %d col %d\n",
1.186     perry    2012:                            req->type, queue->raidPtr->raidid,
1.166     oster    2013:                            queue->col));
1.1       oster    2014:                db1_printf(("sector %d count %d (%d bytes) %d\n",
1.9       oster    2015:                        (int) req->sectorOffset, (int) req->numSector,
                   2016:                        (int) (req->numSector <<
                   2017:                            queue->raidPtr->logBytesPerSector),
                   2018:                        (int) queue->raidPtr->logBytesPerSector));
1.256     oster    2019:
                   2020:                /*
                   2021:                 * XXX: drop lock here since this can block at
                   2022:                 * least with backing SCSI devices.  Retake it
                   2023:                 * to minimize fuss with calling interfaces.
                   2024:                 */
                   2025:
                   2026:                RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
1.247     oster    2027:                bdev_strategy(bp);
1.256     oster    2028:                RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
1.1       oster    2029:                break;
1.9       oster    2030:
1.1       oster    2031:        default:
                   2032:                panic("bad req->type in rf_DispatchKernelIO");
                   2033:        }
                   2034:        db1_printf(("Exiting from DispatchKernelIO\n"));
1.134     oster    2035:
1.9       oster    2036:        return (0);
1.1       oster    2037: }
1.9       oster    2038: /* this is the callback function associated with an I/O invoked from
1.1       oster    2039:    kernel code.

                            The request is recovered from bp->b_private.  With the RAID
                            set's iodone_lock held, the function optionally records trace
                            timings, marks the component failed on an I/O error (subject to
                            the checks below), copies bp->b_error into req->error, appends
                            req to the set's iodone queue and signals iodone_cv.
                   2040:  */
1.186     perry    2041: static void
1.194     oster    2042: KernelWakeupFunc(struct buf *bp)
1.9       oster    2043: {
                   2044:        RF_DiskQueueData_t *req = NULL;
                   2045:        RF_DiskQueue_t *queue;
                   2046:
                   2047:        db1_printf(("recovering the request queue:\n"));
1.285     mrg      2048:
1.207     simonb   2049:        req = bp->b_private;
1.1       oster    2050:
1.9       oster    2051:        queue = (RF_DiskQueue_t *) req->queue;
1.1       oster    2052:
1.286     mrg      2053:        rf_lock_mutex2(queue->raidPtr->iodone_lock);
1.285     mrg      2054:
1.175     oster    2055: #if RF_ACC_TRACE > 0
1.9       oster    2056:        if (req->tracerec) {
                   2057:                RF_ETIMER_STOP(req->tracerec->timer);
                   2058:                RF_ETIMER_EVAL(req->tracerec->timer);
1.288     mrg      2059:                rf_lock_mutex2(rf_tracing_mutex);
1.9       oster    2060:                req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
                   2061:                req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
                   2062:                req->tracerec->num_phys_ios++;
1.288     mrg      2063:                rf_unlock_mutex2(rf_tracing_mutex);
1.9       oster    2064:        }
1.175     oster    2065: #endif
1.1       oster    2066:
1.230     ad       2067:        /* XXX Ok, let's get aggressive... If b_error is set, let's go
1.9       oster    2068:         * ballistic, and mark the component as hosed... */
1.36      oster    2069:
1.230     ad       2070:        if (bp->b_error != 0) {
1.9       oster    2071:                /* Mark the disk as dead */
                   2072:                /* but only mark it once... */
1.186     perry    2073:                /* and only if it wouldn't leave this RAID set
1.183     oster    2074:                   completely broken */
                                        /* i.e. the component is currently optimal or a used
                                         * spare, and numFailures is still below the layout's
                                         * faultsTolerated */
1.193     oster    2075:                if (((queue->raidPtr->Disks[queue->col].status ==
                   2076:                      rf_ds_optimal) ||
                   2077:                     (queue->raidPtr->Disks[queue->col].status ==
                   2078:                      rf_ds_used_spare)) &&
                   2079:                     (queue->raidPtr->numFailures <
1.204     simonb   2080:                      queue->raidPtr->Layout.map->faultsTolerated)) {
1.322     prlw1    2081:                        printf("raid%d: IO Error (%d). Marking %s as failed.\n",
1.136     oster    2082:                               queue->raidPtr->raidid,
1.322     prlw1    2083:                               bp->b_error,
1.166     oster    2084:                               queue->raidPtr->Disks[queue->col].devname);
                   2085:                        queue->raidPtr->Disks[queue->col].status =
1.9       oster    2086:                            rf_ds_failed;
1.166     oster    2087:                        queue->raidPtr->status = rf_rs_degraded;
1.9       oster    2088:                        queue->raidPtr->numFailures++;
1.56      oster    2089:                        queue->raidPtr->numNewFailures++;
1.9       oster    2090:                } else {        /* Disk is already dead... */
                                                /* ...or failing it would exceed
                                                 * faultsTolerated; nothing is changed */
                   2091:                        /* printf("Disk already marked as dead!\n"); */
                   2092:                }
1.4       oster    2093:
1.9       oster    2094:        }
1.4       oster    2095:
1.143     oster    2096:        /* Fill in the error value */
1.230     ad       2097:        req->error = bp->b_error;
1.143     oster    2098:
                   2099:        /* Drop this one on the "finished" queue... */
                   2100:        TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
                   2101:
                   2102:        /* Let the raidio thread know there is work to be done. */
1.286     mrg      2103:        rf_signal_cond2(queue->raidPtr->iodone_cv);
1.143     oster    2104:
1.286     mrg      2105:        rf_unlock_mutex2(queue->raidPtr->iodone_lock);
1.1       oster    2106: }
                   2107:
                   2108:
                   2109: /*
                   2110:  * initialize a buf structure for doing an I/O in the kernel.
                          *
                          * bp                 buf to fill in
                          * b_vp               accepted but not stored by this function
                          * rw_flag            becomes bp->b_flags (callers in this file pass
                          *                    B_READ or B_WRITE)
                          * dev                becomes bp->b_dev
                          * startSect/numSect  start and length in units of
                          *                    (1 << logBytesPerSector) bytes
                          * bf                 data buffer, becomes bp->b_data
                          * cbFunc/cbArg       become bp->b_iodone / bp->b_private
                          * b_proc             becomes bp->b_proc
                          *
                          * Panics if the resulting byte count is zero.
                   2111:  */
1.186     perry    2112: static void
1.169     oster    2113: InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
1.225     christos 2114:        RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
1.169     oster    2115:        void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
                   2116:        struct proc *b_proc)
1.9       oster    2117: {
                   2118:        /* bp->b_flags       = B_PHYS | rw_flag; */
1.242     ad       2119:        bp->b_flags = rw_flag;  /* XXX need B_PHYS here too??? */
                   2120:        bp->b_oflags = 0;
                   2121:        bp->b_cflags = 0;
1.9       oster    2122:        bp->b_bcount = numSect << logBytesPerSector;
                   2123:        bp->b_bufsize = bp->b_bcount;
                   2124:        bp->b_error = 0;
                   2125:        bp->b_dev = dev;
1.187     christos 2126:        bp->b_data = bf;
                                /* sector number -> byte offset -> units of
                                 * (1 << DEV_BSHIFT) bytes */
1.275     mrg      2127:        bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
1.9       oster    2128:        bp->b_resid = bp->b_bcount;     /* XXX is this right!??!?!! */
1.1       oster    2129:        if (bp->b_bcount == 0) {
1.141     provos   2130:                panic("bp->b_bcount is zero in InitBP!!");
1.1       oster    2131:        }
1.161     fvdl     2132:        bp->b_proc = b_proc;
1.9       oster    2133:        bp->b_iodone = cbFunc;
1.207     simonb   2134:        bp->b_private = cbArg;
1.1       oster    2135: }
                   2136:
                   2137: /*
                   2138:  * Wait interruptibly for an exclusive lock.
                   2139:  *
                   2140:  * XXX
                   2141:  * Several drivers do this; it should be abstracted and made MP-safe.
                   2142:  * (Hmm... where have we seen this warning before :->  GO )
                          *
                          * The lock is the RAIDF_LOCKED bit in rs->sc_flags, protected by
                          * rs->sc_mutex.  Returns 0 once the bit has been set for the caller,
                          * or the non-zero value returned by cv_wait_sig(), in which case
                          * the lock is not taken.
                   2143:  */
                   2144: static int
1.169     oster    2145: raidlock(struct raid_softc *rs)
1.1       oster    2146: {
1.9       oster    2147:        int     error;
1.1       oster    2148:
1.335     mlelstv  2149:        error = 0;
1.327     pgoyette 2150:        mutex_enter(&rs->sc_mutex);
1.1       oster    2151:        while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
                                        /* tell the holder that someone is waiting; note the
                                         * flag is left set if the wait below fails */
                   2152:                rs->sc_flags |= RAIDF_WANTED;
1.327     pgoyette 2153:                error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
                   2154:                if (error != 0)
1.335     mlelstv  2155:                        goto done;
1.1       oster    2156:        }
                   2157:        rs->sc_flags |= RAIDF_LOCKED;
1.335     mlelstv  2158: done:
1.327     pgoyette 2159:        mutex_exit(&rs->sc_mutex);
1.335     mlelstv  2160:        return (error);
1.1       oster    2161: }
                   2162: /*
                   2163:  * Unlock and wake up any waiters.
                          *
                          * Clears RAIDF_LOCKED under rs->sc_mutex; if RAIDF_WANTED is set it
                          * is cleared too and rs->sc_cv is broadcast.
                   2164:  */
                   2165: static void
1.169     oster    2166: raidunlock(struct raid_softc *rs)
1.1       oster    2167: {
                   2168:
1.327     pgoyette 2169:        mutex_enter(&rs->sc_mutex);
1.1       oster    2170:        rs->sc_flags &= ~RAIDF_LOCKED;
                   2171:        if ((rs->sc_flags & RAIDF_WANTED) != 0) {
                   2172:                rs->sc_flags &= ~RAIDF_WANTED;
1.327     pgoyette 2173:                cv_broadcast(&rs->sc_cv);
1.1       oster    2174:        }
1.327     pgoyette 2175:        mutex_exit(&rs->sc_mutex);
1.11      oster    2176: }
1.186     perry    2177:
1.11      oster    2178:
                   2179: #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
                   2180: #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
                         /* minimum size used by rf_parity_map_size() */
1.269     jld      2181: #define RF_PARITY_MAP_SIZE   RF_PARITYMAP_NBYTE
1.11      oster    2182:
                         /* Returns RF_COMPONENT_INFO_OFFSET (a byte count, per the comment
                          * on its definition). */
1.276     mrg      2183: static daddr_t
                   2184: rf_component_info_offset(void)
                   2185: {
                   2186:
                   2187:        return RF_COMPONENT_INFO_OFFSET;
                   2188: }
                   2189:
                         /* Returns the larger of secsize and RF_COMPONENT_INFO_SIZE.
                          * secsize must be non-zero (asserted). */
                   2190: static daddr_t
                   2191: rf_component_info_size(unsigned secsize)
                   2192: {
                   2193:        daddr_t info_size;
                   2194:
                   2195:        KASSERT(secsize);
                   2196:        if (secsize > RF_COMPONENT_INFO_SIZE)
                   2197:                info_size = secsize;
                   2198:        else
                   2199:                info_size = RF_COMPONENT_INFO_SIZE;
                   2200:
                   2201:        return info_size;
                   2202: }
                   2203:
                         /* Returns rf_component_info_offset() plus the larger of
                          * raidPtr->bytesPerSector and RF_COMPONENT_INFO_SIZE.
                          * bytesPerSector must be non-zero (asserted).  The size selection
                          * repeats the one in rf_component_info_size(). */
                   2204: static daddr_t
                   2205: rf_parity_map_offset(RF_Raid_t *raidPtr)
                   2206: {
                   2207:        daddr_t map_offset;
                   2208:
                   2209:        KASSERT(raidPtr->bytesPerSector);
                   2210:        if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
                   2211:                map_offset = raidPtr->bytesPerSector;
                   2212:        else
                   2213:                map_offset = RF_COMPONENT_INFO_SIZE;
                   2214:        map_offset += rf_component_info_offset();
                   2215:
                   2216:        return map_offset;
                   2217: }
                   2218:
                   2219: static daddr_t
                   2220: rf_parity_map_size(RF_Raid_t *raidPtr)
                   2221: {
                   2222:        daddr_t map_size;
                   2223:
                   2224:        if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
                   2225:                map_size = raidPtr->bytesPerSector;
                   2226:        else
                   2227:                map_size = RF_PARITY_MAP_SIZE;
                   2228:
                   2229:        return map_size;
                   2230: }
                   2231:
1.186     perry    2232: int
1.269     jld      2233: raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
1.12      oster    2234: {
1.269     jld      2235:        RF_ComponentLabel_t *clabel;
                   2236:
                   2237:        clabel = raidget_component_label(raidPtr, col);
                   2238:        clabel->clean = RF_RAID_CLEAN;
                   2239:        raidflush_component_label(raidPtr, col);
1.12      oster    2240:        return(0);
                   2241: }
                   2242:
                   2243:
1.186     perry    2244: int
1.269     jld      2245: raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
1.11      oster    2246: {
1.269     jld      2247:        RF_ComponentLabel_t *clabel;
                   2248:
                   2249:        clabel = raidget_component_label(raidPtr, col);
                   2250:        clabel->clean = RF_RAID_DIRTY;
                   2251:        raidflush_component_label(raidPtr, col);
1.11      oster    2252:        return(0);
                   2253: }
                   2254:
                   2255: int
1.269     jld      2256: raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
                   2257: {
1.276     mrg      2258:        KASSERT(raidPtr->bytesPerSector);
                   2259:        return raidread_component_label(raidPtr->bytesPerSector,
                   2260:            raidPtr->Disks[col].dev,
1.269     jld      2261:            raidPtr->raid_cinfo[col].ci_vp,
                   2262:            &raidPtr->raid_cinfo[col].ci_label);
                   2263: }
                   2264:
                   2265: RF_ComponentLabel_t *
                   2266: raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
                   2267: {
                   2268:        return &raidPtr->raid_cinfo[col].ci_label;
                   2269: }
                   2270:
                   2271: int
                   2272: raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
                   2273: {
                   2274:        RF_ComponentLabel_t *label;
                   2275:
                   2276:        label = &raidPtr->raid_cinfo[col].ci_label;
                   2277:        label->mod_counter = raidPtr->mod_counter;
                   2278: #ifndef RF_NO_PARITY_MAP
                   2279:        label->parity_map_modcount = label->mod_counter;
                   2280: #endif
1.276     mrg      2281:        return raidwrite_component_label(raidPtr->bytesPerSector,
                   2282:            raidPtr->Disks[col].dev,
1.269     jld      2283:            raidPtr->raid_cinfo[col].ci_vp, label);
                   2284: }
                   2285:
                   2286:
                   2287: static int
1.276     mrg      2288: raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
1.269     jld      2289:     RF_ComponentLabel_t *clabel)
                   2290: {
                   2291:        return raidread_component_area(dev, b_vp, clabel,
                   2292:            sizeof(RF_ComponentLabel_t),
1.276     mrg      2293:            rf_component_info_offset(),
                   2294:            rf_component_info_size(secsize));
1.269     jld      2295: }
                   2296:
                   2297: /* ARGSUSED */
                   2298: static int
                   2299: raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
                   2300:     size_t msize, daddr_t offset, daddr_t dsize)
1.11      oster    2301: {
                   2302:        struct buf *bp;
                   2303:        int error;
1.186     perry    2304:
1.11      oster    2305:        /* XXX should probably ensure that we don't try to do this if
1.186     perry    2306:           someone has changed rf_protected_sectors. */
1.11      oster    2307:
1.98      oster    2308:        if (b_vp == NULL) {
                   2309:                /* For whatever reason, this component is not valid.
                   2310:                   Don't try to read a component label from it. */
                   2311:                return(EINVAL);
                   2312:        }
                   2313:
1.11      oster    2314:        /* get a block of the appropriate size... */
1.269     jld      2315:        bp = geteblk((int)dsize);
1.11      oster    2316:        bp->b_dev = dev;
                   2317:
                   2318:        /* get our ducks in a row for the read */
1.269     jld      2319:        bp->b_blkno = offset / DEV_BSIZE;
                   2320:        bp->b_bcount = dsize;
1.100     chs      2321:        bp->b_flags |= B_READ;
1.269     jld      2322:        bp->b_resid = dsize;
1.11      oster    2323:
1.331     mlelstv  2324:        bdev_strategy(bp);
1.340     christos 2325:        error = biowait(bp);
1.11      oster    2326:
                   2327:        if (!error) {
1.269     jld      2328:                memcpy(data, bp->b_data, msize);
1.204     simonb   2329:        }
1.11      oster    2330:
1.233     ad       2331:        brelse(bp, 0);
1.11      oster    2332:        return(error);
                   2333: }
1.269     jld      2334:
                   2335:
                   2336: static int
1.276     mrg      2337: raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
                   2338:     RF_ComponentLabel_t *clabel)
1.269     jld      2339: {
                   2340:        return raidwrite_component_area(dev, b_vp, clabel,
                   2341:            sizeof(RF_ComponentLabel_t),
1.276     mrg      2342:            rf_component_info_offset(),
                   2343:            rf_component_info_size(secsize), 0);
1.269     jld      2344: }
                   2345:
1.11      oster    2346: /* ARGSUSED */
1.269     jld      2347: static int
                   2348: raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
                   2349:     size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
1.11      oster    2350: {
                   2351:        struct buf *bp;
                   2352:        int error;
                   2353:
                   2354:        /* get a block of the appropriate size... */
1.269     jld      2355:        bp = geteblk((int)dsize);
1.11      oster    2356:        bp->b_dev = dev;
                   2357:
                   2358:        /* get our ducks in a row for the write */
1.269     jld      2359:        bp->b_blkno = offset / DEV_BSIZE;
                   2360:        bp->b_bcount = dsize;
                   2361:        bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
                   2362:        bp->b_resid = dsize;
1.11      oster    2363:
1.269     jld      2364:        memset(bp->b_data, 0, dsize);
                   2365:        memcpy(bp->b_data, data, msize);
1.11      oster    2366:
1.331     mlelstv  2367:        bdev_strategy(bp);
1.269     jld      2368:        if (asyncp)
                   2369:                return 0;
1.340     christos 2370:        error = biowait(bp);
1.233     ad       2371:        brelse(bp, 0);
1.11      oster    2372:        if (error) {
1.48      oster    2373: #if 1
1.11      oster    2374:                printf("Failed to write RAID component info!\n");
1.48      oster    2375: #endif
1.11      oster    2376:        }
                   2377:
                   2378:        return(error);
1.1       oster    2379: }
1.12      oster    2380:
1.186     perry    2381: void
1.269     jld      2382: rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
                   2383: {
                   2384:        int c;
                   2385:
                   2386:        for (c = 0; c < raidPtr->numCol; c++) {
                   2387:                /* Skip dead disks. */
                   2388:                if (RF_DEAD_DISK(raidPtr->Disks[c].status))
                   2389:                        continue;
                   2390:                /* XXXjld: what if an error occurs here? */
                   2391:                raidwrite_component_area(raidPtr->Disks[c].dev,
                   2392:                    raidPtr->raid_cinfo[c].ci_vp, map,
                   2393:                    RF_PARITYMAP_NBYTE,
1.276     mrg      2394:                    rf_parity_map_offset(raidPtr),
                   2395:                    rf_parity_map_size(raidPtr), 0);
1.269     jld      2396:        }
                   2397: }
                   2398:
                   2399: void
                   2400: rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
                   2401: {
                   2402:        struct rf_paritymap_ondisk tmp;
1.272     oster    2403:        int c,first;
1.269     jld      2404:
1.272     oster    2405:        first=1;
1.269     jld      2406:        for (c = 0; c < raidPtr->numCol; c++) {
                   2407:                /* Skip dead disks. */
                   2408:                if (RF_DEAD_DISK(raidPtr->Disks[c].status))
                   2409:                        continue;
                   2410:                raidread_component_area(raidPtr->Disks[c].dev,
                   2411:                    raidPtr->raid_cinfo[c].ci_vp, &tmp,
                   2412:                    RF_PARITYMAP_NBYTE,
1.276     mrg      2413:                    rf_parity_map_offset(raidPtr),
                   2414:                    rf_parity_map_size(raidPtr));
1.272     oster    2415:                if (first) {
1.269     jld      2416:                        memcpy(map, &tmp, sizeof(*map));
1.272     oster    2417:                        first = 0;
1.269     jld      2418:                } else {
                   2419:                        rf_paritymap_merge(map, &tmp);
                   2420:                }
                   2421:        }
                   2422: }
                   2423:
                   2424: void
1.169     oster    2425: rf_markalldirty(RF_Raid_t *raidPtr)
1.12      oster    2426: {
1.269     jld      2427:        RF_ComponentLabel_t *clabel;
1.146     oster    2428:        int sparecol;
1.166     oster    2429:        int c;
                   2430:        int j;
                   2431:        int scol = -1;
1.12      oster    2432:
                   2433:        raidPtr->mod_counter++;
1.166     oster    2434:        for (c = 0; c < raidPtr->numCol; c++) {
                   2435:                /* we don't want to touch (at all) a disk that has
                   2436:                   failed */
                   2437:                if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
1.269     jld      2438:                        clabel = raidget_component_label(raidPtr, c);
                   2439:                        if (clabel->status == rf_ds_spared) {
1.186     perry    2440:                                /* XXX do something special...
                   2441:                                   but whatever you do, don't
1.166     oster    2442:                                   try to access it!! */
                   2443:                        } else {
1.269     jld      2444:                                raidmarkdirty(raidPtr, c);
1.12      oster    2445:                        }
1.166     oster    2446:                }
1.186     perry    2447:        }
1.146     oster    2448:
1.12      oster    2449:        for( c = 0; c < raidPtr->numSpare ; c++) {
                   2450:                sparecol = raidPtr->numCol + c;
1.166     oster    2451:                if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.186     perry    2452:                        /*
                   2453:
                   2454:                           we claim this disk is "optimal" if it's
                   2455:                           rf_ds_used_spare, as that means it should be
                   2456:                           directly substitutable for the disk it replaced.
1.12      oster    2457:                           We note that too...
                   2458:
                   2459:                         */
                   2460:
1.166     oster    2461:                        for(j=0;j<raidPtr->numCol;j++) {
                   2462:                                if (raidPtr->Disks[j].spareCol == sparecol) {
                   2463:                                        scol = j;
                   2464:                                        break;
1.12      oster    2465:                                }
                   2466:                        }
1.186     perry    2467:
1.269     jld      2468:                        clabel = raidget_component_label(raidPtr, sparecol);
1.12      oster    2469:                        /* make sure status is noted */
1.146     oster    2470:
1.269     jld      2471:                        raid_init_component_label(raidPtr, clabel);
1.146     oster    2472:
1.269     jld      2473:                        clabel->row = 0;
                   2474:                        clabel->column = scol;
1.146     oster    2475:                        /* Note: we *don't* change status from rf_ds_used_spare
                   2476:                           to rf_ds_optimal */
                   2477:                        /* clabel.status = rf_ds_optimal; */
1.186     perry    2478:
1.269     jld      2479:                        raidmarkdirty(raidPtr, sparecol);
1.12      oster    2480:                }
                   2481:        }
                   2482: }
                   2483:
1.13      oster    2484:
                   2485: void
1.169     oster    2486: rf_update_component_labels(RF_Raid_t *raidPtr, int final)
1.13      oster    2487: {
1.269     jld      2488:        RF_ComponentLabel_t *clabel;
1.13      oster    2489:        int sparecol;
1.166     oster    2490:        int c;
                   2491:        int j;
                   2492:        int scol;
1.341     christos 2493:        struct raid_softc *rs = raidPtr->softc;
1.13      oster    2494:
                   2495:        scol = -1;
                   2496:
1.186     perry    2497:        /* XXX should do extra checks to make sure things really are clean,
1.13      oster    2498:           rather than blindly setting the clean bit... */
                   2499:
                   2500:        raidPtr->mod_counter++;
                   2501:
1.166     oster    2502:        for (c = 0; c < raidPtr->numCol; c++) {
                   2503:                if (raidPtr->Disks[c].status == rf_ds_optimal) {
1.269     jld      2504:                        clabel = raidget_component_label(raidPtr, c);
1.201     oster    2505:                        /* make sure status is noted */
1.269     jld      2506:                        clabel->status = rf_ds_optimal;
1.201     oster    2507:
1.214     oster    2508:                        /* note what unit we are configured as */
1.341     christos 2509:                        if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
                   2510:                                clabel->last_unit = raidPtr->raidid;
1.214     oster    2511:
1.269     jld      2512:                        raidflush_component_label(raidPtr, c);
1.166     oster    2513:                        if (final == RF_FINAL_COMPONENT_UPDATE) {
                   2514:                                if (raidPtr->parity_good == RF_RAID_CLEAN) {
1.269     jld      2515:                                        raidmarkclean(raidPtr, c);
1.91      oster    2516:                                }
1.166     oster    2517:                        }
1.186     perry    2518:                }
1.166     oster    2519:                /* else we don't touch it.. */
1.186     perry    2520:        }
1.63      oster    2521:
                   2522:        for( c = 0; c < raidPtr->numSpare ; c++) {
                   2523:                sparecol = raidPtr->numCol + c;
1.110     oster    2524:                /* Need to ensure that the reconstruct actually completed! */
1.166     oster    2525:                if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.186     perry    2526:                        /*
                   2527:
                   2528:                           we claim this disk is "optimal" if it's
                   2529:                           rf_ds_used_spare, as that means it should be
                   2530:                           directly substitutable for the disk it replaced.
1.63      oster    2531:                           We note that too...
                   2532:
                   2533:                         */
                   2534:
1.166     oster    2535:                        for(j=0;j<raidPtr->numCol;j++) {
                   2536:                                if (raidPtr->Disks[j].spareCol == sparecol) {
                   2537:                                        scol = j;
                   2538:                                        break;
1.63      oster    2539:                                }
                   2540:                        }
1.186     perry    2541:
1.63      oster    2542:                        /* XXX shouldn't *really* need this... */
1.269     jld      2543:                        clabel = raidget_component_label(raidPtr, sparecol);
1.63      oster    2544:                        /* make sure status is noted */
                   2545:
1.269     jld      2546:                        raid_init_component_label(raidPtr, clabel);
                   2547:
                   2548:                        clabel->column = scol;
                   2549:                        clabel->status = rf_ds_optimal;
1.341     christos 2550:                        if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
                   2551:                                clabel->last_unit = raidPtr->raidid;
1.63      oster    2552:
1.269     jld      2553:                        raidflush_component_label(raidPtr, sparecol);
1.91      oster    2554:                        if (final == RF_FINAL_COMPONENT_UPDATE) {
1.13      oster    2555:                                if (raidPtr->parity_good == RF_RAID_CLEAN) {
1.269     jld      2556:                                        raidmarkclean(raidPtr, sparecol);
1.13      oster    2557:                                }
                   2558:                        }
                   2559:                }
                   2560:        }
1.68      oster    2561: }
                   2562:
                   2563: void
1.169     oster    2564: rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
1.69      oster    2565: {
                   2566:
                   2567:        if (vp != NULL) {
                   2568:                if (auto_configured == 1) {
1.96      oster    2569:                        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.238     pooka    2570:                        VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
1.69      oster    2571:                        vput(vp);
1.186     perry    2572:
                   2573:                } else {
1.244     ad       2574:                        (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
1.69      oster    2575:                }
1.186     perry    2576:        }
1.69      oster    2577: }
                   2578:
                   2579:
                   2580: void
1.169     oster    2581: rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
1.68      oster    2582: {
1.186     perry    2583:        int r,c;
1.69      oster    2584:        struct vnode *vp;
                   2585:        int acd;
1.68      oster    2586:
                   2587:
                   2588:        /* We take this opportunity to close the vnodes like we should.. */
                   2589:
1.166     oster    2590:        for (c = 0; c < raidPtr->numCol; c++) {
                   2591:                vp = raidPtr->raid_cinfo[c].ci_vp;
                   2592:                acd = raidPtr->Disks[c].auto_configured;
                   2593:                rf_close_component(raidPtr, vp, acd);
                   2594:                raidPtr->raid_cinfo[c].ci_vp = NULL;
                   2595:                raidPtr->Disks[c].auto_configured = 0;
1.68      oster    2596:        }
1.166     oster    2597:
1.68      oster    2598:        for (r = 0; r < raidPtr->numSpare; r++) {
1.166     oster    2599:                vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
                   2600:                acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
1.69      oster    2601:                rf_close_component(raidPtr, vp, acd);
1.166     oster    2602:                raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
                   2603:                raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
1.68      oster    2604:        }
1.37      oster    2605: }
1.63      oster    2606:
1.37      oster    2607:
1.186     perry    2608: void
1.353     mrg      2609: rf_ReconThread(struct rf_recon_req_internal *req)
1.37      oster    2610: {
                   2611:        int     s;
                   2612:        RF_Raid_t *raidPtr;
                   2613:
                   2614:        s = splbio();
                   2615:        raidPtr = (RF_Raid_t *) req->raidPtr;
                   2616:        raidPtr->recon_in_progress = 1;
                   2617:
1.166     oster    2618:        rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
1.37      oster    2619:                    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
                   2620:
                   2621:        RF_Free(req, sizeof(*req));
                   2622:
                   2623:        raidPtr->recon_in_progress = 0;
                   2624:        splx(s);
                   2625:
                   2626:        /* That's all... */
1.204     simonb   2627:        kthread_exit(0);        /* does not return */
1.37      oster    2628: }
                   2629:
                   2630: void
1.169     oster    2631: rf_RewriteParityThread(RF_Raid_t *raidPtr)
1.37      oster    2632: {
                   2633:        int retcode;
                   2634:        int s;
                   2635:
1.184     oster    2636:        raidPtr->parity_rewrite_stripes_done = 0;
1.37      oster    2637:        raidPtr->parity_rewrite_in_progress = 1;
                   2638:        s = splbio();
                   2639:        retcode = rf_RewriteParity(raidPtr);
                   2640:        splx(s);
                   2641:        if (retcode) {
1.279     christos 2642:                printf("raid%d: Error re-writing parity (%d)!\n",
                   2643:                    raidPtr->raidid, retcode);
1.37      oster    2644:        } else {
                   2645:                /* set the clean bit!  If we shutdown correctly,
                   2646:                   the clean bit on each component label will get
                   2647:                   set */
                   2648:                raidPtr->parity_good = RF_RAID_CLEAN;
                   2649:        }
                   2650:        raidPtr->parity_rewrite_in_progress = 0;
1.85      oster    2651:
                   2652:        /* Anyone waiting for us to stop?  If so, inform them... */
                   2653:        if (raidPtr->waitShutdown) {
1.357     mrg      2654:                rf_lock_mutex2(raidPtr->rad_lock);
                   2655:                cv_broadcast(&raidPtr->parity_rewrite_cv);
                   2656:                rf_unlock_mutex2(raidPtr->rad_lock);
1.85      oster    2657:        }
1.37      oster    2658:
                   2659:        /* That's all... */
1.204     simonb   2660:        kthread_exit(0);        /* does not return */
1.37      oster    2661: }
                   2662:
                   2663:
                   2664: void
1.169     oster    2665: rf_CopybackThread(RF_Raid_t *raidPtr)
1.37      oster    2666: {
                   2667:        int s;
                   2668:
                   2669:        raidPtr->copyback_in_progress = 1;
                   2670:        s = splbio();
                   2671:        rf_CopybackReconstructedData(raidPtr);
                   2672:        splx(s);
                   2673:        raidPtr->copyback_in_progress = 0;
                   2674:
                   2675:        /* That's all... */
1.204     simonb   2676:        kthread_exit(0);        /* does not return */
1.37      oster    2677: }
                   2678:
                   2679:
                   2680: void
1.353     mrg      2681: rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
1.37      oster    2682: {
                   2683:        int s;
                   2684:        RF_Raid_t *raidPtr;
1.186     perry    2685:
1.37      oster    2686:        s = splbio();
                   2687:        raidPtr = req->raidPtr;
                   2688:        raidPtr->recon_in_progress = 1;
1.166     oster    2689:        rf_ReconstructInPlace(raidPtr, req->col);
1.37      oster    2690:        RF_Free(req, sizeof(*req));
                   2691:        raidPtr->recon_in_progress = 0;
                   2692:        splx(s);
                   2693:
                   2694:        /* That's all... */
1.204     simonb   2695:        kthread_exit(0);        /* does not return */
1.48      oster    2696: }
                   2697:
1.213     christos 2698: static RF_AutoConfig_t *
                   2699: rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
1.276     mrg      2700:     const char *cname, RF_SectorCount_t size, uint64_t numsecs,
                   2701:     unsigned secsize)
1.213     christos 2702: {
                   2703:        int good_one = 0;
                   2704:        RF_ComponentLabel_t *clabel;
                   2705:        RF_AutoConfig_t *ac;
                   2706:
                   2707:        clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
                   2708:        if (clabel == NULL) {
                   2709: oomem:
                   2710:                    while(ac_list) {
                   2711:                            ac = ac_list;
                   2712:                            if (ac->clabel)
                   2713:                                    free(ac->clabel, M_RAIDFRAME);
                   2714:                            ac_list = ac_list->next;
                   2715:                            free(ac, M_RAIDFRAME);
                   2716:                    }
                   2717:                    printf("RAID auto config: out of memory!\n");
                   2718:                    return NULL; /* XXX probably should panic? */
                   2719:        }
                   2720:
1.276     mrg      2721:        if (!raidread_component_label(secsize, dev, vp, clabel)) {
                   2722:                /* Got the label.  Does it look reasonable? */
1.284     mrg      2723:                if (rf_reasonable_label(clabel, numsecs) &&
1.282     enami    2724:                    (rf_component_label_partitionsize(clabel) <= size)) {
1.224     oster    2725: #ifdef DEBUG
1.276     mrg      2726:                        printf("Component on: %s: %llu\n",
1.213     christos 2727:                                cname, (unsigned long long)size);
1.276     mrg      2728:                        rf_print_component_label(clabel);
1.213     christos 2729: #endif
1.276     mrg      2730:                        /* if it's reasonable, add it, else ignore it. */
                   2731:                        ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
1.213     christos 2732:                                M_NOWAIT);
1.276     mrg      2733:                        if (ac == NULL) {
                   2734:                                free(clabel, M_RAIDFRAME);
                   2735:                                goto oomem;
                   2736:                        }
                   2737:                        strlcpy(ac->devname, cname, sizeof(ac->devname));
                   2738:                        ac->dev = dev;
                   2739:                        ac->vp = vp;
                   2740:                        ac->clabel = clabel;
                   2741:                        ac->next = ac_list;
                   2742:                        ac_list = ac;
                   2743:                        good_one = 1;
                   2744:                }
1.213     christos 2745:        }
                   2746:        if (!good_one) {
                   2747:                /* cleanup */
                   2748:                free(clabel, M_RAIDFRAME);
                   2749:                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.238     pooka    2750:                VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
1.213     christos 2751:                vput(vp);
                   2752:        }
                   2753:        return ac_list;
                   2754: }
                   2755:
1.48      oster    2756: RF_AutoConfig_t *
1.259     cegger   2757: rf_find_raid_components(void)
1.48      oster    2758: {
                   2759:        struct vnode *vp;
                   2760:        struct disklabel label;
1.261     dyoung   2761:        device_t dv;
1.268     dyoung   2762:        deviter_t di;
1.48      oster    2763:        dev_t dev;
1.296     buhrow   2764:        int bmajor, bminor, wedge, rf_part_found;
1.48      oster    2765:        int error;
                   2766:        int i;
                   2767:        RF_AutoConfig_t *ac_list;
1.276     mrg      2768:        uint64_t numsecs;
                   2769:        unsigned secsize;
1.335     mlelstv  2770:        int dowedges;
1.48      oster    2771:
                   2772:        /* initialize the AutoConfig list */
                   2773:        ac_list = NULL;
                   2774:
1.335     mlelstv  2775:        /*
                   2776:         * we begin by trolling through *all* the devices on the system *twice*
                   2777:         * first we scan for wedges, second for other devices. This avoids
                   2778:         * using a raw partition instead of a wedge that covers the whole disk
                   2779:         */
1.48      oster    2780:
1.335     mlelstv  2781:        for (dowedges=1; dowedges>=0; --dowedges) {
                   2782:                for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
                   2783:                     dv = deviter_next(&di)) {
1.48      oster    2784:
1.335     mlelstv  2785:                        /* we are only interested in disks... */
                   2786:                        if (device_class(dv) != DV_DISK)
                   2787:                                continue;
1.48      oster    2788:
1.335     mlelstv  2789:                        /* we don't care about floppies... */
                   2790:                        if (device_is_a(dv, "fd")) {
                   2791:                                continue;
                   2792:                        }
1.129     oster    2793:
1.335     mlelstv  2794:                        /* we don't care about CD's... */
                   2795:                        if (device_is_a(dv, "cd")) {
                   2796:                                continue;
                   2797:                        }
1.129     oster    2798:
1.335     mlelstv  2799:                        /* we don't care about md's... */
                   2800:                        if (device_is_a(dv, "md")) {
                   2801:                                continue;
                   2802:                        }
1.248     oster    2803:
1.335     mlelstv  2804:                        /* hdfd is the Atari/Hades floppy driver */
                   2805:                        if (device_is_a(dv, "hdfd")) {
                   2806:                                continue;
                   2807:                        }
1.206     thorpej  2808:
1.335     mlelstv  2809:                        /* fdisa is the Atari/Milan floppy driver */
                   2810:                        if (device_is_a(dv, "fdisa")) {
                   2811:                                continue;
                   2812:                        }
1.186     perry    2813:
1.335     mlelstv  2814:                        /* are we in the wedges pass ? */
                   2815:                        wedge = device_is_a(dv, "dk");
                   2816:                        if (wedge != dowedges) {
                   2817:                                continue;
                   2818:                        }
1.48      oster    2819:
1.335     mlelstv  2820:                        /* need to find the device_name_to_block_device_major stuff */
                   2821:                        bmajor = devsw_name2blk(device_xname(dv), NULL, 0);
1.296     buhrow   2822:
1.335     mlelstv  2823:                        rf_part_found = 0; /*No raid partition as yet*/
1.48      oster    2824:
1.335     mlelstv  2825:                        /* get a vnode for the raw partition of this disk */
                   2826:                        bminor = minor(device_unit(dv));
                   2827:                        dev = wedge ? makedev(bmajor, bminor) :
                   2828:                            MAKEDISKDEV(bmajor, bminor, RAW_PART);
                   2829:                        if (bdevvp(dev, &vp))
                   2830:                                panic("RAID can't alloc vnode");
1.48      oster    2831:
1.335     mlelstv  2832:                        error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
1.48      oster    2833:
1.335     mlelstv  2834:                        if (error) {
                   2835:                                /* "Who cares."  Continue looking
                   2836:                                   for something that exists*/
                   2837:                                vput(vp);
                   2838:                                continue;
                   2839:                        }
1.48      oster    2840:
1.335     mlelstv  2841:                        error = getdisksize(vp, &numsecs, &secsize);
1.213     christos 2842:                        if (error) {
1.339     mlelstv  2843:                                /*
                   2844:                                 * Pseudo devices like vnd and cgd can be
                   2845:                                 * opened but may still need some configuration.
                   2846:                                 * Ignore these quietly.
                   2847:                                 */
                   2848:                                if (error != ENXIO)
                   2849:                                        printf("RAIDframe: can't get disk size"
                   2850:                                            " for dev %s (%d)\n",
                   2851:                                            device_xname(dv), error);
1.241     oster    2852:                                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
                   2853:                                VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
                   2854:                                vput(vp);
1.213     christos 2855:                                continue;
                   2856:                        }
1.335     mlelstv  2857:                        if (wedge) {
                   2858:                                struct dkwedge_info dkw;
                   2859:                                error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
                   2860:                                    NOCRED);
                   2861:                                if (error) {
                   2862:                                        printf("RAIDframe: can't get wedge info for "
                   2863:                                            "dev %s (%d)\n", device_xname(dv), error);
                   2864:                                        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
                   2865:                                        VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
                   2866:                                        vput(vp);
                   2867:                                        continue;
                   2868:                                }
1.213     christos 2869:
1.335     mlelstv  2870:                                if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
                   2871:                                        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
                   2872:                                        VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
                   2873:                                        vput(vp);
                   2874:                                        continue;
                   2875:                                }
                   2876:
                   2877:                                ac_list = rf_get_component(ac_list, dev, vp,
                   2878:                                    device_xname(dv), dkw.dkw_size, numsecs, secsize);
                   2879:                                rf_part_found = 1; /*There is a raid component on this disk*/
1.228     christos 2880:                                continue;
1.241     oster    2881:                        }
1.213     christos 2882:
1.335     mlelstv  2883:                        /* Ok, the disk exists.  Go get the disklabel. */
                   2884:                        error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
                   2885:                        if (error) {
                   2886:                                /*
                   2887:                                 * XXX can't happen - open() would
                   2888:                                 * have errored out (or faked up one)
                   2889:                                 */
                   2890:                                if (error != ENOTTY)
                   2891:                                        printf("RAIDframe: can't get label for dev "
                   2892:                                            "%s (%d)\n", device_xname(dv), error);
                   2893:                        }
1.48      oster    2894:
1.335     mlelstv  2895:                        /* don't need this any more.  We'll allocate it again
                   2896:                           a little later if we really do... */
                   2897:                        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
                   2898:                        VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
                   2899:                        vput(vp);
1.48      oster    2900:
1.335     mlelstv  2901:                        if (error)
1.48      oster    2902:                                continue;
                   2903:
1.335     mlelstv  2904:                        rf_part_found = 0; /*No raid partitions yet*/
                   2905:                        for (i = 0; i < label.d_npartitions; i++) {
                   2906:                                char cname[sizeof(ac_list->devname)];
                   2907:
                   2908:                                /* We only support partitions marked as RAID */
                   2909:                                if (label.d_partitions[i].p_fstype != FS_RAID)
                   2910:                                        continue;
                   2911:
                   2912:                                dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
                   2913:                                if (bdevvp(dev, &vp))
                   2914:                                        panic("RAID can't alloc vnode");
                   2915:
                   2916:                                error = VOP_OPEN(vp, FREAD, NOCRED);
                   2917:                                if (error) {
                   2918:                                        /* Whatever... */
                   2919:                                        vput(vp);
                   2920:                                        continue;
                   2921:                                }
                   2922:                                snprintf(cname, sizeof(cname), "%s%c",
                   2923:                                    device_xname(dv), 'a' + i);
                   2924:                                ac_list = rf_get_component(ac_list, dev, vp, cname,
                   2925:                                        label.d_partitions[i].p_size, numsecs, secsize);
                   2926:                                rf_part_found = 1; /*There is at least one raid partition on this disk*/
1.48      oster    2927:                        }
1.296     buhrow   2928:
1.335     mlelstv  2929:                        /*
                   2930:                         *If there is no raid component on this disk, either in a
                   2931:                         *disklabel or inside a wedge, check the raw partition as well,
                   2932:                         *as it is possible to configure raid components on raw disk
                   2933:                         *devices.
                   2934:                         */
1.296     buhrow   2935:
1.335     mlelstv  2936:                        if (!rf_part_found) {
                   2937:                                char cname[sizeof(ac_list->devname)];
1.296     buhrow   2938:
1.335     mlelstv  2939:                                dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
                   2940:                                if (bdevvp(dev, &vp))
                   2941:                                        panic("RAID can't alloc vnode");
                   2942:
                   2943:                                error = VOP_OPEN(vp, FREAD, NOCRED);
                   2944:                                if (error) {
                   2945:                                        /* Whatever... */
                   2946:                                        vput(vp);
                   2947:                                        continue;
                   2948:                                }
                   2949:                                snprintf(cname, sizeof(cname), "%s%c",
                   2950:                                    device_xname(dv), 'a' + RAW_PART);
                   2951:                                ac_list = rf_get_component(ac_list, dev, vp, cname,
                   2952:                                        label.d_partitions[RAW_PART].p_size, numsecs, secsize);
1.296     buhrow   2953:                        }
1.48      oster    2954:                }
1.335     mlelstv  2955:                deviter_release(&di);
1.48      oster    2956:        }
1.213     christos 2957:        return ac_list;
1.48      oster    2958: }
1.186     perry    2959:
1.213     christos 2960:
1.292     oster    2961: int
1.284     mrg      2962: rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
1.48      oster    2963: {
1.186     perry    2964:
1.48      oster    2965:        if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
                   2966:             (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
                   2967:            ((clabel->clean == RF_RAID_CLEAN) ||
                   2968:             (clabel->clean == RF_RAID_DIRTY)) &&
1.186     perry    2969:            clabel->row >=0 &&
                   2970:            clabel->column >= 0 &&
1.48      oster    2971:            clabel->num_rows > 0 &&
                   2972:            clabel->num_columns > 0 &&
1.186     perry    2973:            clabel->row < clabel->num_rows &&
1.48      oster    2974:            clabel->column < clabel->num_columns &&
                   2975:            clabel->blockSize > 0 &&
1.282     enami    2976:            /*
                   2977:             * numBlocksHi may contain garbage, but it is ok since
                   2978:             * the type is unsigned.  If it is really garbage,
                   2979:             * rf_fix_old_label_size() will fix it.
                   2980:             */
                   2981:            rf_component_label_numblocks(clabel) > 0) {
1.284     mrg      2982:                /*
                   2983:                 * label looks reasonable enough...
                   2984:                 * let's make sure it has no old garbage.
                   2985:                 */
1.292     oster    2986:                if (numsecs)
                   2987:                        rf_fix_old_label_size(clabel, numsecs);
1.48      oster    2988:                return(1);
                   2989:        }
                   2990:        return(0);
                   2991: }
                   2992:
                   2993:
1.278     mrg      2994: /*
                   2995:  * For reasons yet unknown, some old component labels have garbage in
                   2996:  * the newer numBlocksHi region, and this causes lossage.  Since those
                   2997:  * disks will also have numsecs set to less than 32 bits of sectors,
1.299     oster    2998:  * we can determine when this corruption has occurred, and fix it.
1.284     mrg      2999:  *
                   3000:  * The exact same problem, with the same unknown reason, happens to
                   3001:  * the partitionSizeHi member as well.
1.278     mrg      3002:  */
                   3003: static void
                   3004: rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
                   3005: {
                   3006:
1.284     mrg      3007:        if (numsecs < ((uint64_t)1 << 32)) {
                   3008:                if (clabel->numBlocksHi) {
                   3009:                        printf("WARNING: total sectors < 32 bits, yet "
                   3010:                               "numBlocksHi set\n"
                   3011:                               "WARNING: resetting numBlocksHi to zero.\n");
                   3012:                        clabel->numBlocksHi = 0;
                   3013:                }
                   3014:
                   3015:                if (clabel->partitionSizeHi) {
                   3016:                        printf("WARNING: total sectors < 32 bits, yet "
                   3017:                               "partitionSizeHi set\n"
                   3018:                               "WARNING: resetting partitionSizeHi to zero.\n");
                   3019:                        clabel->partitionSizeHi = 0;
                   3020:                }
1.278     mrg      3021:        }
                   3022: }
                   3023:
                   3024:
#ifdef DEBUG
/*
 * Dump the fields of a component label with printf().
 * Only built when DEBUG is defined.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	/* Indexed by the low two bits of root_partition. */
	static const char *root_names[] = {
	    "No", "Force", "Soft", "*invalid*"
	};
	uint64_t total_blocks;

	total_blocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number, clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	    clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	    (char) clabel->parityConfig, clabel->blockSize, total_blocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n",
	    root_names[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif
}
#endif
1.48      oster    3058:
                   3059: RF_ConfigSet_t *
1.169     oster    3060: rf_create_auto_sets(RF_AutoConfig_t *ac_list)
1.48      oster    3061: {
                   3062:        RF_AutoConfig_t *ac;
                   3063:        RF_ConfigSet_t *config_sets;
                   3064:        RF_ConfigSet_t *cset;
                   3065:        RF_AutoConfig_t *ac_next;
                   3066:
                   3067:
                   3068:        config_sets = NULL;
                   3069:
                   3070:        /* Go through the AutoConfig list, and figure out which components
                   3071:           belong to what sets.  */
                   3072:        ac = ac_list;
                   3073:        while(ac!=NULL) {
                   3074:                /* we're going to putz with ac->next, so save it here
                   3075:                   for use at the end of the loop */
                   3076:                ac_next = ac->next;
                   3077:
                   3078:                if (config_sets == NULL) {
                   3079:                        /* will need at least this one... */
                   3080:                        config_sets = (RF_ConfigSet_t *)
1.186     perry    3081:                                malloc(sizeof(RF_ConfigSet_t),
1.48      oster    3082:                                       M_RAIDFRAME, M_NOWAIT);
                   3083:                        if (config_sets == NULL) {
1.141     provos   3084:                                panic("rf_create_auto_sets: No memory!");
1.48      oster    3085:                        }
                   3086:                        /* this one is easy :) */
                   3087:                        config_sets->ac = ac;
                   3088:                        config_sets->next = NULL;
1.51      oster    3089:                        config_sets->rootable = 0;
1.48      oster    3090:                        ac->next = NULL;
                   3091:                } else {
                   3092:                        /* which set does this component fit into? */
                   3093:                        cset = config_sets;
                   3094:                        while(cset!=NULL) {
1.49      oster    3095:                                if (rf_does_it_fit(cset, ac)) {
1.86      oster    3096:                                        /* looks like it matches... */
                   3097:                                        ac->next = cset->ac;
                   3098:                                        cset->ac = ac;
1.48      oster    3099:                                        break;
                   3100:                                }
                   3101:                                cset = cset->next;
                   3102:                        }
                   3103:                        if (cset==NULL) {
                   3104:                                /* didn't find a match above... new set..*/
                   3105:                                cset = (RF_ConfigSet_t *)
1.186     perry    3106:                                        malloc(sizeof(RF_ConfigSet_t),
1.48      oster    3107:                                               M_RAIDFRAME, M_NOWAIT);
                   3108:                                if (cset == NULL) {
1.141     provos   3109:                                        panic("rf_create_auto_sets: No memory!");
1.48      oster    3110:                                }
                   3111:                                cset->ac = ac;
                   3112:                                ac->next = NULL;
                   3113:                                cset->next = config_sets;
1.51      oster    3114:                                cset->rootable = 0;
1.48      oster    3115:                                config_sets = cset;
                   3116:                        }
                   3117:                }
                   3118:                ac = ac_next;
                   3119:        }
                   3120:
                   3121:
                   3122:        return(config_sets);
                   3123: }
                   3124:
                   3125: static int
1.169     oster    3126: rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
1.48      oster    3127: {
                   3128:        RF_ComponentLabel_t *clabel1, *clabel2;
                   3129:
                   3130:        /* If this one matches the *first* one in the set, that's good
                   3131:           enough, since the other members of the set would have been
                   3132:           through here too... */
1.60      oster    3133:        /* note that we are not checking partitionSize here..
                   3134:
                   3135:           Note that we are also not checking the mod_counters here.
1.299     oster    3136:           If everything else matches except the mod_counter, that's
1.60      oster    3137:           good enough for this test.  We will deal with the mod_counters
1.186     perry    3138:           a little later in the autoconfiguration process.
1.60      oster    3139:
                   3140:            (clabel1->mod_counter == clabel2->mod_counter) &&
1.81      oster    3141:
                   3142:           The reason we don't check for this is that failed disks
                   3143:           will have lower modification counts.  If those disks are
                   3144:           not added to the set they used to belong to, then they will
                   3145:           form their own set, which may result in 2 different sets,
                   3146:           for example, competing to be configured at raid0, and
                   3147:           perhaps competing to be the root filesystem set.  If the
                   3148:           wrong ones get configured, or both attempt to become /,
                   3149:           weird behaviour and or serious lossage will occur.  Thus we
                   3150:           need to bring them into the fold here, and kick them out at
                   3151:           a later point.
1.60      oster    3152:
                   3153:        */
1.48      oster    3154:
                   3155:        clabel1 = cset->ac->clabel;
                   3156:        clabel2 = ac->clabel;
                   3157:        if ((clabel1->version == clabel2->version) &&
                   3158:            (clabel1->serial_number == clabel2->serial_number) &&
                   3159:            (clabel1->num_rows == clabel2->num_rows) &&
                   3160:            (clabel1->num_columns == clabel2->num_columns) &&
                   3161:            (clabel1->sectPerSU == clabel2->sectPerSU) &&
                   3162:            (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
                   3163:            (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
                   3164:            (clabel1->parityConfig == clabel2->parityConfig) &&
                   3165:            (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
                   3166:            (clabel1->blockSize == clabel2->blockSize) &&
1.282     enami    3167:            rf_component_label_numblocks(clabel1) ==
                   3168:            rf_component_label_numblocks(clabel2) &&
1.48      oster    3169:            (clabel1->autoconfigure == clabel2->autoconfigure) &&
                   3170:            (clabel1->root_partition == clabel2->root_partition) &&
                   3171:            (clabel1->last_unit == clabel2->last_unit) &&
                   3172:            (clabel1->config_order == clabel2->config_order)) {
                   3173:                /* if it get's here, it almost *has* to be a match */
                   3174:        } else {
1.186     perry    3175:                /* it's not consistent with somebody in the set..
1.48      oster    3176:                   punt */
                   3177:                return(0);
                   3178:        }
                   3179:        /* all was fine.. it must fit... */
                   3180:        return(1);
                   3181: }
                   3182:
                   3183: int
1.169     oster    3184: rf_have_enough_components(RF_ConfigSet_t *cset)
1.48      oster    3185: {
1.51      oster    3186:        RF_AutoConfig_t *ac;
                   3187:        RF_AutoConfig_t *auto_config;
                   3188:        RF_ComponentLabel_t *clabel;
1.166     oster    3189:        int c;
1.51      oster    3190:        int num_cols;
                   3191:        int num_missing;
1.86      oster    3192:        int mod_counter;
1.87      oster    3193:        int mod_counter_found;
1.88      oster    3194:        int even_pair_failed;
                   3195:        char parity_type;
1.186     perry    3196:
1.51      oster    3197:
1.48      oster    3198:        /* check to see that we have enough 'live' components
                   3199:           of this set.  If so, we can configure it if necessary */
                   3200:
1.51      oster    3201:        num_cols = cset->ac->clabel->num_columns;
1.88      oster    3202:        parity_type = cset->ac->clabel->parityConfig;
1.51      oster    3203:
                   3204:        /* XXX Check for duplicate components!?!?!? */
                   3205:
1.86      oster    3206:        /* Determine what the mod_counter is supposed to be for this set. */
                   3207:
1.87      oster    3208:        mod_counter_found = 0;
1.101     oster    3209:        mod_counter = 0;
1.86      oster    3210:        ac = cset->ac;
                   3211:        while(ac!=NULL) {
1.87      oster    3212:                if (mod_counter_found==0) {
1.86      oster    3213:                        mod_counter = ac->clabel->mod_counter;
1.87      oster    3214:                        mod_counter_found = 1;
                   3215:                } else {
                   3216:                        if (ac->clabel->mod_counter > mod_counter) {
                   3217:                                mod_counter = ac->clabel->mod_counter;
                   3218:                        }
1.86      oster    3219:                }
                   3220:                ac = ac->next;
                   3221:        }
                   3222:
1.51      oster    3223:        num_missing = 0;
                   3224:        auto_config = cset->ac;
                   3225:
1.166     oster    3226:        even_pair_failed = 0;
                   3227:        for(c=0; c<num_cols; c++) {
                   3228:                ac = auto_config;
                   3229:                while(ac!=NULL) {
1.186     perry    3230:                        if ((ac->clabel->column == c) &&
1.166     oster    3231:                            (ac->clabel->mod_counter == mod_counter)) {
                   3232:                                /* it's this one... */
1.224     oster    3233: #ifdef DEBUG
1.166     oster    3234:                                printf("Found: %s at %d\n",
                   3235:                                       ac->devname,c);
1.51      oster    3236: #endif
1.166     oster    3237:                                break;
1.51      oster    3238:                        }
1.166     oster    3239:                        ac=ac->next;
                   3240:                }
                   3241:                if (ac==NULL) {
1.51      oster    3242:                                /* Didn't find one here! */
1.88      oster    3243:                                /* special case for RAID 1, especially
                   3244:                                   where there are more than 2
                   3245:                                   components (where RAIDframe treats
                   3246:                                   things a little differently :( ) */
1.166     oster    3247:                        if (parity_type == '1') {
                   3248:                                if (c%2 == 0) { /* even component */
                   3249:                                        even_pair_failed = 1;
                   3250:                                } else { /* odd component.  If
                   3251:                                            we're failed, and
                   3252:                                            so is the even
                   3253:                                            component, it's
                   3254:                                            "Good Night, Charlie" */
                   3255:                                        if (even_pair_failed == 1) {
                   3256:                                                return(0);
1.88      oster    3257:                                        }
                   3258:                                }
1.166     oster    3259:                        } else {
                   3260:                                /* normal accounting */
                   3261:                                num_missing++;
1.88      oster    3262:                        }
1.166     oster    3263:                }
                   3264:                if ((parity_type == '1') && (c%2 == 1)) {
1.88      oster    3265:                                /* Just did an even component, and we didn't
1.186     perry    3266:                                   bail.. reset the even_pair_failed flag,
1.88      oster    3267:                                   and go on to the next component.... */
1.166     oster    3268:                        even_pair_failed = 0;
1.51      oster    3269:                }
                   3270:        }
                   3271:
                   3272:        clabel = cset->ac->clabel;
                   3273:
                   3274:        if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
                   3275:            ((clabel->parityConfig == '4') && (num_missing > 1)) ||
                   3276:            ((clabel->parityConfig == '5') && (num_missing > 1))) {
                   3277:                /* XXX this needs to be made *much* more general */
                   3278:                /* Too many failures */
                   3279:                return(0);
                   3280:        }
                   3281:        /* otherwise, all is well, and we've got enough to take a kick
                   3282:           at autoconfiguring this set */
                   3283:        return(1);
1.48      oster    3284: }
                   3285:
                   3286: void
1.169     oster    3287: rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
1.222     christos 3288:                        RF_Raid_t *raidPtr)
1.48      oster    3289: {
                   3290:        RF_ComponentLabel_t *clabel;
1.77      oster    3291:        int i;
1.48      oster    3292:
                   3293:        clabel = ac->clabel;
                   3294:
                   3295:        /* 1. Fill in the common stuff */
                   3296:        config->numCol = clabel->num_columns;
                   3297:        config->numSpare = 0; /* XXX should this be set here? */
                   3298:        config->sectPerSU = clabel->sectPerSU;
                   3299:        config->SUsPerPU = clabel->SUsPerPU;
                   3300:        config->SUsPerRU = clabel->SUsPerRU;
                   3301:        config->parityConfig = clabel->parityConfig;
                   3302:        /* XXX... */
                   3303:        strcpy(config->diskQueueType,"fifo");
                   3304:        config->maxOutstandingDiskReqs = clabel->maxOutstanding;
                   3305:        config->layoutSpecificSize = 0; /* XXX ?? */
                   3306:
                   3307:        while(ac!=NULL) {
                   3308:                /* row/col values will be in range due to the checks
                   3309:                   in reasonable_label() */
1.166     oster    3310:                strcpy(config->devnames[0][ac->clabel->column],
1.48      oster    3311:                       ac->devname);
                   3312:                ac = ac->next;
                   3313:        }
                   3314:
1.77      oster    3315:        for(i=0;i<RF_MAXDBGV;i++) {
1.163     fvdl     3316:                config->debugVars[i][0] = 0;
1.77      oster    3317:        }
1.48      oster    3318: }
                   3319:
                   3320: int
1.169     oster    3321: rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
1.48      oster    3322: {
1.269     jld      3323:        RF_ComponentLabel_t *clabel;
1.166     oster    3324:        int column;
1.148     oster    3325:        int sparecol;
1.48      oster    3326:
1.54      oster    3327:        raidPtr->autoconfigure = new_value;
1.166     oster    3328:
                   3329:        for(column=0; column<raidPtr->numCol; column++) {
                   3330:                if (raidPtr->Disks[column].status == rf_ds_optimal) {
1.269     jld      3331:                        clabel = raidget_component_label(raidPtr, column);
                   3332:                        clabel->autoconfigure = new_value;
                   3333:                        raidflush_component_label(raidPtr, column);
1.48      oster    3334:                }
                   3335:        }
1.148     oster    3336:        for(column = 0; column < raidPtr->numSpare ; column++) {
                   3337:                sparecol = raidPtr->numCol + column;
1.166     oster    3338:                if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.269     jld      3339:                        clabel = raidget_component_label(raidPtr, sparecol);
                   3340:                        clabel->autoconfigure = new_value;
                   3341:                        raidflush_component_label(raidPtr, sparecol);
1.148     oster    3342:                }
                   3343:        }
1.48      oster    3344:        return(new_value);
                   3345: }
                   3346:
                   3347: int
1.169     oster    3348: rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
1.48      oster    3349: {
1.269     jld      3350:        RF_ComponentLabel_t *clabel;
1.166     oster    3351:        int column;
1.148     oster    3352:        int sparecol;
1.48      oster    3353:
1.54      oster    3354:        raidPtr->root_partition = new_value;
1.166     oster    3355:        for(column=0; column<raidPtr->numCol; column++) {
                   3356:                if (raidPtr->Disks[column].status == rf_ds_optimal) {
1.269     jld      3357:                        clabel = raidget_component_label(raidPtr, column);
                   3358:                        clabel->root_partition = new_value;
                   3359:                        raidflush_component_label(raidPtr, column);
1.148     oster    3360:                }
                   3361:        }
                   3362:        for(column = 0; column < raidPtr->numSpare ; column++) {
                   3363:                sparecol = raidPtr->numCol + column;
1.166     oster    3364:                if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.269     jld      3365:                        clabel = raidget_component_label(raidPtr, sparecol);
                   3366:                        clabel->root_partition = new_value;
                   3367:                        raidflush_component_label(raidPtr, sparecol);
1.48      oster    3368:                }
                   3369:        }
                   3370:        return(new_value);
                   3371: }
                   3372:
                   3373: void
1.169     oster    3374: rf_release_all_vps(RF_ConfigSet_t *cset)
1.48      oster    3375: {
                   3376:        RF_AutoConfig_t *ac;
1.186     perry    3377:
1.48      oster    3378:        ac = cset->ac;
                   3379:        while(ac!=NULL) {
                   3380:                /* Close the vp, and give it back */
                   3381:                if (ac->vp) {
1.96      oster    3382:                        vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
1.335     mlelstv  3383:                        VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
1.48      oster    3384:                        vput(ac->vp);
1.86      oster    3385:                        ac->vp = NULL;
1.48      oster    3386:                }
                   3387:                ac = ac->next;
                   3388:        }
                   3389: }
                   3390:
                   3391:
                   3392: void
1.169     oster    3393: rf_cleanup_config_set(RF_ConfigSet_t *cset)
1.48      oster    3394: {
                   3395:        RF_AutoConfig_t *ac;
                   3396:        RF_AutoConfig_t *next_ac;
1.186     perry    3397:
1.48      oster    3398:        ac = cset->ac;
                   3399:        while(ac!=NULL) {
                   3400:                next_ac = ac->next;
                   3401:                /* nuke the label */
                   3402:                free(ac->clabel, M_RAIDFRAME);
                   3403:                /* cleanup the config structure */
                   3404:                free(ac, M_RAIDFRAME);
                   3405:                /* "next.." */
                   3406:                ac = next_ac;
                   3407:        }
                   3408:        /* and, finally, nuke the config set */
                   3409:        free(cset, M_RAIDFRAME);
                   3410: }
                   3411:
                   3412:
                   3413: void
1.169     oster    3414: raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
1.48      oster    3415: {
                   3416:        /* current version number */
1.186     perry    3417:        clabel->version = RF_COMPONENT_LABEL_VERSION;
1.57      oster    3418:        clabel->serial_number = raidPtr->serial_number;
1.48      oster    3419:        clabel->mod_counter = raidPtr->mod_counter;
1.269     jld      3420:
1.166     oster    3421:        clabel->num_rows = 1;
1.48      oster    3422:        clabel->num_columns = raidPtr->numCol;
                   3423:        clabel->clean = RF_RAID_DIRTY; /* not clean */
                   3424:        clabel->status = rf_ds_optimal; /* "It's good!" */
1.186     perry    3425:
1.48      oster    3426:        clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
                   3427:        clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
                   3428:        clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
1.54      oster    3429:
                   3430:        clabel->blockSize = raidPtr->bytesPerSector;
1.282     enami    3431:        rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);
1.54      oster    3432:
1.48      oster    3433:        /* XXX not portable */
                   3434:        clabel->parityConfig = raidPtr->Layout.map->parityConfig;
1.54      oster    3435:        clabel->maxOutstanding = raidPtr->maxOutstanding;
                   3436:        clabel->autoconfigure = raidPtr->autoconfigure;
                   3437:        clabel->root_partition = raidPtr->root_partition;
1.48      oster    3438:        clabel->last_unit = raidPtr->raidid;
1.54      oster    3439:        clabel->config_order = raidPtr->config_order;
1.269     jld      3440:
                   3441: #ifndef RF_NO_PARITY_MAP
                   3442:        rf_paritymap_init_label(raidPtr->parity_map, clabel);
                   3443: #endif
1.51      oster    3444: }
                   3445:
1.300     christos 3446: struct raid_softc *
                   3447: rf_auto_config_set(RF_ConfigSet_t *cset)
1.51      oster    3448: {
                   3449:        RF_Raid_t *raidPtr;
                   3450:        RF_Config_t *config;
                   3451:        int raidID;
1.300     christos 3452:        struct raid_softc *sc;
1.51      oster    3453:
1.224     oster    3454: #ifdef DEBUG
1.72      oster    3455:        printf("RAID autoconfigure\n");
1.127     oster    3456: #endif
1.51      oster    3457:
                   3458:        /* 1. Create a config structure */
1.300     christos 3459:        config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
                   3460:        if (config == NULL) {
1.327     pgoyette 3461:                printf("%s: Out of mem - config!?!?\n", __func__);
1.51      oster    3462:                                /* XXX do something more intelligent here. */
1.300     christos 3463:                return NULL;
1.51      oster    3464:        }
1.77      oster    3465:
1.186     perry    3466:        /*
                   3467:           2. Figure out what RAID ID this one is supposed to live at
1.51      oster    3468:           See if we can get the same RAID dev that it was configured
1.186     perry    3469:           on last time..
1.51      oster    3470:        */
                   3471:
                   3472:        raidID = cset->ac->clabel->last_unit;
1.327     pgoyette 3473:        for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
                   3474:             sc = raidget(++raidID, false))
1.300     christos 3475:                continue;
1.224     oster    3476: #ifdef DEBUG
1.72      oster    3477:        printf("Configuring raid%d:\n",raidID);
1.127     oster    3478: #endif
                   3479:
1.327     pgoyette 3480:        if (sc == NULL)
                   3481:                sc = raidget(raidID, true);
                   3482:        if (sc == NULL) {
                   3483:                printf("%s: Out of mem - softc!?!?\n", __func__);
                   3484:                                /* XXX do something more intelligent here. */
                   3485:                free(config, M_RAIDFRAME);
                   3486:                return NULL;
                   3487:        }
                   3488:
1.300     christos 3489:        raidPtr = &sc->sc_r;
1.51      oster    3490:
                   3491:        /* XXX all this stuff should be done SOMEWHERE ELSE! */
1.302     christos 3492:        raidPtr->softc = sc;
1.51      oster    3493:        raidPtr->raidid = raidID;
                   3494:        raidPtr->openings = RAIDOUTSTANDING;
                   3495:
                   3496:        /* 3. Build the configuration structure */
                   3497:        rf_create_configuration(cset->ac, config, raidPtr);
                   3498:
                   3499:        /* 4. Do the configuration */
1.300     christos 3500:        if (rf_Configure(raidPtr, config, cset->ac) == 0) {
                   3501:                raidinit(sc);
1.186     perry    3502:
1.300     christos 3503:                rf_markalldirty(raidPtr);
                   3504:                raidPtr->autoconfigure = 1; /* XXX do this here? */
1.308     christos 3505:                switch (cset->ac->clabel->root_partition) {
                   3506:                case 1: /* Force Root */
                   3507:                case 2: /* Soft Root: root when boot partition part of raid */
                   3508:                        /*
                   3509:                         * everything configured just fine.  Make a note
                   3510:                         * that this set is eligible to be root,
                   3511:                         * or forced to be root
                   3512:                         */
                   3513:                        cset->rootable = cset->ac->clabel->root_partition;
1.54      oster    3514:                        /* XXX do this here? */
1.308     christos 3515:                        raidPtr->root_partition = cset->rootable;
                   3516:                        break;
                   3517:                default:
                   3518:                        break;
1.51      oster    3519:                }
1.300     christos 3520:        } else {
                   3521:                raidput(sc);
                   3522:                sc = NULL;
1.51      oster    3523:        }
                   3524:
                   3525:        /* 5. Cleanup */
                   3526:        free(config, M_RAIDFRAME);
1.300     christos 3527:        return sc;
1.99      oster    3528: }
                   3529:
                   3530: void
1.187     christos 3531: rf_pool_init(struct pool *p, size_t size, const char *w_chan,
                   3532:             size_t xmin, size_t xmax)
1.177     oster    3533: {
1.352     christos 3534:        int error;
                   3535:
1.227     ad       3536:        pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
1.187     christos 3537:        pool_sethiwat(p, xmax);
1.352     christos 3538:        if ((error = pool_prime(p, xmin)) != 0)
                   3539:                panic("%s: failed to prime pool: %d", __func__, error);
1.187     christos 3540:        pool_setlowat(p, xmin);
1.177     oster    3541: }
1.190     oster    3542:
                   3543: /*
1.335     mlelstv  3544:  * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
                   3545:  * to see if there is IO pending and if that IO could possibly be done
                   3546:  * for a given RAID set.  Returns 0 if IO is waiting and can be done, 1
1.190     oster    3547:  * otherwise.
                   3548:  *
                   3549:  */
                   3550: int
1.300     christos 3551: rf_buf_queue_check(RF_Raid_t *raidPtr)
1.190     oster    3552: {
1.335     mlelstv  3553:        struct raid_softc *rs;
                   3554:        struct dk_softc *dksc;
                   3555:
                   3556:        rs = raidPtr->softc;
                   3557:        dksc = &rs->sc_dksc;
                   3558:
                   3559:        if ((rs->sc_flags & RAIDF_INITED) == 0)
                   3560:                return 1;
                   3561:
                   3562:        if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
1.190     oster    3563:                /* there is work to do */
                   3564:                return 0;
1.335     mlelstv  3565:        }
1.190     oster    3566:        /* default is nothing to do */
                   3567:        return 1;
                   3568: }
1.213     christos 3569:
                   3570: int
1.294     oster    3571: rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
1.213     christos 3572: {
1.275     mrg      3573:        uint64_t numsecs;
                   3574:        unsigned secsize;
1.213     christos 3575:        int error;
                   3576:
1.275     mrg      3577:        error = getdisksize(vp, &numsecs, &secsize);
1.213     christos 3578:        if (error == 0) {
1.275     mrg      3579:                diskPtr->blockSize = secsize;
                   3580:                diskPtr->numBlocks = numsecs - rf_protectedSectors;
                   3581:                diskPtr->partitionSize = numsecs;
1.213     christos 3582:                return 0;
                   3583:        }
                   3584:        return error;
                   3585: }
1.217     oster    3586:
                   3587: static int
1.261     dyoung   3588: raid_match(device_t self, cfdata_t cfdata, void *aux)
1.217     oster    3589: {
                   3590:        return 1;
                   3591: }
                   3592:
                   3593: static void
1.261     dyoung   3594: raid_attach(device_t parent, device_t self, void *aux)
1.217     oster    3595: {
                   3596: }
                   3597:
                   3598:
                   3599: static int
1.261     dyoung   3600: raid_detach(device_t self, int flags)
1.217     oster    3601: {
1.266     dyoung   3602:        int error;
1.335     mlelstv  3603:        struct raid_softc *rs = raidsoftc(self);
1.303     christos 3604:
                   3605:        if (rs == NULL)
                   3606:                return ENXIO;
1.266     dyoung   3607:
                   3608:        if ((error = raidlock(rs)) != 0)
                   3609:                return (error);
1.217     oster    3610:
1.266     dyoung   3611:        error = raid_detach_unlocked(rs);
                   3612:
1.332     mlelstv  3613:        raidunlock(rs);
                   3614:
                   3615:        /* XXX raid can be referenced here */
                   3616:
                   3617:        if (error)
                   3618:                return error;
                   3619:
                   3620:        /* Free the softc */
                   3621:        raidput(rs);
                   3622:
                   3623:        return 0;
1.217     oster    3624: }
                   3625:
1.234     oster    3626: static void
1.304     christos 3627: rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
1.234     oster    3628: {
1.335     mlelstv  3629:        struct dk_softc *dksc = &rs->sc_dksc;
                   3630:        struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;
1.304     christos 3631:
                   3632:        memset(dg, 0, sizeof(*dg));
                   3633:
                   3634:        dg->dg_secperunit = raidPtr->totalSectors;
                   3635:        dg->dg_secsize = raidPtr->bytesPerSector;
                   3636:        dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
                   3637:        dg->dg_ntracks = 4 * raidPtr->numCol;
                   3638:
1.335     mlelstv  3639:        disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
1.234     oster    3640: }
1.252     oster    3641:
1.348     jdolecek 3642: /*
                   3643:  * Get cache info for all the components (including spares).
                   3644:  * Returns intersection of all the cache flags of all disks, or first
                   3645:  * error if any encountered.
                   3646:  * XXXfua feature flags can change as spares are added - lock down somehow
                   3647:  */
                   3648: static int
                   3649: rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
                   3650: {
                   3651:        int c;
                   3652:        int error;
                   3653:        int dkwhole = 0, dkpart;
                   3654:
                   3655:        for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
                   3656:                /*
                   3657:                 * Check any non-dead disk, even when currently being
                   3658:                 * reconstructed.
                   3659:                 */
                   3660:                if (!RF_DEAD_DISK(raidPtr->Disks[c].status)
                   3661:                    || raidPtr->Disks[c].status == rf_ds_reconstructing) {
                   3662:                        error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
                   3663:                            DIOCGCACHE, &dkpart, FREAD, NOCRED);
                   3664:                        if (error) {
                   3665:                                if (error != ENODEV) {
                   3666:                                        printf("raid%d: get cache for component %s failed\n",
                   3667:                                            raidPtr->raidid,
                   3668:                                            raidPtr->Disks[c].devname);
                   3669:                                }
                   3670:
                   3671:                                return error;
                   3672:                        }
                   3673:
                   3674:                        if (c == 0)
                   3675:                                dkwhole = dkpart;
                   3676:                        else
                   3677:                                dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
                   3678:                }
                   3679:        }
                   3680:
1.349     jdolecek 3681:        *data = dkwhole;
1.348     jdolecek 3682:
                   3683:        return 0;
                   3684: }
                   3685:
1.252     oster    3686: /*
                   3687:  * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
                   3688:  * We end up returning whatever error was returned by the first cache flush
                   3689:  * that fails.
                   3690:  */
                   3691:
1.269     jld      3692: int
1.252     oster    3693: rf_sync_component_caches(RF_Raid_t *raidPtr)
                   3694: {
                   3695:        int c, sparecol;
                   3696:        int e,error;
                   3697:        int force = 1;
                   3698:
                   3699:        error = 0;
                   3700:        for (c = 0; c < raidPtr->numCol; c++) {
                   3701:                if (raidPtr->Disks[c].status == rf_ds_optimal) {
                   3702:                        e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
                   3703:                                          &force, FWRITE, NOCRED);
                   3704:                        if (e) {
1.255     oster    3705:                                if (e != ENODEV)
                   3706:                                        printf("raid%d: cache flush to component %s failed.\n",
                   3707:                                               raidPtr->raidid, raidPtr->Disks[c].devname);
1.252     oster    3708:                                if (error == 0) {
                   3709:                                        error = e;
                   3710:                                }
                   3711:                        }
                   3712:                }
                   3713:        }
                   3714:
                   3715:        for( c = 0; c < raidPtr->numSpare ; c++) {
                   3716:                sparecol = raidPtr->numCol + c;
                   3717:                /* Need to ensure that the reconstruct actually completed! */
                   3718:                if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
                   3719:                        e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
                   3720:                                          DIOCCACHESYNC, &force, FWRITE, NOCRED);
                   3721:                        if (e) {
1.255     oster    3722:                                if (e != ENODEV)
                   3723:                                        printf("raid%d: cache flush to component %s failed.\n",
                   3724:                                               raidPtr->raidid, raidPtr->Disks[sparecol].devname);
1.252     oster    3725:                                if (error == 0) {
                   3726:                                        error = e;
                   3727:                                }
                   3728:                        }
                   3729:                }
                   3730:        }
                   3731:        return error;
                   3732: }
1.327     pgoyette 3733:
1.353     mrg      3734: /* Fill in info with the current status */
                   3735: void
                   3736: rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
                   3737: {
                   3738:
                   3739:        if (raidPtr->status != rf_rs_reconstructing) {
                   3740:                info->total = 100;
                   3741:                info->completed = 100;
                   3742:        } else {
                   3743:                info->total = raidPtr->reconControl->numRUsTotal;
                   3744:                info->completed = raidPtr->reconControl->numRUsComplete;
                   3745:        }
                   3746:        info->remaining = info->total - info->completed;
                   3747: }
                   3748:
                   3749: /* Fill in info with the current status */
                   3750: void
                   3751: rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
                   3752: {
                   3753:
                   3754:        if (raidPtr->parity_rewrite_in_progress == 1) {
                   3755:                info->total = raidPtr->Layout.numStripe;
                   3756:                info->completed = raidPtr->parity_rewrite_stripes_done;
                   3757:        } else {
                   3758:                info->completed = 100;
                   3759:                info->total = 100;
                   3760:        }
                   3761:        info->remaining = info->total - info->completed;
                   3762: }
                   3763:
                   3764: /* Fill in info with the current status */
                   3765: void
                   3766: rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
                   3767: {
                   3768:
                   3769:        if (raidPtr->copyback_in_progress == 1) {
                   3770:                info->total = raidPtr->Layout.numStripe;
                   3771:                info->completed = raidPtr->copyback_stripes_done;
                   3772:                info->remaining = info->total - info->completed;
                   3773:        } else {
                   3774:                info->remaining = 0;
                   3775:                info->completed = 100;
                   3776:                info->total = 100;
                   3777:        }
                   3778: }
                   3779:
                   3780: /* Fill in config with the current info */
                   3781: int
                   3782: rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
                   3783: {
                   3784:        int     d, i, j;
                   3785:
                   3786:        if (!raidPtr->valid)
                   3787:                return (ENODEV);
                   3788:        config->cols = raidPtr->numCol;
                   3789:        config->ndevs = raidPtr->numCol;
                   3790:        if (config->ndevs >= RF_MAX_DISKS)
                   3791:                return (ENOMEM);
                   3792:        config->nspares = raidPtr->numSpare;
                   3793:        if (config->nspares >= RF_MAX_DISKS)
                   3794:                return (ENOMEM);
                   3795:        config->maxqdepth = raidPtr->maxQueueDepth;
                   3796:        d = 0;
                   3797:        for (j = 0; j < config->cols; j++) {
                   3798:                config->devs[d] = raidPtr->Disks[j];
                   3799:                d++;
                   3800:        }
                   3801:        for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
                   3802:                config->spares[i] = raidPtr->Disks[j];
                   3803:                if (config->spares[i].status == rf_ds_rebuilding_spare) {
                   3804:                        /* XXX: raidctl(8) expects to see this as a used spare */
                   3805:                        config->spares[i].status = rf_ds_used_spare;
                   3806:                }
                   3807:        }
                   3808:        return 0;
                   3809: }
                   3810:
                   3811: int
                   3812: rf_get_component_label(RF_Raid_t *raidPtr, void *data)
                   3813: {
                   3814:        RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
                   3815:        RF_ComponentLabel_t *raid_clabel;
                   3816:        int column = clabel->column;
                   3817:
                   3818:        if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
                   3819:                return EINVAL;
                   3820:        raid_clabel = raidget_component_label(raidPtr, column);
                   3821:        memcpy(clabel, raid_clabel, sizeof *clabel);
                   3822:
                   3823:        return 0;
                   3824: }
                   3825:
1.327     pgoyette 3826: /*
                   3827:  * Module interface
                   3828:  */
                   3829:
/* Declare the "raid" module in class MODULE_CLASS_DRIVER.  The string argument is assumed to list the modules it requires (see module(9)) -- TODO confirm. */
1.356     pgoyette 3830: MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");
1.327     pgoyette 3831:
                         /* The "raid" cfdriver is declared here only when _MODULE is defined. */
                   3832: #ifdef _MODULE
                   3833: CFDRIVER_DECL(raid, DV_DISK, NULL);
                   3834: #endif
                   3835:
                         /* Forward declarations: the module command dispatcher and its init/fini handlers. */
                   3836: static int raid_modcmd(modcmd_t, void *);
                   3837: static int raid_modcmd_init(void);
                   3838: static int raid_modcmd_fini(void);
                   3839:
                   3840: static int
                   3841: raid_modcmd(modcmd_t cmd, void *data)
                   3842: {
                   3843:        int error;
                   3844:
                   3845:        error = 0;
                   3846:        switch (cmd) {
                   3847:        case MODULE_CMD_INIT:
                   3848:                error = raid_modcmd_init();
                   3849:                break;
                   3850:        case MODULE_CMD_FINI:
                   3851:                error = raid_modcmd_fini();
                   3852:                break;
                   3853:        default:
                   3854:                error = ENOTTY;
                   3855:                break;
                   3856:        }
                   3857:        return error;
                   3858: }
                   3859:
                   3860: static int
                   3861: raid_modcmd_init(void)
                   3862: {
                   3863:        int error;
                   3864:        int bmajor, cmajor;
                   3865:
                   3866:        mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
                   3867:        mutex_enter(&raid_lock);
                   3868: #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
                   3869:        rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
                   3870:        rf_init_cond2(rf_sparet_wait_cv, "sparetw");
                   3871:        rf_init_cond2(rf_sparet_resp_cv, "rfgst");
                   3872:
                   3873:        rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
                   3874: #endif
                   3875:
                   3876:        bmajor = cmajor = -1;
                   3877:        error = devsw_attach("raid", &raid_bdevsw, &bmajor,
                   3878:            &raid_cdevsw, &cmajor);
                   3879:        if (error != 0 && error != EEXIST) {
                   3880:                aprint_error("%s: devsw_attach failed %d\n", __func__, error);
                   3881:                mutex_exit(&raid_lock);
                   3882:                return error;
                   3883:        }
                   3884: #ifdef _MODULE
                   3885:        error = config_cfdriver_attach(&raid_cd);
                   3886:        if (error != 0) {
                   3887:                aprint_error("%s: config_cfdriver_attach failed %d\n",
                   3888:                    __func__, error);
                   3889:                devsw_detach(&raid_bdevsw, &raid_cdevsw);
                   3890:                mutex_exit(&raid_lock);
                   3891:                return error;
                   3892:        }
                   3893: #endif
                   3894:        error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
                   3895:        if (error != 0) {
                   3896:                aprint_error("%s: config_cfattach_attach failed %d\n",
                   3897:                    __func__, error);
                   3898: #ifdef _MODULE
                   3899:                config_cfdriver_detach(&raid_cd);
                   3900: #endif
                   3901:                devsw_detach(&raid_bdevsw, &raid_cdevsw);
                   3902:                mutex_exit(&raid_lock);
                   3903:                return error;
                   3904:        }
                   3905:
                   3906:        raidautoconfigdone = false;
                   3907:
                   3908:        mutex_exit(&raid_lock);
                   3909:
                   3910:        if (error == 0) {
                   3911:                if (rf_BootRaidframe(true) == 0)
                   3912:                        aprint_verbose("Kernelized RAIDframe activated\n");
                   3913:                else
                   3914:                        panic("Serious error activating RAID!!");
                   3915:        }
                   3916:
                   3917:        /*
                   3918:         * Register a finalizer which will be used to auto-config RAID
                   3919:         * sets once all real hardware devices have been found.
                   3920:         */
                   3921:        error = config_finalize_register(NULL, rf_autoconfig);
                   3922:        if (error != 0) {
                   3923:                aprint_error("WARNING: unable to register RAIDframe "
                   3924:                    "finalizer\n");
1.329     pgoyette 3925:                error = 0;
1.327     pgoyette 3926:        }
                   3927:
                   3928:        return error;
                   3929: }
                   3930:
                   3931: static int
                   3932: raid_modcmd_fini(void)
                   3933: {
                   3934:        int error;
                   3935:
                   3936:        mutex_enter(&raid_lock);
                   3937:
                   3938:        /* Don't allow unload if raid device(s) exist.  */
                   3939:        if (!LIST_EMPTY(&raids)) {
                   3940:                mutex_exit(&raid_lock);
                   3941:                return EBUSY;
                   3942:        }
                   3943:
                   3944:        error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
                   3945:        if (error != 0) {
1.335     mlelstv  3946:                aprint_error("%s: cannot detach cfattach\n",__func__);
1.327     pgoyette 3947:                mutex_exit(&raid_lock);
                   3948:                return error;
                   3949:        }
                   3950: #ifdef _MODULE
                   3951:        error = config_cfdriver_detach(&raid_cd);
                   3952:        if (error != 0) {
1.335     mlelstv  3953:                aprint_error("%s: cannot detach cfdriver\n",__func__);
1.327     pgoyette 3954:                config_cfattach_attach(raid_cd.cd_name, &raid_ca);
                   3955:                mutex_exit(&raid_lock);
                   3956:                return error;
                   3957:        }
                   3958: #endif
                   3959:        error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
                   3960:        if (error != 0) {
1.335     mlelstv  3961:                aprint_error("%s: cannot detach devsw\n",__func__);
1.327     pgoyette 3962: #ifdef _MODULE
                   3963:                config_cfdriver_attach(&raid_cd);
                   3964: #endif
                   3965:                config_cfattach_attach(raid_cd.cd_name, &raid_ca);
                   3966:                mutex_exit(&raid_lock);
                   3967:                return error;
                   3968:        }
                   3969:        rf_BootRaidframe(false);
                   3970: #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
                   3971:        rf_destroy_mutex2(rf_sparet_wait_mutex);
                   3972:        rf_destroy_cond2(rf_sparet_wait_cv);
                   3973:        rf_destroy_cond2(rf_sparet_resp_cv);
                   3974: #endif
                   3975:        mutex_exit(&raid_lock);
                   3976:        mutex_destroy(&raid_lock);
                   3977:
                   3978:        return error;
                   3979: }

CVSweb <webmaster@jp.NetBSD.org>