[BACK]Return to rf_netbsdkintf.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / dev / raidframe

Annotation of src/sys/dev/raidframe/rf_netbsdkintf.c, Revision 1.394

1.394   ! mrg         1: /*     $NetBSD: rf_netbsdkintf.c,v 1.393 2021/05/24 07:43:15 mrg Exp $ */
1.281     rmind       2:
1.1       oster       3: /*-
1.295     erh         4:  * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
1.1       oster       5:  * All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to The NetBSD Foundation
                      8:  * by Greg Oster; Jason R. Thorpe.
                      9:  *
                     10:  * Redistribution and use in source and binary forms, with or without
                     11:  * modification, are permitted provided that the following conditions
                     12:  * are met:
                     13:  * 1. Redistributions of source code must retain the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer.
                     15:  * 2. Redistributions in binary form must reproduce the above copyright
                     16:  *    notice, this list of conditions and the following disclaimer in the
                     17:  *    documentation and/or other materials provided with the distribution.
                     18:  *
                     19:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     20:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     21:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     22:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     23:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     24:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     25:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     26:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     27:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     28:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     29:  * POSSIBILITY OF SUCH DAMAGE.
                     30:  */
                     31:
                     32: /*
1.281     rmind      33:  * Copyright (c) 1988 University of Utah.
1.1       oster      34:  * Copyright (c) 1990, 1993
                     35:  *      The Regents of the University of California.  All rights reserved.
                     36:  *
                     37:  * This code is derived from software contributed to Berkeley by
                     38:  * the Systems Programming Group of the University of Utah Computer
                     39:  * Science Department.
                     40:  *
                     41:  * Redistribution and use in source and binary forms, with or without
                     42:  * modification, are permitted provided that the following conditions
                     43:  * are met:
                     44:  * 1. Redistributions of source code must retain the above copyright
                     45:  *    notice, this list of conditions and the following disclaimer.
                     46:  * 2. Redistributions in binary form must reproduce the above copyright
                     47:  *    notice, this list of conditions and the following disclaimer in the
                     48:  *    documentation and/or other materials provided with the distribution.
1.162     agc        49:  * 3. Neither the name of the University nor the names of its contributors
                     50:  *    may be used to endorse or promote products derived from this software
                     51:  *    without specific prior written permission.
                     52:  *
                     53:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     54:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     55:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     56:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     57:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     58:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     59:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     60:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     61:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     62:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     63:  * SUCH DAMAGE.
                     64:  *
1.381     riastrad   65:  * from: Utah $Hdr: cd.c 1.6 90/11/28$
1.162     agc        66:  *
                     67:  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
                     68:  */
                     69:
                     70: /*
1.1       oster      71:  * Copyright (c) 1995 Carnegie-Mellon University.
                     72:  * All rights reserved.
                     73:  *
                     74:  * Authors: Mark Holland, Jim Zelenka
                     75:  *
                     76:  * Permission to use, copy, modify and distribute this software and
                     77:  * its documentation is hereby granted, provided that both the copyright
                     78:  * notice and this permission notice appear in all copies of the
                     79:  * software, derivative works or modified versions, and any portions
                     80:  * thereof, and that both notices appear in supporting documentation.
                     81:  *
                     82:  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
                     83:  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
                     84:  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
                     85:  *
                     86:  * Carnegie Mellon requests users of this software to return to
                     87:  *
                     88:  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
                     89:  *  School of Computer Science
                     90:  *  Carnegie Mellon University
                     91:  *  Pittsburgh PA 15213-3890
                     92:  *
                     93:  * any improvements or extensions that they make and grant Carnegie the
                     94:  * rights to redistribute these changes.
                     95:  */
                     96:
                     97: /***********************************************************
                     98:  *
                     99:  * rf_kintf.c -- the kernel interface routines for RAIDframe
                    100:  *
                    101:  ***********************************************************/
1.112     lukem     102:
                    103: #include <sys/cdefs.h>
1.394   ! mrg       104: __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.393 2021/05/24 07:43:15 mrg Exp $");
1.251     ad        105:
                    106: #ifdef _KERNEL_OPT
                    107: #include "opt_raid_autoconfig.h"
1.363     mrg       108: #include "opt_compat_netbsd32.h"
1.251     ad        109: #endif
1.1       oster     110:
1.113     lukem     111: #include <sys/param.h>
1.1       oster     112: #include <sys/errno.h>
                    113: #include <sys/pool.h>
1.152     thorpej   114: #include <sys/proc.h>
1.1       oster     115: #include <sys/queue.h>
                    116: #include <sys/disk.h>
                    117: #include <sys/device.h>
                    118: #include <sys/stat.h>
                    119: #include <sys/ioctl.h>
                    120: #include <sys/fcntl.h>
                    121: #include <sys/systm.h>
                    122: #include <sys/vnode.h>
                    123: #include <sys/disklabel.h>
                    124: #include <sys/conf.h>
                    125: #include <sys/buf.h>
1.182     yamt      126: #include <sys/bufq.h>
1.65      oster     127: #include <sys/reboot.h>
1.208     elad      128: #include <sys/kauth.h>
1.327     pgoyette  129: #include <sys/module.h>
1.358     pgoyette  130: #include <sys/compat_stub.h>
1.8       oster     131:
1.234     oster     132: #include <prop/proplib.h>
                    133:
1.110     oster     134: #include <dev/raidframe/raidframevar.h>
                    135: #include <dev/raidframe/raidframeio.h>
1.269     jld       136: #include <dev/raidframe/rf_paritymap.h>
1.251     ad        137:
1.1       oster     138: #include "rf_raid.h"
1.44      oster     139: #include "rf_copyback.h"
1.1       oster     140: #include "rf_dag.h"
                    141: #include "rf_dagflags.h"
1.99      oster     142: #include "rf_desc.h"
1.1       oster     143: #include "rf_diskqueue.h"
                    144: #include "rf_etimer.h"
                    145: #include "rf_general.h"
                    146: #include "rf_kintf.h"
                    147: #include "rf_options.h"
                    148: #include "rf_driver.h"
                    149: #include "rf_parityscan.h"
                    150: #include "rf_threadstuff.h"
                    151:
1.325     christos  152: #include "ioconf.h"
                    153:
/*
 * db1_printf((fmt, ...)): level-1 debug output.
 *
 * The single macro argument is a complete, parenthesized printf()
 * argument list.  With DEBUG defined, the message is printed only when
 * rf_kdebug_level is greater than zero; without DEBUG the macro expands
 * to an empty statement.
 *
 * Both variants are wrapped in do { } while (0) so that the macro behaves
 * as exactly one statement: it can be used as the body of an unbraced
 * if/else without capturing the following `else' and without leaving a
 * stray `;' behind.
 */
#ifdef DEBUG
int     rf_kdebug_level = 0;
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else                          /* DEBUG */
#define db1_printf(a) do { } while (0)
#endif                         /* DEBUG */

/*
 * DPRINTF(fmt, ...): printf() that is compiled in only when DEBUG_ROOT is
 * defined, and expands to nothing otherwise.  Because the enabled form
 * uses a bare __VA_ARGS__, callers must pass at least one argument after
 * the format string.
 */
#ifdef DEBUG_ROOT
#define DPRINTF(a, ...) printf(a, __VA_ARGS__)
#else
#define DPRINTF(a, ...)
#endif
                    166:
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/*
 * State for spare-table installation; only built when parity
 * declustering support is compiled in.
 */
static rf_declare_mutex2(rf_sparet_wait_mutex);
static rf_declare_cond2(rf_sparet_wait_cv);
static rf_declare_cond2(rf_sparet_resp_cv);

/* requests to install a spare table */
static RF_SparetWait_t *rf_sparet_wait_queue;
/* responses from the installation process */
static RF_SparetWait_t *rf_sparet_resp_queue;
#endif
1.153     thorpej   177:
1.384     jdolecek  178: const int rf_b_pass = (B_PHYS|B_RAW|B_MEDIA_FLAGS);
                    179:
1.153     thorpej   180: MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
1.10      oster     181:
1.1       oster     182: /* prototypes */
1.187     christos  183: static void KernelWakeupFunc(struct buf *);
                    184: static void InitBP(struct buf *, struct vnode *, unsigned,
1.225     christos  185:     dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
1.384     jdolecek  186:     void *, int);
1.300     christos  187: static void raidinit(struct raid_softc *);
1.335     mlelstv   188: static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
1.348     jdolecek  189: static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);
1.1       oster     190:
1.261     dyoung    191: static int raid_match(device_t, cfdata_t, void *);
                    192: static void raid_attach(device_t, device_t, void *);
                    193: static int raid_detach(device_t, int);
1.130     gehenna   194:
1.385     riastrad  195: static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
1.269     jld       196:     daddr_t, daddr_t);
                    197: static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
                    198:     daddr_t, daddr_t, int);
                    199:
1.276     mrg       200: static int raidwrite_component_label(unsigned,
                    201:     dev_t, struct vnode *, RF_ComponentLabel_t *);
                    202: static int raidread_component_label(unsigned,
                    203:     dev_t, struct vnode *, RF_ComponentLabel_t *);
1.269     jld       204:
1.335     mlelstv   205: static int raid_diskstart(device_t, struct buf *bp);
                    206: static int raid_dumpblocks(device_t, void *, daddr_t, int);
                    207: static int raid_lastclose(device_t);
1.269     jld       208:
1.324     mrg       209: static dev_type_open(raidopen);
                    210: static dev_type_close(raidclose);
                    211: static dev_type_read(raidread);
                    212: static dev_type_write(raidwrite);
                    213: static dev_type_ioctl(raidioctl);
                    214: static dev_type_strategy(raidstrategy);
                    215: static dev_type_dump(raiddump);
                    216: static dev_type_size(raidsize);
1.130     gehenna   217:
                    218: const struct bdevsw raid_bdevsw = {
1.305     dholland  219:        .d_open = raidopen,
                    220:        .d_close = raidclose,
                    221:        .d_strategy = raidstrategy,
                    222:        .d_ioctl = raidioctl,
                    223:        .d_dump = raiddump,
                    224:        .d_psize = raidsize,
1.311     dholland  225:        .d_discard = nodiscard,
1.305     dholland  226:        .d_flag = D_DISK
1.130     gehenna   227: };
                    228:
                    229: const struct cdevsw raid_cdevsw = {
1.305     dholland  230:        .d_open = raidopen,
                    231:        .d_close = raidclose,
                    232:        .d_read = raidread,
                    233:        .d_write = raidwrite,
                    234:        .d_ioctl = raidioctl,
                    235:        .d_stop = nostop,
                    236:        .d_tty = notty,
                    237:        .d_poll = nopoll,
                    238:        .d_mmap = nommap,
                    239:        .d_kqfilter = nokqfilter,
1.312     dholland  240:        .d_discard = nodiscard,
1.305     dholland  241:        .d_flag = D_DISK
1.130     gehenna   242: };
1.1       oster     243:
1.323     mlelstv   244: static struct dkdriver rf_dkdriver = {
1.335     mlelstv   245:        .d_open = raidopen,
                    246:        .d_close = raidclose,
1.323     mlelstv   247:        .d_strategy = raidstrategy,
1.335     mlelstv   248:        .d_diskstart = raid_diskstart,
                    249:        .d_dumpblocks = raid_dumpblocks,
                    250:        .d_lastclose = raid_lastclose,
1.323     mlelstv   251:        .d_minphys = minphys
                    252: };
1.235     oster     253:
1.1       oster     254: #define        raidunit(x)     DISKUNIT(x)
1.335     mlelstv   255: #define        raidsoftc(dev)  (((struct raid_softc *)device_private(dev))->sc_r.softc)
1.1       oster     256:
1.202     oster     257: extern struct cfdriver raid_cd;
1.266     dyoung    258: CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
                    259:     raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
                    260:     DVF_DETACH_SHUTDOWN);
1.202     oster     261:
1.353     mrg       262: /* Internal representation of a rf_recon_req */
                    263: struct rf_recon_req_internal {
                    264:        RF_RowCol_t col;
                    265:        RF_ReconReqFlags_t flags;
                    266:        void   *raidPtr;
                    267: };
                    268:
/*
 * RAIDOUTSTANDING is the number of simultaneous I/Os allowed on one RAID
 * device.  Large values can make the driver consume a lot of kernel
 * memory, especially for writes and for reads in degraded mode.
 *
 * Example: with a stripe width of 64 blocks (32k) and 5 disks, a single
 * 64K write will typically require 64K for the old data, 64K for the
 * old parity, and 64K for the new parity, for a total of 192K (if the
 * parity buffer is not re-used immediately).  Even if it is re-used
 * immediately, that's still 128K, which when multiplied by say 10
 * requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * In degraded mode, a 64K read on the same setup may require data
 * reconstruction, which needs *all* of the 4 remaining disks to
 * participate -- 4 * 32K/disk == 128K again.
 *
 * The default below applies only when the build has not already
 * defined RAIDOUTSTANDING.
 */

#if !defined(RAIDOUTSTANDING)
#define RAIDOUTSTANDING   6
#endif
                    289:
/* dev_t of the raw partition (RAW_PART) of the RAID unit named by `dev'. */
#define RAIDLABELDEV(dev)	\
	(MAKEDISKDEV(major(dev), raidunit(dev), RAW_PART))
                    292:
                    293: /* declared here, and made public, for the benefit of KVM stuff.. */
1.9       oster     294:
1.104     oster     295: static int raidlock(struct raid_softc *);
                    296: static void raidunlock(struct raid_softc *);
1.1       oster     297:
1.266     dyoung    298: static int raid_detach_unlocked(struct raid_softc *);
                    299:
1.104     oster     300: static void rf_markalldirty(RF_Raid_t *);
1.304     christos  301: static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
1.48      oster     302:
1.393     mrg       303: static void rf_ReconThread(struct rf_recon_req_internal *);
                    304: static void rf_RewriteParityThread(RF_Raid_t *raidPtr);
                    305: static void rf_CopybackThread(RF_Raid_t *raidPtr);
                    306: static void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
                    307: static int rf_autoconfig(device_t);
                    308: static void rf_buildroothack(RF_ConfigSet_t *);
1.104     oster     309:
1.393     mrg       310: static RF_AutoConfig_t *rf_find_raid_components(void);
                    311: static RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
1.104     oster     312: static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
1.393     mrg       313: static void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
                    314: static int rf_set_autoconfig(RF_Raid_t *, int);
                    315: static int rf_set_rootpartition(RF_Raid_t *, int);
                    316: static void rf_release_all_vps(RF_ConfigSet_t *);
                    317: static void rf_cleanup_config_set(RF_ConfigSet_t *);
                    318: static int rf_have_enough_components(RF_ConfigSet_t *);
                    319: static struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
1.278     mrg       320: static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
1.48      oster     321:
1.295     erh       322: /*
                    323:  * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
                    324:  * Note that this is overridden by having RAID_AUTOCONFIG as an option
                    325:  * in the kernel config file.
                    326:  */
                    327: #ifdef RAID_AUTOCONFIG
                    328: int raidautoconfig = 1;
                    329: #else
                    330: int raidautoconfig = 0;
                    331: #endif
                    332: static bool raidautoconfigdone = false;
1.37      oster     333:
1.177     oster     334: struct RF_Pools_s rf_pools;
                    335:
1.300     christos  336: static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
                    337: static kmutex_t raid_lock;
1.1       oster     338:
1.300     christos  339: static struct raid_softc *
                    340: raidcreate(int unit) {
                    341:        struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
                    342:        sc->sc_unit = unit;
1.327     pgoyette  343:        cv_init(&sc->sc_cv, "raidunit");
                    344:        mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
1.300     christos  345:        return sc;
                    346: }
1.1       oster     347:
1.300     christos  348: static void
                    349: raiddestroy(struct raid_softc *sc) {
1.327     pgoyette  350:        cv_destroy(&sc->sc_cv);
                    351:        mutex_destroy(&sc->sc_mutex);
1.300     christos  352:        kmem_free(sc, sizeof(*sc));
                    353: }
1.50      oster     354:
1.300     christos  355: static struct raid_softc *
1.327     pgoyette  356: raidget(int unit, bool create) {
1.300     christos  357:        struct raid_softc *sc;
                    358:        if (unit < 0) {
                    359: #ifdef DIAGNOSTIC
                    360:                panic("%s: unit %d!", __func__, unit);
                    361: #endif
                    362:                return NULL;
                    363:        }
                    364:        mutex_enter(&raid_lock);
                    365:        LIST_FOREACH(sc, &raids, sc_link) {
                    366:                if (sc->sc_unit == unit) {
                    367:                        mutex_exit(&raid_lock);
                    368:                        return sc;
                    369:                }
                    370:        }
                    371:        mutex_exit(&raid_lock);
1.327     pgoyette  372:        if (!create)
                    373:                return NULL;
1.379     chs       374:        sc = raidcreate(unit);
1.300     christos  375:        mutex_enter(&raid_lock);
                    376:        LIST_INSERT_HEAD(&raids, sc, sc_link);
                    377:        mutex_exit(&raid_lock);
                    378:        return sc;
                    379: }
                    380:
1.385     riastrad  381: static void
1.300     christos  382: raidput(struct raid_softc *sc) {
                    383:        mutex_enter(&raid_lock);
                    384:        LIST_REMOVE(sc, sc_link);
                    385:        mutex_exit(&raid_lock);
                    386:        raiddestroy(sc);
                    387: }
1.1       oster     388:
/*
 * Intentionally empty, and `num' is unused: device attachment and the
 * associated initialization are now performed as part of module
 * initialization.
 */
void
raidattach(int num)
{
}
                    398:
1.393     mrg       399: static int
1.261     dyoung    400: rf_autoconfig(device_t self)
1.142     thorpej   401: {
                    402:        RF_AutoConfig_t *ac_list;
                    403:        RF_ConfigSet_t *config_sets;
                    404:
1.295     erh       405:        if (!raidautoconfig || raidautoconfigdone == true)
1.389     skrll     406:                return 0;
1.142     thorpej   407:
                    408:        /* XXX This code can only be run once. */
1.295     erh       409:        raidautoconfigdone = true;
1.142     thorpej   410:
1.307     christos  411: #ifdef __HAVE_CPU_BOOTCONF
                    412:        /*
                    413:         * 0. find the boot device if needed first so we can use it later
                    414:         * this needs to be done before we autoconfigure any raid sets,
                    415:         * because if we use wedges we are not going to be able to open
                    416:         * the boot device later
                    417:         */
                    418:        if (booted_device == NULL)
                    419:                cpu_bootconf();
                    420: #endif
1.48      oster     421:        /* 1. locate all RAID components on the system */
1.258     ad        422:        aprint_debug("Searching for RAID components...\n");
1.48      oster     423:        ac_list = rf_find_raid_components();
                    424:
1.142     thorpej   425:        /* 2. Sort them into their respective sets. */
1.48      oster     426:        config_sets = rf_create_auto_sets(ac_list);
                    427:
1.142     thorpej   428:        /*
1.299     oster     429:         * 3. Evaluate each set and configure the valid ones.
1.142     thorpej   430:         * This gets done in rf_buildroothack().
                    431:         */
                    432:        rf_buildroothack(config_sets);
1.48      oster     433:
1.213     christos  434:        return 1;
1.48      oster     435: }
                    436:
1.367     christos  437: int
                    438: rf_inited(const struct raid_softc *rs) {
                    439:        return (rs->sc_flags & RAIDF_INITED) != 0;
                    440: }
                    441:
1.368     oster     442: RF_Raid_t *
                    443: rf_get_raid(struct raid_softc *rs) {
                    444:        return &rs->sc_r;
                    445: }
                    446:
1.367     christos  447: int
                    448: rf_get_unit(const struct raid_softc *rs) {
                    449:        return rs->sc_unit;
                    450: }
                    451:
1.306     christos  452: static int
1.307     christos  453: rf_containsboot(RF_Raid_t *r, device_t bdv) {
1.359     bad       454:        const char *bootname;
                    455:        size_t len;
                    456:
                    457:        /* if bdv is NULL, the set can't contain it. exit early. */
                    458:        if (bdv == NULL)
                    459:                return 0;
                    460:
                    461:        bootname = device_xname(bdv);
                    462:        len = strlen(bootname);
1.306     christos  463:
                    464:        for (int col = 0; col < r->numCol; col++) {
1.307     christos  465:                const char *devname = r->Disks[col].devname;
1.306     christos  466:                devname += sizeof("/dev/") - 1;
1.307     christos  467:                if (strncmp(devname, "dk", 2) == 0) {
                    468:                        const char *parent =
                    469:                            dkwedge_get_parent_name(r->Disks[col].dev);
                    470:                        if (parent != NULL)
                    471:                                devname = parent;
                    472:                }
1.306     christos  473:                if (strncmp(devname, bootname, len) == 0) {
                    474:                        struct raid_softc *sc = r->softc;
                    475:                        aprint_debug("raid%d includes boot device %s\n",
                    476:                            sc->sc_unit, devname);
                    477:                        return 1;
                    478:                }
                    479:        }
                    480:        return 0;
                    481: }
                    482:
1.393     mrg       483: static void
1.142     thorpej   484: rf_buildroothack(RF_ConfigSet_t *config_sets)
1.48      oster     485: {
                    486:        RF_ConfigSet_t *cset;
                    487:        RF_ConfigSet_t *next_cset;
1.51      oster     488:        int num_root;
1.300     christos  489:        struct raid_softc *sc, *rsc;
1.378     martin    490:        struct dk_softc *dksc = NULL;   /* XXX gcc -Os: may be used uninit. */
1.48      oster     491:
1.300     christos  492:        sc = rsc = NULL;
1.51      oster     493:        num_root = 0;
1.48      oster     494:        cset = config_sets;
1.271     dyoung    495:        while (cset != NULL) {
1.48      oster     496:                next_cset = cset->next;
1.186     perry     497:                if (rf_have_enough_components(cset) &&
1.300     christos  498:                    cset->ac->clabel->autoconfigure == 1) {
                    499:                        sc = rf_auto_config_set(cset);
                    500:                        if (sc != NULL) {
1.359     bad       501:                                aprint_debug("raid%d: configured ok, rootable %d\n",
                    502:                                    sc->sc_unit, cset->rootable);
1.51      oster     503:                                if (cset->rootable) {
1.300     christos  504:                                        rsc = sc;
1.51      oster     505:                                        num_root++;
                    506:                                }
                    507:                        } else {
                    508:                                /* The autoconfig didn't work :( */
1.300     christos  509:                                aprint_debug("Autoconfig failed\n");
1.51      oster     510:                                rf_release_all_vps(cset);
1.48      oster     511:                        }
                    512:                } else {
1.186     perry     513:                        /* we're not autoconfiguring this set...
1.48      oster     514:                           release the associated resources */
1.49      oster     515:                        rf_release_all_vps(cset);
1.48      oster     516:                }
                    517:                /* cleanup */
1.49      oster     518:                rf_cleanup_config_set(cset);
1.48      oster     519:                cset = next_cset;
                    520:        }
1.122     oster     521:
1.223     oster     522:        /* if the user has specified what the root device should be
                    523:           then we don't touch booted_device or boothowto... */
                    524:
1.359     bad       525:        if (rootspec != NULL) {
                    526:                DPRINTF("%s: rootspec %s\n", __func__, rootspec);
1.223     oster     527:                return;
1.359     bad       528:        }
1.223     oster     529:
1.122     oster     530:        /* we found something bootable... */
                    531:
1.310     christos  532:        /*
                    533:         * XXX: The following code assumes that the root raid
                    534:         * is the first ('a') partition. This is about the best
                    535:         * we can do with a BSD disklabel, but we might be able
                    536:         * to do better with a GPT label, by setting a specified
                    537:         * attribute to indicate the root partition. We can then
                    538:         * stash the partition number in the r->root_partition
                    539:         * high bits (the bottom 2 bits are already used). For
                    540:         * now we just set booted_partition to 0 when we override
                    541:         * root.
                    542:         */
1.122     oster     543:        if (num_root == 1) {
1.306     christos  544:                device_t candidate_root;
1.377     maxv      545:                dksc = &rsc->sc_dksc;
1.335     mlelstv   546:                if (dksc->sc_dkdev.dk_nwedges != 0) {
1.297     christos  547:                        char cname[sizeof(cset->ac->devname)];
1.344     christos  548:                        /* XXX: assume partition 'a' first */
1.297     christos  549:                        snprintf(cname, sizeof(cname), "%s%c",
1.335     mlelstv   550:                            device_xname(dksc->sc_dev), 'a');
1.306     christos  551:                        candidate_root = dkwedge_find_by_wname(cname);
1.344     christos  552:                        DPRINTF("%s: candidate wedge root=%s\n", __func__,
                    553:                            cname);
                    554:                        if (candidate_root == NULL) {
                    555:                                /*
                    556:                                 * If that is not found, because we don't use
                    557:                                 * disklabel, return the first dk child
                    558:                                 * XXX: we can skip the 'a' check above
                    559:                                 * and always do this...
                    560:                                 */
                    561:                                size_t i = 0;
                    562:                                candidate_root = dkwedge_find_by_parent(
                    563:                                    device_xname(dksc->sc_dev), &i);
                    564:                        }
                    565:                        DPRINTF("%s: candidate wedge root=%p\n", __func__,
                    566:                            candidate_root);
1.297     christos  567:                } else
1.335     mlelstv   568:                        candidate_root = dksc->sc_dev;
1.344     christos  569:                DPRINTF("%s: candidate root=%p\n", __func__, candidate_root);
                    570:                DPRINTF("%s: booted_device=%p root_partition=%d "
1.359     bad       571:                        "contains_boot=%d",
                    572:                    __func__, booted_device, rsc->sc_r.root_partition,
                    573:                           rf_containsboot(&rsc->sc_r, booted_device));
                    574:                /* XXX the check for booted_device == NULL can probably be
                    575:                 * dropped, now that rf_containsboot handles that case.
                    576:                 */
1.308     christos  577:                if (booted_device == NULL ||
                    578:                    rsc->sc_r.root_partition == 1 ||
1.310     christos  579:                    rf_containsboot(&rsc->sc_r, booted_device)) {
1.308     christos  580:                        booted_device = candidate_root;
1.351     christos  581:                        booted_method = "raidframe/single";
1.310     christos  582:                        booted_partition = 0;   /* XXX assume 'a' */
1.392     mrg       583:                        DPRINTF("%s: set booted_device=%s(%p)\n", __func__,
                    584:                            device_xname(booted_device), booted_device);
1.310     christos  585:                }
1.122     oster     586:        } else if (num_root > 1) {
1.344     christos  587:                DPRINTF("%s: many roots=%d, %p\n", __func__, num_root,
                    588:                    booted_device);
1.226     oster     589:
1.385     riastrad  590:                /*
1.226     oster     591:                 * Maybe the MD code can help. If it cannot, then
                    592:                 * setroot() will discover that we have no
                    593:                 * booted_device and will ask the user if nothing was
1.385     riastrad  594:                 * hardwired in the kernel config file
1.226     oster     595:                 */
1.385     riastrad  596:                if (booted_device == NULL)
1.226     oster     597:                        return;
                    598:
                    599:                num_root = 0;
1.300     christos  600:                mutex_enter(&raid_lock);
                    601:                LIST_FOREACH(sc, &raids, sc_link) {
                    602:                        RF_Raid_t *r = &sc->sc_r;
                    603:                        if (r->valid == 0)
1.226     oster     604:                                continue;
                    605:
1.300     christos  606:                        if (r->root_partition == 0)
1.226     oster     607:                                continue;
                    608:
1.306     christos  609:                        if (rf_containsboot(r, booted_device)) {
1.226     oster     610:                                num_root++;
1.300     christos  611:                                rsc = sc;
1.335     mlelstv   612:                                dksc = &rsc->sc_dksc;
1.226     oster     613:                        }
                    614:                }
1.300     christos  615:                mutex_exit(&raid_lock);
1.295     erh       616:
1.226     oster     617:                if (num_root == 1) {
1.335     mlelstv   618:                        booted_device = dksc->sc_dev;
1.351     christos  619:                        booted_method = "raidframe/multi";
1.310     christos  620:                        booted_partition = 0;   /* XXX assume 'a' */
1.226     oster     621:                } else {
                    622:                        /* we can't guess.. require the user to answer... */
                    623:                        boothowto |= RB_ASKNAME;
                    624:                }
1.51      oster     625:        }
1.1       oster     626: }
                    627:
1.324     mrg       628: static int
1.169     oster     629: raidsize(dev_t dev)
1.1       oster     630: {
                    631:        struct raid_softc *rs;
1.335     mlelstv   632:        struct dk_softc *dksc;
                    633:        unsigned int unit;
1.1       oster     634:
                    635:        unit = raidunit(dev);
1.327     pgoyette  636:        if ((rs = raidget(unit, false)) == NULL)
1.336     mlelstv   637:                return -1;
1.335     mlelstv   638:        dksc = &rs->sc_dksc;
                    639:
1.1       oster     640:        if ((rs->sc_flags & RAIDF_INITED) == 0)
1.336     mlelstv   641:                return -1;
1.1       oster     642:
1.335     mlelstv   643:        return dk_size(dksc, dev);
                    644: }
1.1       oster     645:
1.335     mlelstv   646: static int
                    647: raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
                    648: {
                    649:        unsigned int unit;
                    650:        struct raid_softc *rs;
                    651:        struct dk_softc *dksc;
1.1       oster     652:
1.335     mlelstv   653:        unit = raidunit(dev);
                    654:        if ((rs = raidget(unit, false)) == NULL)
                    655:                return ENXIO;
                    656:        dksc = &rs->sc_dksc;
1.1       oster     657:
1.335     mlelstv   658:        if ((rs->sc_flags & RAIDF_INITED) == 0)
                    659:                return ENODEV;
1.1       oster     660:
1.336     mlelstv   661:         /*
                    662:            Note that blkno is relative to this particular partition.
                    663:            By adding adding RF_PROTECTED_SECTORS, we get a value that
                    664:           is relative to the partition used for the underlying component.
                    665:         */
                    666:        blkno += RF_PROTECTED_SECTORS;
                    667:
1.380     riastrad  668:        return dk_dump(dksc, dev, blkno, va, size, DK_DUMP_RECURSIVE);
1.1       oster     669: }
                    670:
1.324     mrg       671: static int
1.335     mlelstv   672: raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
1.1       oster     673: {
1.335     mlelstv   674:        struct raid_softc *rs = raidsoftc(dev);
1.231     oster     675:        const struct bdevsw *bdev;
                    676:        RF_Raid_t *raidPtr;
1.335     mlelstv   677:        int     c, sparecol, j, scol, dumpto;
1.231     oster     678:        int     error = 0;
                    679:
1.300     christos  680:        raidPtr = &rs->sc_r;
1.231     oster     681:
                    682:        /* we only support dumping to RAID 1 sets */
1.385     riastrad  683:        if (raidPtr->Layout.numDataCol != 1 ||
1.231     oster     684:            raidPtr->Layout.numParityCol != 1)
                    685:                return EINVAL;
                    686:
                    687:        if ((error = raidlock(rs)) != 0)
                    688:                return error;
                    689:
                    690:        /* figure out what device is alive.. */
                    691:
1.385     riastrad  692:        /*
1.231     oster     693:           Look for a component to dump to.  The preference for the
                    694:           component to dump to is as follows:
1.383     oster     695:           1) the first component
                    696:           2) a used_spare of the first component
                    697:           3) the second component
                    698:           4) a used_spare of the second component
1.231     oster     699:        */
                    700:
                    701:        dumpto = -1;
                    702:        for (c = 0; c < raidPtr->numCol; c++) {
                    703:                if (raidPtr->Disks[c].status == rf_ds_optimal) {
                    704:                        /* this might be the one */
                    705:                        dumpto = c;
                    706:                        break;
                    707:                }
                    708:        }
1.385     riastrad  709:
                    710:        /*
1.383     oster     711:           At this point we have possibly selected a live component.
                    712:           If we didn't find a live ocmponent, we now check to see
                    713:           if there is a relevant spared component.
1.231     oster     714:        */
                    715:
                    716:        for (c = 0; c < raidPtr->numSpare; c++) {
                    717:                sparecol = raidPtr->numCol + c;
                    718:                if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
                    719:                        /* How about this one? */
                    720:                        scol = -1;
                    721:                        for(j=0;j<raidPtr->numCol;j++) {
                    722:                                if (raidPtr->Disks[j].spareCol == sparecol) {
                    723:                                        scol = j;
                    724:                                        break;
                    725:                                }
                    726:                        }
                    727:                        if (scol == 0) {
1.385     riastrad  728:                                /*
1.383     oster     729:                                   We must have found a spared first
                    730:                                   component!  We'll take that over
                    731:                                   anything else found so far.  (We
                    732:                                   couldn't have found a real first
                    733:                                   component before, since this is a
                    734:                                   used spare, and it's saying that
                    735:                                   it's replacing the first
                    736:                                   component.)  On reboot (with
1.231     oster     737:                                   autoconfiguration turned on)
1.383     oster     738:                                   sparecol will become the first
                    739:                                   component (component0) of this set.
1.231     oster     740:                                */
                    741:                                dumpto = sparecol;
                    742:                                break;
                    743:                        } else if (scol != -1) {
1.385     riastrad  744:                                /*
                    745:                                   Must be a spared second component.
                    746:                                   We'll dump to that if we havn't found
                    747:                                   anything else so far.
1.231     oster     748:                                */
                    749:                                if (dumpto == -1)
                    750:                                        dumpto = sparecol;
                    751:                        }
                    752:                }
                    753:        }
1.385     riastrad  754:
1.231     oster     755:        if (dumpto == -1) {
                    756:                /* we couldn't find any live components to dump to!?!?
                    757:                 */
                    758:                error = EINVAL;
                    759:                goto out;
                    760:        }
                    761:
                    762:        bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
1.342     mlelstv   763:        if (bdev == NULL) {
                    764:                error = ENXIO;
                    765:                goto out;
                    766:        }
1.231     oster     767:
1.385     riastrad  768:        error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
1.336     mlelstv   769:                                blkno, va, nblk * raidPtr->bytesPerSector);
1.385     riastrad  770:
1.231     oster     771: out:
                    772:        raidunlock(rs);
1.385     riastrad  773:
1.231     oster     774:        return error;
1.1       oster     775: }
1.324     mrg       776:
1.1       oster     777: /* ARGSUSED */
1.324     mrg       778: static int
1.222     christos  779: raidopen(dev_t dev, int flags, int fmt,
                    780:     struct lwp *l)
1.1       oster     781: {
1.9       oster     782:        int     unit = raidunit(dev);
1.1       oster     783:        struct raid_softc *rs;
1.335     mlelstv   784:        struct dk_softc *dksc;
                    785:        int     error = 0;
1.9       oster     786:        int     part, pmask;
                    787:
1.327     pgoyette  788:        if ((rs = raidget(unit, true)) == NULL)
1.300     christos  789:                return ENXIO;
1.1       oster     790:        if ((error = raidlock(rs)) != 0)
1.389     skrll     791:                return error;
1.266     dyoung    792:
                    793:        if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
                    794:                error = EBUSY;
                    795:                goto bad;
                    796:        }
                    797:
1.335     mlelstv   798:        dksc = &rs->sc_dksc;
1.1       oster     799:
                    800:        part = DISKPART(dev);
                    801:        pmask = (1 << part);
                    802:
1.335     mlelstv   803:        if (!DK_BUSY(dksc, pmask) &&
1.13      oster     804:            ((rs->sc_flags & RAIDF_INITED) != 0)) {
                    805:                /* First one... mark things as dirty... Note that we *MUST*
                    806:                 have done a configure before this.  I DO NOT WANT TO BE
                    807:                 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
                    808:                 THAT THEY BELONG TOGETHER!!!!! */
                    809:                /* XXX should check to see if we're only open for reading
                    810:                   here... If so, we needn't do this, but then need some
                    811:                   other way of keeping track of what's happened.. */
                    812:
1.300     christos  813:                rf_markalldirty(&rs->sc_r);
1.13      oster     814:        }
                    815:
1.335     mlelstv   816:        if ((rs->sc_flags & RAIDF_INITED) != 0)
                    817:                error = dk_open(dksc, dev, flags, fmt, l);
1.1       oster     818:
1.213     christos  819: bad:
1.1       oster     820:        raidunlock(rs);
                    821:
1.389     skrll     822:        return error;
1.1       oster     823:
                    824:
                    825: }
1.324     mrg       826:
1.335     mlelstv   827: static int
                    828: raid_lastclose(device_t self)
                    829: {
                    830:        struct raid_softc *rs = raidsoftc(self);
                    831:
                    832:        /* Last one... device is not unconfigured yet.
                    833:           Device shutdown has taken care of setting the
                    834:           clean bits if RAIDF_INITED is not set
                    835:           mark things as clean... */
                    836:
                    837:        rf_update_component_labels(&rs->sc_r,
                    838:            RF_FINAL_COMPONENT_UPDATE);
                    839:
                    840:        /* pass to unlocked code */
                    841:        if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
                    842:                rs->sc_flags |= RAIDF_DETACH;
                    843:
                    844:        return 0;
                    845: }
                    846:
1.1       oster     847: /* ARGSUSED */
1.324     mrg       848: static int
1.222     christos  849: raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
1.1       oster     850: {
1.9       oster     851:        int     unit = raidunit(dev);
1.1       oster     852:        struct raid_softc *rs;
1.335     mlelstv   853:        struct dk_softc *dksc;
                    854:        cfdata_t cf;
                    855:        int     error = 0, do_detach = 0, do_put = 0;
1.1       oster     856:
1.327     pgoyette  857:        if ((rs = raidget(unit, false)) == NULL)
1.300     christos  858:                return ENXIO;
1.335     mlelstv   859:        dksc = &rs->sc_dksc;
1.1       oster     860:
                    861:        if ((error = raidlock(rs)) != 0)
1.389     skrll     862:                return error;
1.1       oster     863:
1.335     mlelstv   864:        if ((rs->sc_flags & RAIDF_INITED) != 0) {
                    865:                error = dk_close(dksc, dev, flags, fmt, l);
                    866:                if ((rs->sc_flags & RAIDF_DETACH) != 0)
                    867:                        do_detach = 1;
                    868:        } else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
                    869:                do_put = 1;
1.1       oster     870:
1.335     mlelstv   871:        raidunlock(rs);
1.1       oster     872:
1.335     mlelstv   873:        if (do_detach) {
                    874:                /* free the pseudo device attach bits */
                    875:                cf = device_cfdata(dksc->sc_dev);
                    876:                error = config_detach(dksc->sc_dev, 0);
1.385     riastrad  877:                if (error == 0)
1.335     mlelstv   878:                        free(cf, M_RAIDFRAME);
                    879:        } else if (do_put) {
                    880:                raidput(rs);
1.1       oster     881:        }
1.186     perry     882:
1.389     skrll     883:        return error;
1.147     oster     884:
1.335     mlelstv   885: }
1.327     pgoyette  886:
1.335     mlelstv   887: static void
                    888: raid_wakeup(RF_Raid_t *raidPtr)
                    889: {
                    890:        rf_lock_mutex2(raidPtr->iodone_lock);
                    891:        rf_signal_cond2(raidPtr->iodone_cv);
                    892:        rf_unlock_mutex2(raidPtr->iodone_lock);
1.1       oster     893: }
                    894:
1.324     mrg       895: static void
1.169     oster     896: raidstrategy(struct buf *bp)
1.1       oster     897: {
1.335     mlelstv   898:        unsigned int unit;
                    899:        struct raid_softc *rs;
                    900:        struct dk_softc *dksc;
1.1       oster     901:        RF_Raid_t *raidPtr;
                    902:
1.335     mlelstv   903:        unit = raidunit(bp->b_dev);
1.327     pgoyette  904:        if ((rs = raidget(unit, false)) == NULL) {
1.30      oster     905:                bp->b_error = ENXIO;
1.335     mlelstv   906:                goto fail;
1.30      oster     907:        }
1.300     christos  908:        if ((rs->sc_flags & RAIDF_INITED) == 0) {
                    909:                bp->b_error = ENXIO;
1.335     mlelstv   910:                goto fail;
1.1       oster     911:        }
1.335     mlelstv   912:        dksc = &rs->sc_dksc;
1.300     christos  913:        raidPtr = &rs->sc_r;
1.335     mlelstv   914:
                    915:        /* Queue IO only */
                    916:        if (dk_strategy_defer(dksc, bp))
1.196     yamt      917:                goto done;
1.1       oster     918:
1.335     mlelstv   919:        /* schedule the IO to happen at the next convenient time */
                    920:        raid_wakeup(raidPtr);
                    921:
                    922: done:
                    923:        return;
                    924:
                    925: fail:
                    926:        bp->b_resid = bp->b_bcount;
                    927:        biodone(bp);
                    928: }
                    929:
                    930: static int
                    931: raid_diskstart(device_t dev, struct buf *bp)
                    932: {
                    933:        struct raid_softc *rs = raidsoftc(dev);
                    934:        RF_Raid_t *raidPtr;
1.1       oster     935:
1.335     mlelstv   936:        raidPtr = &rs->sc_r;
                    937:        if (!raidPtr->valid) {
                    938:                db1_printf(("raid is not valid..\n"));
                    939:                return ENODEV;
1.196     yamt      940:        }
1.285     mrg       941:
1.335     mlelstv   942:        /* XXX */
                    943:        bp->b_resid = 0;
                    944:
                    945:        return raiddoaccess(raidPtr, bp);
                    946: }
1.1       oster     947:
1.335     mlelstv   948: void
                    949: raiddone(RF_Raid_t *raidPtr, struct buf *bp)
                    950: {
                    951:        struct raid_softc *rs;
                    952:        struct dk_softc *dksc;
1.34      oster     953:
1.335     mlelstv   954:        rs = raidPtr->softc;
                    955:        dksc = &rs->sc_dksc;
1.34      oster     956:
1.335     mlelstv   957:        dk_done(dksc, bp);
1.34      oster     958:
1.335     mlelstv   959:        rf_lock_mutex2(raidPtr->mutex);
                    960:        raidPtr->openings++;
                    961:        rf_unlock_mutex2(raidPtr->mutex);
1.196     yamt      962:
1.335     mlelstv   963:        /* schedule more IO */
                    964:        raid_wakeup(raidPtr);
1.1       oster     965: }
1.324     mrg       966:
1.1       oster     967: /* ARGSUSED */
1.324     mrg       968: static int
1.222     christos  969: raidread(dev_t dev, struct uio *uio, int flags)
1.1       oster     970: {
1.9       oster     971:        int     unit = raidunit(dev);
1.1       oster     972:        struct raid_softc *rs;
                    973:
1.327     pgoyette  974:        if ((rs = raidget(unit, false)) == NULL)
1.300     christos  975:                return ENXIO;
1.1       oster     976:
                    977:        if ((rs->sc_flags & RAIDF_INITED) == 0)
1.389     skrll     978:                return ENXIO;
1.1       oster     979:
1.389     skrll     980:        return physio(raidstrategy, NULL, dev, B_READ, minphys, uio);
1.1       oster     981:
                    982: }
1.324     mrg       983:
1.1       oster     984: /* ARGSUSED */
1.324     mrg       985: static int
1.222     christos  986: raidwrite(dev_t dev, struct uio *uio, int flags)
1.1       oster     987: {
1.9       oster     988:        int     unit = raidunit(dev);
1.1       oster     989:        struct raid_softc *rs;
                    990:
1.327     pgoyette  991:        if ((rs = raidget(unit, false)) == NULL)
1.300     christos  992:                return ENXIO;
1.1       oster     993:
                    994:        if ((rs->sc_flags & RAIDF_INITED) == 0)
1.389     skrll     995:                return ENXIO;
1.147     oster     996:
1.389     skrll     997:        return physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio);
1.1       oster     998:
                    999: }
                   1000:
1.266     dyoung   1001: static int
                   1002: raid_detach_unlocked(struct raid_softc *rs)
                   1003: {
1.335     mlelstv  1004:        struct dk_softc *dksc = &rs->sc_dksc;
                   1005:        RF_Raid_t *raidPtr;
1.266     dyoung   1006:        int error;
                   1007:
1.300     christos 1008:        raidPtr = &rs->sc_r;
1.266     dyoung   1009:
1.337     mlelstv  1010:        if (DK_BUSY(dksc, 0) ||
                   1011:            raidPtr->recon_in_progress != 0 ||
                   1012:            raidPtr->parity_rewrite_in_progress != 0 ||
                   1013:            raidPtr->copyback_in_progress != 0)
1.266     dyoung   1014:                return EBUSY;
                   1015:
                   1016:        if ((rs->sc_flags & RAIDF_INITED) == 0)
1.333     mlelstv  1017:                return 0;
                   1018:
                   1019:        rs->sc_flags &= ~RAIDF_SHUTDOWN;
                   1020:
                   1021:        if ((error = rf_Shutdown(raidPtr)) != 0)
1.266     dyoung   1022:                return error;
                   1023:
1.335     mlelstv  1024:        rs->sc_flags &= ~RAIDF_INITED;
                   1025:
                   1026:        /* Kill off any queued buffers */
                   1027:        dk_drain(dksc);
                   1028:        bufq_free(dksc->sc_bufq);
                   1029:
1.266     dyoung   1030:        /* Detach the disk. */
1.335     mlelstv  1031:        dkwedge_delall(&dksc->sc_dkdev);
                   1032:        disk_detach(&dksc->sc_dkdev);
                   1033:        disk_destroy(&dksc->sc_dkdev);
                   1034:        dk_detach(dksc);
1.333     mlelstv  1035:
1.266     dyoung   1036:        return 0;
                   1037: }
                   1038:
1.366     christos 1039: static bool
                   1040: rf_must_be_initialized(const struct raid_softc *rs, u_long cmd)
                   1041: {
                   1042:        switch (cmd) {
                   1043:        case RAIDFRAME_ADD_HOT_SPARE:
                   1044:        case RAIDFRAME_CHECK_COPYBACK_STATUS:
                   1045:        case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
                   1046:        case RAIDFRAME_CHECK_PARITY:
                   1047:        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
                   1048:        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
                   1049:        case RAIDFRAME_CHECK_RECON_STATUS:
                   1050:        case RAIDFRAME_CHECK_RECON_STATUS_EXT:
                   1051:        case RAIDFRAME_COPYBACK:
                   1052:        case RAIDFRAME_DELETE_COMPONENT:
                   1053:        case RAIDFRAME_FAIL_DISK:
                   1054:        case RAIDFRAME_GET_ACCTOTALS:
                   1055:        case RAIDFRAME_GET_COMPONENT_LABEL:
                   1056:        case RAIDFRAME_GET_INFO:
                   1057:        case RAIDFRAME_GET_SIZE:
                   1058:        case RAIDFRAME_INCORPORATE_HOT_SPARE:
                   1059:        case RAIDFRAME_INIT_LABELS:
                   1060:        case RAIDFRAME_KEEP_ACCTOTALS:
                   1061:        case RAIDFRAME_PARITYMAP_GET_DISABLE:
                   1062:        case RAIDFRAME_PARITYMAP_SET_DISABLE:
                   1063:        case RAIDFRAME_PARITYMAP_SET_PARAMS:
                   1064:        case RAIDFRAME_PARITYMAP_STATUS:
                   1065:        case RAIDFRAME_REBUILD_IN_PLACE:
                   1066:        case RAIDFRAME_REMOVE_HOT_SPARE:
                   1067:        case RAIDFRAME_RESET_ACCTOTALS:
                   1068:        case RAIDFRAME_REWRITEPARITY:
                   1069:        case RAIDFRAME_SET_AUTOCONFIG:
                   1070:        case RAIDFRAME_SET_COMPONENT_LABEL:
                   1071:        case RAIDFRAME_SET_ROOT:
1.369     oster    1072:                return (rs->sc_flags & RAIDF_INITED) == 0;
1.366     christos 1073:        }
                   1074:        return false;
                   1075: }
                   1076:
                   1077: int
                   1078: rf_fail_disk(RF_Raid_t *raidPtr, struct rf_recon_req *rr)
                   1079: {
                   1080:        struct rf_recon_req_internal *rrint;
                   1081:
                   1082:        if (raidPtr->Layout.map->faultsTolerated == 0) {
                   1083:                /* Can't do this on a RAID 0!! */
                   1084:                return EINVAL;
                   1085:        }
                   1086:
                   1087:        if (rr->col < 0 || rr->col >= raidPtr->numCol) {
                   1088:                /* bad column */
                   1089:                return EINVAL;
                   1090:        }
                   1091:
                   1092:        rf_lock_mutex2(raidPtr->mutex);
                   1093:        if (raidPtr->status == rf_rs_reconstructing) {
                   1094:                /* you can't fail a disk while we're reconstructing! */
                   1095:                /* XXX wrong for RAID6 */
                   1096:                goto out;
                   1097:        }
                   1098:        if ((raidPtr->Disks[rr->col].status == rf_ds_optimal) &&
                   1099:            (raidPtr->numFailures > 0)) {
                   1100:                /* some other component has failed.  Let's not make
                   1101:                   things worse. XXX wrong for RAID6 */
                   1102:                goto out;
                   1103:        }
                   1104:        if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
                   1105:                /* Can't fail a spared disk! */
                   1106:                goto out;
                   1107:        }
                   1108:        rf_unlock_mutex2(raidPtr->mutex);
                   1109:
                   1110:        /* make a copy of the recon request so that we don't rely on
                   1111:         * the user's buffer */
1.374     christos 1112:        rrint = RF_Malloc(sizeof(*rrint));
1.366     christos 1113:        if (rrint == NULL)
                   1114:                return(ENOMEM);
                   1115:        rrint->col = rr->col;
                   1116:        rrint->flags = rr->flags;
                   1117:        rrint->raidPtr = raidPtr;
                   1118:
                   1119:        return RF_CREATE_THREAD(raidPtr->recon_thread, rf_ReconThread,
                   1120:            rrint, "raid_recon");
                   1121: out:
                   1122:        rf_unlock_mutex2(raidPtr->mutex);
                   1123:        return EINVAL;
                   1124: }
                   1125:
1.324     mrg      1126: static int
1.367     christos 1127: rf_copyinspecificbuf(RF_Config_t *k_cfg)
                   1128: {
                   1129:        /* allocate a buffer for the layout-specific data, and copy it in */
                   1130:        if (k_cfg->layoutSpecificSize == 0)
                   1131:                return 0;
                   1132:
                   1133:        if (k_cfg->layoutSpecificSize > 10000) {
                   1134:            /* sanity check */
                   1135:            return EINVAL;
                   1136:        }
                   1137:
                   1138:        u_char *specific_buf;
1.374     christos 1139:        specific_buf =  RF_Malloc(k_cfg->layoutSpecificSize);
1.367     christos 1140:        if (specific_buf == NULL)
                   1141:                return ENOMEM;
                   1142:
                   1143:        int retcode = copyin(k_cfg->layoutSpecific, specific_buf,
                   1144:            k_cfg->layoutSpecificSize);
                   1145:        if (retcode) {
                   1146:                RF_Free(specific_buf, k_cfg->layoutSpecificSize);
                   1147:                db1_printf(("%s: retcode=%d copyin.2\n", __func__, retcode));
                   1148:                return retcode;
                   1149:        }
                   1150:
                   1151:        k_cfg->layoutSpecific = specific_buf;
                   1152:        return 0;
                   1153: }
                   1154:
                   1155: static int
                   1156: rf_getConfiguration(struct raid_softc *rs, void *data, RF_Config_t **k_cfg)
                   1157: {
1.372     christos 1158:        RF_Config_t *u_cfg = *((RF_Config_t **) data);
                   1159:
1.367     christos 1160:        if (rs->sc_r.valid) {
                   1161:                /* There is a valid RAID set running on this unit! */
                   1162:                printf("raid%d: Device already configured!\n", rs->sc_unit);
                   1163:                return EINVAL;
                   1164:        }
                   1165:
                   1166:        /* copy-in the configuration information */
                   1167:        /* data points to a pointer to the configuration structure */
1.374     christos 1168:        *k_cfg = RF_Malloc(sizeof(**k_cfg));
1.367     christos 1169:        if (*k_cfg == NULL) {
                   1170:                return ENOMEM;
                   1171:        }
1.373     christos 1172:        int retcode = copyin(u_cfg, *k_cfg, sizeof(RF_Config_t));
1.367     christos 1173:        if (retcode == 0)
                   1174:                return 0;
                   1175:        RF_Free(*k_cfg, sizeof(RF_Config_t));
                   1176:        db1_printf(("%s: retcode=%d copyin.1\n", __func__, retcode));
                   1177:        rs->sc_flags |= RAIDF_SHUTDOWN;
                   1178:        return retcode;
                   1179: }
                   1180:
                   1181: int
                   1182: rf_construct(struct raid_softc *rs, RF_Config_t *k_cfg)
                   1183: {
                   1184:        int retcode;
                   1185:        RF_Raid_t *raidPtr = &rs->sc_r;
                   1186:
                   1187:        rs->sc_flags &= ~RAIDF_SHUTDOWN;
                   1188:
                   1189:        if ((retcode = rf_copyinspecificbuf(k_cfg)) != 0)
                   1190:                goto out;
                   1191:
                   1192:        /* should do some kind of sanity check on the configuration.
                   1193:         * Store the sum of all the bytes in the last byte? */
                   1194:
                   1195:        /* configure the system */
                   1196:
                   1197:        /*
                   1198:         * Clear the entire RAID descriptor, just to make sure
                   1199:         *  there is no stale data left in the case of a
                   1200:         *  reconfiguration
                   1201:         */
                   1202:        memset(raidPtr, 0, sizeof(*raidPtr));
                   1203:        raidPtr->softc = rs;
                   1204:        raidPtr->raidid = rs->sc_unit;
                   1205:
                   1206:        retcode = rf_Configure(raidPtr, k_cfg, NULL);
                   1207:
                   1208:        if (retcode == 0) {
                   1209:                /* allow this many simultaneous IO's to
                   1210:                   this RAID device */
                   1211:                raidPtr->openings = RAIDOUTSTANDING;
                   1212:
                   1213:                raidinit(rs);
                   1214:                raid_wakeup(raidPtr);
                   1215:                rf_markalldirty(raidPtr);
                   1216:        }
                   1217:
                   1218:        /* free the buffers.  No return code here. */
                   1219:        if (k_cfg->layoutSpecificSize) {
                   1220:                RF_Free(k_cfg->layoutSpecific, k_cfg->layoutSpecificSize);
                   1221:        }
                   1222: out:
                   1223:        RF_Free(k_cfg, sizeof(RF_Config_t));
                   1224:        if (retcode) {
                   1225:                /*
                   1226:                 * If configuration failed, set sc_flags so that we
                   1227:                 * will detach the device when we close it.
                   1228:                 */
                   1229:                rs->sc_flags |= RAIDF_SHUTDOWN;
                   1230:        }
                   1231:        return retcode;
                   1232: }
                   1233:
                   1234: #if RF_DISABLED
                   1235: static int
                   1236: rf_set_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
                   1237: {
                   1238:
                   1239:        /* XXX check the label for valid stuff... */
                   1240:        /* Note that some things *should not* get modified --
                   1241:           the user should be re-initing the labels instead of
                   1242:           trying to patch things.
                   1243:           */
                   1244: #ifdef DEBUG
                   1245:        int raidid = raidPtr->raidid;
                   1246:        printf("raid%d: Got component label:\n", raidid);
                   1247:        printf("raid%d: Version: %d\n", raidid, clabel->version);
                   1248:        printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
                   1249:        printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
                   1250:        printf("raid%d: Column: %d\n", raidid, clabel->column);
                   1251:        printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
                   1252:        printf("raid%d: Clean: %d\n", raidid, clabel->clean);
                   1253:        printf("raid%d: Status: %d\n", raidid, clabel->status);
                   1254: #endif /* DEBUG */
                   1255:        clabel->row = 0;
                   1256:        int column = clabel->column;
                   1257:
                   1258:        if ((column < 0) || (column >= raidPtr->numCol)) {
                   1259:                return(EINVAL);
                   1260:        }
                   1261:
                   1262:        /* XXX this isn't allowed to do anything for now :-) */
                   1263:
                   1264:        /* XXX and before it is, we need to fill in the rest
                   1265:           of the fields!?!?!?! */
                   1266:        memcpy(raidget_component_label(raidPtr, column),
                   1267:            clabel, sizeof(*clabel));
                   1268:        raidflush_component_label(raidPtr, column);
                   1269:        return 0;
                   1270: }
                   1271: #endif
                   1272:
                   1273: static int
                   1274: rf_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
                   1275: {
                   1276:        /*
                   1277:           we only want the serial number from
                   1278:           the above.  We get all the rest of the information
                   1279:           from the config that was used to create this RAID
                   1280:           set.
                   1281:           */
                   1282:
                   1283:        raidPtr->serial_number = clabel->serial_number;
                   1284:
                   1285:        for (int column = 0; column < raidPtr->numCol; column++) {
                   1286:                RF_RaidDisk_t *diskPtr = &raidPtr->Disks[column];
                   1287:                if (RF_DEAD_DISK(diskPtr->status))
                   1288:                        continue;
                   1289:                RF_ComponentLabel_t *ci_label = raidget_component_label(
                   1290:                    raidPtr, column);
                   1291:                /* Zeroing this is important. */
                   1292:                memset(ci_label, 0, sizeof(*ci_label));
                   1293:                raid_init_component_label(raidPtr, ci_label);
                   1294:                ci_label->serial_number = raidPtr->serial_number;
                   1295:                ci_label->row = 0; /* we dont' pretend to support more */
                   1296:                rf_component_label_set_partitionsize(ci_label,
                   1297:                    diskPtr->partitionSize);
                   1298:                ci_label->column = column;
                   1299:                raidflush_component_label(raidPtr, column);
                   1300:                /* XXXjld what about the spares? */
                   1301:        }
1.385     riastrad 1302:
1.367     christos 1303:        return 0;
                   1304: }
                   1305:
                   1306: static int
                   1307: rf_rebuild_in_place(RF_Raid_t *raidPtr, RF_SingleComponent_t *componentPtr)
                   1308: {
                   1309:
                   1310:        if (raidPtr->Layout.map->faultsTolerated == 0) {
                   1311:                /* Can't do this on a RAID 0!! */
                   1312:                return EINVAL;
                   1313:        }
                   1314:
                   1315:        if (raidPtr->recon_in_progress == 1) {
                   1316:                /* a reconstruct is already in progress! */
                   1317:                return EINVAL;
                   1318:        }
                   1319:
                   1320:        RF_SingleComponent_t component;
                   1321:        memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
                   1322:        component.row = 0; /* we don't support any more */
                   1323:        int column = component.column;
                   1324:
                   1325:        if ((column < 0) || (column >= raidPtr->numCol)) {
                   1326:                return EINVAL;
                   1327:        }
                   1328:
                   1329:        rf_lock_mutex2(raidPtr->mutex);
                   1330:        if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
                   1331:            (raidPtr->numFailures > 0)) {
                   1332:                /* XXX 0 above shouldn't be constant!!! */
                   1333:                /* some component other than this has failed.
                   1334:                   Let's not make things worse than they already
                   1335:                   are... */
                   1336:                printf("raid%d: Unable to reconstruct to disk at:\n",
                   1337:                       raidPtr->raidid);
                   1338:                printf("raid%d:     Col: %d   Too many failures.\n",
                   1339:                       raidPtr->raidid, column);
                   1340:                rf_unlock_mutex2(raidPtr->mutex);
                   1341:                return EINVAL;
                   1342:        }
                   1343:
                   1344:        if (raidPtr->Disks[column].status == rf_ds_reconstructing) {
                   1345:                printf("raid%d: Unable to reconstruct to disk at:\n",
                   1346:                       raidPtr->raidid);
                   1347:                printf("raid%d:    Col: %d   "
                   1348:                    "Reconstruction already occurring!\n",
                   1349:                    raidPtr->raidid, column);
                   1350:
                   1351:                rf_unlock_mutex2(raidPtr->mutex);
                   1352:                return EINVAL;
                   1353:        }
                   1354:
                   1355:        if (raidPtr->Disks[column].status == rf_ds_spared) {
                   1356:                rf_unlock_mutex2(raidPtr->mutex);
                   1357:                return EINVAL;
                   1358:        }
                   1359:
                   1360:        rf_unlock_mutex2(raidPtr->mutex);
                   1361:
                   1362:        struct rf_recon_req_internal *rrint;
1.374     christos 1363:        rrint = RF_Malloc(sizeof(*rrint));
1.367     christos 1364:        if (rrint == NULL)
                   1365:                return ENOMEM;
                   1366:
                   1367:        rrint->col = column;
                   1368:        rrint->raidPtr = raidPtr;
                   1369:
                   1370:        return RF_CREATE_THREAD(raidPtr->recon_thread,
                   1371:            rf_ReconstructInPlaceThread, rrint, "raid_reconip");
                   1372: }
                   1373:
                   1374: static int
                   1375: rf_check_recon_status(RF_Raid_t *raidPtr, int *data)
                   1376: {
                   1377:        /*
                   1378:         * This makes no sense on a RAID 0, or if we are not reconstructing
                   1379:         * so tell the user it's done.
                   1380:         */
                   1381:        if (raidPtr->Layout.map->faultsTolerated == 0 ||
                   1382:            raidPtr->status != rf_rs_reconstructing) {
                   1383:                *data = 100;
                   1384:                return 0;
                   1385:        }
                   1386:        if (raidPtr->reconControl->numRUsTotal == 0) {
                   1387:                *data = 0;
                   1388:                return 0;
                   1389:        }
                   1390:        *data = (raidPtr->reconControl->numRUsComplete * 100
                   1391:            / raidPtr->reconControl->numRUsTotal);
                   1392:        return 0;
                   1393: }
                   1394:
                   1395: static int
1.225     christos 1396: raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1.1       oster    1397: {
1.9       oster    1398:        int     unit = raidunit(dev);
1.335     mlelstv  1399:        int     part, pmask;
1.1       oster    1400:        struct raid_softc *rs;
1.335     mlelstv  1401:        struct dk_softc *dksc;
1.367     christos 1402:        RF_Config_t *k_cfg;
1.42      oster    1403:        RF_Raid_t *raidPtr;
1.41      oster    1404:        RF_AccTotals_t *totals;
1.367     christos 1405:        RF_SingleComponent_t component;
1.371     oster    1406:        RF_DeviceConfig_t *d_cfg, *ucfgp;
1.11      oster    1407:        int retcode = 0;
                   1408:        int column;
1.48      oster    1409:        RF_ComponentLabel_t *clabel;
1.12      oster    1410:        RF_SingleComponent_t *sparePtr,*componentPtr;
1.353     mrg      1411:        int d;
1.1       oster    1412:
1.327     pgoyette 1413:        if ((rs = raidget(unit, false)) == NULL)
1.300     christos 1414:                return ENXIO;
1.366     christos 1415:
1.335     mlelstv  1416:        dksc = &rs->sc_dksc;
1.300     christos 1417:        raidPtr = &rs->sc_r;
1.1       oster    1418:
1.276     mrg      1419:        db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1.366     christos 1420:            (int) DISKPART(dev), (int) unit, cmd));
1.1       oster    1421:
                   1422:        /* Must be initialized for these... */
1.366     christos 1423:        if (rf_must_be_initialized(rs, cmd))
                   1424:                return ENXIO;
1.9       oster    1425:
1.358     pgoyette 1426:        switch (cmd) {
1.1       oster    1427:                /* configure the system */
                   1428:        case RAIDFRAME_CONFIGURE:
1.367     christos 1429:                if ((retcode = rf_getConfiguration(rs, data, &k_cfg)) != 0)
                   1430:                        return retcode;
                   1431:                return rf_construct(rs, k_cfg);
1.9       oster    1432:
                   1433:                /* shutdown the system */
1.1       oster    1434:        case RAIDFRAME_SHUTDOWN:
1.9       oster    1435:
1.266     dyoung   1436:                part = DISKPART(dev);
                   1437:                pmask = (1 << part);
                   1438:
1.367     christos 1439:                if ((retcode = raidlock(rs)) != 0)
                   1440:                        return retcode;
1.1       oster    1441:
1.337     mlelstv  1442:                if (DK_BUSY(dksc, pmask) ||
                   1443:                    raidPtr->recon_in_progress != 0 ||
                   1444:                    raidPtr->parity_rewrite_in_progress != 0 ||
                   1445:                    raidPtr->copyback_in_progress != 0)
1.266     dyoung   1446:                        retcode = EBUSY;
                   1447:                else {
1.335     mlelstv  1448:                        /* detach and free on close */
1.266     dyoung   1449:                        rs->sc_flags |= RAIDF_SHUTDOWN;
                   1450:                        retcode = 0;
1.9       oster    1451:                }
1.11      oster    1452:
1.266     dyoung   1453:                raidunlock(rs);
1.1       oster    1454:
1.367     christos 1455:                return retcode;
1.11      oster    1456:        case RAIDFRAME_GET_COMPONENT_LABEL:
1.353     mrg      1457:                return rf_get_component_label(raidPtr, data);
1.11      oster    1458:
1.367     christos 1459: #if RF_DISABLED
1.11      oster    1460:        case RAIDFRAME_SET_COMPONENT_LABEL:
1.367     christos 1461:                return rf_set_component_label(raidPtr, data);
                   1462: #endif
1.11      oster    1463:
1.367     christos 1464:        case RAIDFRAME_INIT_LABELS:
                   1465:                return rf_init_component_label(raidPtr, data);
1.12      oster    1466:
1.48      oster    1467:        case RAIDFRAME_SET_AUTOCONFIG:
1.78      minoura  1468:                d = rf_set_autoconfig(raidPtr, *(int *) data);
1.186     perry    1469:                printf("raid%d: New autoconfig value is: %d\n",
1.123     oster    1470:                       raidPtr->raidid, d);
1.78      minoura  1471:                *(int *) data = d;
1.367     christos 1472:                return retcode;
1.48      oster    1473:
                   1474:        case RAIDFRAME_SET_ROOT:
1.78      minoura  1475:                d = rf_set_rootpartition(raidPtr, *(int *) data);
1.186     perry    1476:                printf("raid%d: New rootpartition value is: %d\n",
1.123     oster    1477:                       raidPtr->raidid, d);
1.78      minoura  1478:                *(int *) data = d;
1.367     christos 1479:                return retcode;
1.9       oster    1480:
1.1       oster    1481:                /* initialize all parity */
                   1482:        case RAIDFRAME_REWRITEPARITY:
                   1483:
1.42      oster    1484:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.17      oster    1485:                        /* Parity for RAID 0 is trivially correct */
1.42      oster    1486:                        raidPtr->parity_good = RF_RAID_CLEAN;
1.367     christos 1487:                        return 0;
1.17      oster    1488:                }
1.186     perry    1489:
1.42      oster    1490:                if (raidPtr->parity_rewrite_in_progress == 1) {
1.37      oster    1491:                        /* Re-write is already in progress! */
1.367     christos 1492:                        return EINVAL;
1.37      oster    1493:                }
1.27      oster    1494:
1.367     christos 1495:                return RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
                   1496:                    rf_RewriteParityThread, raidPtr,"raid_parity");
1.11      oster    1497:
                   1498:        case RAIDFRAME_ADD_HOT_SPARE:
1.12      oster    1499:                sparePtr = (RF_SingleComponent_t *) data;
1.367     christos 1500:                memcpy(&component, sparePtr, sizeof(RF_SingleComponent_t));
                   1501:                return rf_add_hot_spare(raidPtr, &component);
1.11      oster    1502:
                   1503:        case RAIDFRAME_REMOVE_HOT_SPARE:
1.367     christos 1504:                return retcode;
1.73      oster    1505:
                   1506:        case RAIDFRAME_DELETE_COMPONENT:
                   1507:                componentPtr = (RF_SingleComponent_t *)data;
1.367     christos 1508:                memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
                   1509:                return rf_delete_component(raidPtr, &component);
1.73      oster    1510:
                   1511:        case RAIDFRAME_INCORPORATE_HOT_SPARE:
                   1512:                componentPtr = (RF_SingleComponent_t *)data;
1.367     christos 1513:                memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
                   1514:                return rf_incorporate_hot_spare(raidPtr, &component);
1.11      oster    1515:
1.12      oster    1516:        case RAIDFRAME_REBUILD_IN_PLACE:
1.367     christos 1517:                return rf_rebuild_in_place(raidPtr, data);
1.24      oster    1518:
1.366     christos 1519:        case RAIDFRAME_GET_INFO:
1.371     oster    1520:                ucfgp = *(RF_DeviceConfig_t **)data;
1.374     christos 1521:                d_cfg = RF_Malloc(sizeof(*d_cfg));
1.41      oster    1522:                if (d_cfg == NULL)
1.366     christos 1523:                        return ENOMEM;
1.353     mrg      1524:                retcode = rf_get_info(raidPtr, d_cfg);
                   1525:                if (retcode == 0) {
1.371     oster    1526:                        retcode = copyout(d_cfg, ucfgp, sizeof(*d_cfg));
1.41      oster    1527:                }
                   1528:                RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1.366     christos 1529:                return retcode;
1.9       oster    1530:
1.22      oster    1531:        case RAIDFRAME_CHECK_PARITY:
1.42      oster    1532:                *(int *) data = raidPtr->parity_good;
1.367     christos 1533:                return 0;
1.41      oster    1534:
1.269     jld      1535:        case RAIDFRAME_PARITYMAP_STATUS:
1.273     jld      1536:                if (rf_paritymap_ineligible(raidPtr))
                   1537:                        return EINVAL;
1.367     christos 1538:                rf_paritymap_status(raidPtr->parity_map, data);
1.269     jld      1539:                return 0;
                   1540:
                   1541:        case RAIDFRAME_PARITYMAP_SET_PARAMS:
1.273     jld      1542:                if (rf_paritymap_ineligible(raidPtr))
                   1543:                        return EINVAL;
1.269     jld      1544:                if (raidPtr->parity_map == NULL)
                   1545:                        return ENOENT; /* ??? */
1.367     christos 1546:                if (rf_paritymap_set_params(raidPtr->parity_map, data, 1) != 0)
1.269     jld      1547:                        return EINVAL;
                   1548:                return 0;
                   1549:
                   1550:        case RAIDFRAME_PARITYMAP_GET_DISABLE:
1.273     jld      1551:                if (rf_paritymap_ineligible(raidPtr))
                   1552:                        return EINVAL;
1.269     jld      1553:                *(int *) data = rf_paritymap_get_disable(raidPtr);
                   1554:                return 0;
                   1555:
                   1556:        case RAIDFRAME_PARITYMAP_SET_DISABLE:
1.273     jld      1557:                if (rf_paritymap_ineligible(raidPtr))
                   1558:                        return EINVAL;
1.269     jld      1559:                rf_paritymap_set_disable(raidPtr, *(int *)data);
                   1560:                /* XXX should errors be passed up? */
                   1561:                return 0;
                   1562:
1.1       oster    1563:        case RAIDFRAME_RESET_ACCTOTALS:
1.108     thorpej  1564:                memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1.367     christos 1565:                return 0;
1.9       oster    1566:
1.1       oster    1567:        case RAIDFRAME_GET_ACCTOTALS:
1.41      oster    1568:                totals = (RF_AccTotals_t *) data;
1.42      oster    1569:                *totals = raidPtr->acc_totals;
1.366     christos 1570:                return 0;
1.9       oster    1571:
1.1       oster    1572:        case RAIDFRAME_KEEP_ACCTOTALS:
1.42      oster    1573:                raidPtr->keep_acc_totals = *(int *)data;
1.366     christos 1574:                return 0;
1.9       oster    1575:
1.1       oster    1576:        case RAIDFRAME_GET_SIZE:
1.42      oster    1577:                *(int *) data = raidPtr->totalSectors;
1.366     christos 1578:                return 0;
1.1       oster    1579:
                   1580:        case RAIDFRAME_FAIL_DISK:
1.366     christos 1581:                return rf_fail_disk(raidPtr, data);
1.9       oster    1582:
                   1583:                /* invoke a copyback operation after recon on whatever disk
                   1584:                 * needs it, if any */
                   1585:        case RAIDFRAME_COPYBACK:
1.24      oster    1586:
1.42      oster    1587:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.24      oster    1588:                        /* This makes no sense on a RAID 0!! */
1.367     christos 1589:                        return EINVAL;
1.24      oster    1590:                }
                   1591:
1.42      oster    1592:                if (raidPtr->copyback_in_progress == 1) {
1.37      oster    1593:                        /* Copyback is already in progress! */
1.367     christos 1594:                        return EINVAL;
1.37      oster    1595:                }
1.27      oster    1596:
1.367     christos 1597:                return RF_CREATE_THREAD(raidPtr->copyback_thread,
                   1598:                    rf_CopybackThread, raidPtr, "raid_copyback");
1.9       oster    1599:
1.1       oster    1600:                /* return the percentage completion of reconstruction */
1.37      oster    1601:        case RAIDFRAME_CHECK_RECON_STATUS:
1.367     christos 1602:                return rf_check_recon_status(raidPtr, data);
                   1603:
1.83      oster    1604:        case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1.353     mrg      1605:                rf_check_recon_status_ext(raidPtr, data);
1.367     christos 1606:                return 0;
1.9       oster    1607:
1.37      oster    1608:        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1.42      oster    1609:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.80      oster    1610:                        /* This makes no sense on a RAID 0, so tell the
                   1611:                           user it's done. */
                   1612:                        *(int *) data = 100;
1.367     christos 1613:                        return 0;
1.37      oster    1614:                }
1.42      oster    1615:                if (raidPtr->parity_rewrite_in_progress == 1) {
1.186     perry    1616:                        *(int *) data = 100 *
                   1617:                                raidPtr->parity_rewrite_stripes_done /
1.83      oster    1618:                                raidPtr->Layout.numStripe;
1.37      oster    1619:                } else {
                   1620:                        *(int *) data = 100;
                   1621:                }
1.367     christos 1622:                return 0;
1.37      oster    1623:
1.83      oster    1624:        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1.353     mrg      1625:                rf_check_parityrewrite_status_ext(raidPtr, data);
1.367     christos 1626:                return 0;
1.83      oster    1627:
1.37      oster    1628:        case RAIDFRAME_CHECK_COPYBACK_STATUS:
1.42      oster    1629:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.37      oster    1630:                        /* This makes no sense on a RAID 0 */
1.83      oster    1631:                        *(int *) data = 100;
1.367     christos 1632:                        return 0;
1.37      oster    1633:                }
1.42      oster    1634:                if (raidPtr->copyback_in_progress == 1) {
                   1635:                        *(int *) data = 100 * raidPtr->copyback_stripes_done /
                   1636:                                raidPtr->Layout.numStripe;
1.37      oster    1637:                } else {
                   1638:                        *(int *) data = 100;
                   1639:                }
1.367     christos 1640:                return 0;
1.37      oster    1641:
1.83      oster    1642:        case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1.353     mrg      1643:                rf_check_copyback_status_ext(raidPtr, data);
                   1644:                return 0;
1.37      oster    1645:
1.341     christos 1646:        case RAIDFRAME_SET_LAST_UNIT:
                   1647:                for (column = 0; column < raidPtr->numCol; column++)
                   1648:                        if (raidPtr->Disks[column].status != rf_ds_optimal)
                   1649:                                return EBUSY;
                   1650:
                   1651:                for (column = 0; column < raidPtr->numCol; column++) {
                   1652:                        clabel = raidget_component_label(raidPtr, column);
                   1653:                        clabel->last_unit = *(int *)data;
                   1654:                        raidflush_component_label(raidPtr, column);
                   1655:                }
                   1656:                rs->sc_cflags |= RAIDF_UNIT_CHANGED;
                   1657:                return 0;
                   1658:
1.9       oster    1659:                /* the sparetable daemon calls this to wait for the kernel to
                   1660:                 * need a spare table. this ioctl does not return until a
                   1661:                 * spare table is needed. XXX -- calling mpsleep here in the
                   1662:                 * ioctl code is almost certainly wrong and evil. -- XXX XXX
                   1663:                 * -- I should either compute the spare table in the kernel,
                   1664:                 * or have a different -- XXX XXX -- interface (a different
1.42      oster    1665:                 * character device) for delivering the table     -- XXX */
1.367     christos 1666: #if RF_DISABLED
1.1       oster    1667:        case RAIDFRAME_SPARET_WAIT:
1.287     mrg      1668:                rf_lock_mutex2(rf_sparet_wait_mutex);
1.9       oster    1669:                while (!rf_sparet_wait_queue)
1.287     mrg      1670:                        rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1.367     christos 1671:                RF_SparetWait_t *waitreq = rf_sparet_wait_queue;
1.1       oster    1672:                rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1.287     mrg      1673:                rf_unlock_mutex2(rf_sparet_wait_mutex);
1.9       oster    1674:
1.42      oster    1675:                /* structure assignment */
1.186     perry    1676:                *((RF_SparetWait_t *) data) = *waitreq;
1.9       oster    1677:
1.1       oster    1678:                RF_Free(waitreq, sizeof(*waitreq));
1.367     christos 1679:                return 0;
1.9       oster    1680:
                   1681:                /* wakes up a process waiting on SPARET_WAIT and puts an error
                   1682:                 * code in it that will cause the dameon to exit */
1.1       oster    1683:        case RAIDFRAME_ABORT_SPARET_WAIT:
1.374     christos 1684:                waitreq = RF_Malloc(sizeof(*waitreq));
1.1       oster    1685:                waitreq->fcol = -1;
1.287     mrg      1686:                rf_lock_mutex2(rf_sparet_wait_mutex);
1.1       oster    1687:                waitreq->next = rf_sparet_wait_queue;
                   1688:                rf_sparet_wait_queue = waitreq;
1.367     christos 1689:                rf_broadcast_cond2(rf_sparet_wait_cv);
1.287     mrg      1690:                rf_unlock_mutex2(rf_sparet_wait_mutex);
1.367     christos 1691:                return 0;
1.1       oster    1692:
1.9       oster    1693:                /* used by the spare table daemon to deliver a spare table
                   1694:                 * into the kernel */
1.1       oster    1695:        case RAIDFRAME_SEND_SPARET:
1.9       oster    1696:
1.1       oster    1697:                /* install the spare table */
1.42      oster    1698:                retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1.9       oster    1699:
                   1700:                /* respond to the requestor.  the return status of the spare
                   1701:                 * table installation is passed in the "fcol" field */
1.374     christos 1702:                waitred = RF_Malloc(sizeof(*waitreq));
1.1       oster    1703:                waitreq->fcol = retcode;
1.287     mrg      1704:                rf_lock_mutex2(rf_sparet_wait_mutex);
1.1       oster    1705:                waitreq->next = rf_sparet_resp_queue;
                   1706:                rf_sparet_resp_queue = waitreq;
1.287     mrg      1707:                rf_broadcast_cond2(rf_sparet_resp_cv);
                   1708:                rf_unlock_mutex2(rf_sparet_wait_mutex);
1.9       oster    1709:
1.367     christos 1710:                return retcode;
                   1711: #endif
                   1712:        default:
1.372     christos 1713:                /*
                   1714:                 * Don't bother trying to load compat modules
                   1715:                 * if it is not our ioctl. This is more efficient
                   1716:                 * and makes rump tests not depend on compat code
                   1717:                 */
                   1718:                if (IOCGROUP(cmd) != 'r')
                   1719:                        break;
1.367     christos 1720: #ifdef _LP64
                   1721:                if ((l->l_proc->p_flag & PK_32) != 0) {
                   1722:                        module_autoload("compat_netbsd32_raid",
                   1723:                            MODULE_CLASS_EXEC);
1.376     pgoyette 1724:                        MODULE_HOOK_CALL(raidframe_netbsd32_ioctl_hook,
1.367     christos 1725:                            (rs, cmd, data), enosys(), retcode);
                   1726:                        if (retcode != EPASSTHROUGH)
                   1727:                                return retcode;
                   1728:                }
1.1       oster    1729: #endif
1.367     christos 1730:                module_autoload("compat_raid_80", MODULE_CLASS_EXEC);
1.376     pgoyette 1731:                MODULE_HOOK_CALL(raidframe_ioctl_80_hook,
1.367     christos 1732:                    (rs, cmd, data), enosys(), retcode);
                   1733:                if (retcode != EPASSTHROUGH)
                   1734:                        return retcode;
1.1       oster    1735:
1.367     christos 1736:                module_autoload("compat_raid_50", MODULE_CLASS_EXEC);
1.376     pgoyette 1737:                MODULE_HOOK_CALL(raidframe_ioctl_50_hook,
1.367     christos 1738:                    (rs, cmd, data), enosys(), retcode);
                   1739:                if (retcode != EPASSTHROUGH)
                   1740:                        return retcode;
1.36      oster    1741:                break; /* fall through to the os-specific code below */
1.1       oster    1742:
                   1743:        }
1.9       oster    1744:
1.42      oster    1745:        if (!raidPtr->valid)
1.389     skrll    1746:                return EINVAL;
1.9       oster    1747:
1.1       oster    1748:        /*
                   1749:         * Add support for "regular" device ioctls here.
                   1750:         */
1.385     riastrad 1751:
1.1       oster    1752:        switch (cmd) {
1.348     jdolecek 1753:        case DIOCGCACHE:
                   1754:                retcode = rf_get_component_caches(raidPtr, (int *)data);
                   1755:                break;
                   1756:
1.252     oster    1757:        case DIOCCACHESYNC:
1.390     christos 1758:                retcode = rf_sync_component_caches(raidPtr, *(int *)data);
1.347     jdolecek 1759:                break;
1.298     buhrow   1760:
1.1       oster    1761:        default:
1.346     jdolecek 1762:                retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
1.347     jdolecek 1763:                break;
1.1       oster    1764:        }
1.346     jdolecek 1765:
1.389     skrll    1766:        return retcode;
1.1       oster    1767:
                   1768: }
                   1769:
                   1770:
1.9       oster    1771: /* raidinit -- complete the rest of the initialization for the
1.1       oster    1772:    RAIDframe device: attach the raid pseudo-device, set up and attach
                   1773:    its dk/disk state, allocate the buffer queue, set RAIDF_INITED and
                   1774:    call dkwedge_discover().  Returns early, without setting RAIDF_INITED, if config_attach_pseudo() fails.  */
1.59      oster    1775: static void
1.300     christos 1776: raidinit(struct raid_softc *rs)
1.1       oster    1777: {
1.262     cegger   1778:        cfdata_t cf;
1.335     mlelstv  1779:        unsigned int unit;
                   1780:        struct dk_softc *dksc = &rs->sc_dksc;
1.300     christos 1781:        RF_Raid_t *raidPtr = &rs->sc_r;
1.335     mlelstv  1782:        device_t dev;
1.1       oster    1783:
1.59      oster    1784:        unit = raidPtr->raidid;
1.1       oster    1785:
1.179     itojun   1786:        /* XXX doesn't check bounds. */
1.335     mlelstv  1787:        snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);
1.1       oster    1788:
1.217     oster    1789:        /* attach the pseudo device */
                   1790:        cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
                   1791:        cf->cf_name = raid_cd.cd_name;
                   1792:        cf->cf_atname = raid_cd.cd_name;
                   1793:        cf->cf_unit = unit;
                   1794:        cf->cf_fstate = FSTATE_STAR;
                   1795:
1.335     mlelstv  1796:        dev = config_attach_pseudo(cf);
                   1797:        if (dev == NULL) {
1.217     oster    1798:                printf("raid%d: config_attach_pseudo failed\n",
1.270     christos 1799:                    raidPtr->raidid);
1.265     pooka    1800:                free(cf, M_RAIDFRAME);
                   1801:                return;
1.217     oster    1802:        }
                   1803:
1.335     mlelstv  1804:        /* provide a backpointer to the real softc */
                   1805:        raidsoftc(dev) = rs;
                   1806:
1.1       oster    1807:        /* disk_attach actually creates space for the CPU disklabel, among
1.9       oster    1808:         * other things, so it's critical to call this *BEFORE* we try putzing
                   1809:         * with disklabels. */
1.335     mlelstv  1810:        dk_init(dksc, dev, DKTYPE_RAID);
                   1811:        disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);
1.1       oster    1812:
                   1813:        /* XXX There may be a weird interaction here between this, and
1.9       oster    1814:         * protectedSectors, as used in RAIDframe.  */
1.11      oster    1815:
1.9       oster    1816:        rs->sc_size = raidPtr->totalSectors;
1.234     oster    1817:
1.335     mlelstv  1818:        /* Attach dk and disk subsystems */
                   1819:        dk_attach(dksc);
                   1820:        disk_attach(&dksc->sc_dkdev);
1.318     mlelstv  1821:        rf_set_geometry(rs, raidPtr);
                   1822:
1.335     mlelstv  1823:        bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);
                   1824:
                   1825:        /* mark unit as usable */
                   1826:        rs->sc_flags |= RAIDF_INITED;
1.234     oster    1827:
1.335     mlelstv  1828:        dkwedge_discover(&dksc->sc_dkdev);
1.1       oster    1829: }
1.335     mlelstv  1830:
1.150     oster    1831: #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1.1       oster    1832: /* wake up the daemon & tell it to get us a spare table
                   1833:  * XXX
1.9       oster    1834:  * the entries in the queues should be tagged with the raidPtr
1.186     perry    1835:  * so that in the extremely rare case that two recons happen at once,
1.11      oster    1836:  * we know for which device we're requesting a spare table
1.1       oster    1837:  * XXX
1.186     perry    1838:  * Pushes req onto rf_sparet_wait_queue, waits for rf_sparet_resp_queue to become non-empty, then returns the fcol field of the response popped from it; that response is freed here.
1.39      oster    1839:  * XXX This code is not currently used. GO
1.1       oster    1840:  */
1.186     perry    1841: int
1.169     oster    1842: rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
1.9       oster    1843: {
                   1844:        int     retcode;
                   1845:
1.287     mrg      1846:        rf_lock_mutex2(rf_sparet_wait_mutex);
1.9       oster    1847:        req->next = rf_sparet_wait_queue;
                   1848:        rf_sparet_wait_queue = req;
1.289     mrg      1849:        rf_broadcast_cond2(rf_sparet_wait_cv);
1.9       oster    1850:
                   1851:        /* rf_wait_cond2 is handed the mutex (original note: "mpsleep unlocks the mutex") */
                   1852:        while (!rf_sparet_resp_queue) {
1.289     mrg      1853:                rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
1.9       oster    1854:        }
                   1855:        req = rf_sparet_resp_queue;
                   1856:        rf_sparet_resp_queue = req->next;
1.287     mrg      1857:        rf_unlock_mutex2(rf_sparet_wait_mutex);
1.9       oster    1858:
                   1859:        retcode = req->fcol;
                   1860:        RF_Free(req, sizeof(*req));     /* this is not the same req as we
                   1861:                                         * alloc'd */
1.389     skrll    1862:        return retcode;
1.1       oster    1863: }
1.150     oster    1864: #endif
1.39      oster    1865:
1.186     perry    1866: /* a wrapper around rf_DoAccess that extracts appropriate info from the
1.11      oster    1867:  * bp & passes it down.
1.1       oster    1868:  * any calls originating in the kernel must use non-blocking I/O
                   1869:  * do some extra sanity checking to return "appropriate" error values for
                   1870:  * certain conditions (to make some standard utilities work)
1.186     perry    1871:  * NOTE(review): the text above predates the current body.  As written, this function updates the component labels (mutex dropped around the call) and decrements numNewFailures when that count is > 0, then calls dk_start() unless RAIDF_INITED is clear; the rf_DoAccess() call is in raiddoaccess().
1.34      oster    1872:  * Formerly known as: rf_DoAccessKernel
1.1       oster    1873:  */
1.34      oster    1874: void
1.169     oster    1875: raidstart(RF_Raid_t *raidPtr)
1.1       oster    1876: {
                   1877:        struct raid_softc *rs;
1.335     mlelstv  1878:        struct dk_softc *dksc;
1.1       oster    1879:
1.300     christos 1880:        rs = raidPtr->softc;
1.335     mlelstv  1881:        dksc = &rs->sc_dksc;
1.56      oster    1882:        /* quick check to see if anything has died recently */
1.291     mrg      1883:        rf_lock_mutex2(raidPtr->mutex);
1.56      oster    1884:        if (raidPtr->numNewFailures > 0) {
1.291     mrg      1885:                rf_unlock_mutex2(raidPtr->mutex);
1.186     perry    1886:                rf_update_component_labels(raidPtr,
1.91      oster    1887:                                           RF_NORMAL_COMPONENT_UPDATE);
1.291     mrg      1888:                rf_lock_mutex2(raidPtr->mutex);
1.56      oster    1889:                raidPtr->numNewFailures--;
                   1890:        }
1.335     mlelstv  1891:        rf_unlock_mutex2(raidPtr->mutex);
1.56      oster    1892:
1.335     mlelstv  1893:        if ((rs->sc_flags & RAIDF_INITED) == 0) {
                   1894:                printf("raid%d: raidstart not ready\n", raidPtr->raidid);
                   1895:                return;
                   1896:        }
1.34      oster    1897:
1.335     mlelstv  1898:        dk_start(dksc, NULL);
                   1899: }
1.34      oster    1900: /* raiddoaccess -- range-check bp against the array and pass it to rf_DoAccess() as an async, non-blocking access.  Returns EAGAIN if raidPtr->openings is 0; ENOSPC if the request would end past totalSectors, the sum wraps, or b_bcount has bits set in sectorMask; otherwise the rf_DoAccess() result. */
1.335     mlelstv  1901: static int
                   1902: raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
                   1903: {
                   1904:        RF_SectorCount_t num_blocks, pb, sum;
                   1905:        RF_RaidAddr_t raid_addr;
                   1906:        daddr_t blocknum;
                   1907:        int     do_async;
                   1908:        int rc;
1.186     perry    1909:
1.335     mlelstv  1910:        rf_lock_mutex2(raidPtr->mutex);
                   1911:        if (raidPtr->openings == 0) {
                   1912:                rf_unlock_mutex2(raidPtr->mutex);
                   1913:                return EAGAIN;
                   1914:        }
                   1915:        rf_unlock_mutex2(raidPtr->mutex);
1.186     perry    1916:
1.335     mlelstv  1917:        blocknum = bp->b_rawblkno;
1.186     perry    1918:
1.335     mlelstv  1919:        db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
                   1920:                    (int) blocknum));
1.1       oster    1921:
1.335     mlelstv  1922:        db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
                   1923:        db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1.1       oster    1924:
1.335     mlelstv  1925:        /* *THIS* is where we adjust what block we're going to...
                   1926:         * but DO NOT TOUCH bp->b_blkno!!! */
                   1927:        raid_addr = blocknum;
                   1928:
                   1929:        num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
                   1930:        pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
                   1931:        sum = raid_addr + num_blocks + pb;
                   1932:        if (1 || rf_debugKernelAccess) { /* always true as written */
                   1933:                db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
                   1934:                            (int) raid_addr, (int) sum, (int) num_blocks,
                   1935:                            (int) pb, (int) bp->b_resid));
                   1936:        }
                   1937:        if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
                   1938:            || (sum < num_blocks) || (sum < pb)) {
                   1939:                rc = ENOSPC;
                   1940:                goto done;
                   1941:        }
                   1942:        /*
                   1943:         * XXX rf_DoAccess() should do this, not just DoAccessKernel()
                   1944:         */
1.186     perry    1945:
1.335     mlelstv  1946:        if (bp->b_bcount & raidPtr->sectorMask) {
                   1947:                rc = ENOSPC;
                   1948:                goto done;
                   1949:        }
                   1950:        db1_printf(("Calling DoAccess..\n"));
1.99      oster    1951:
1.20      oster    1952:        /* take one opening for this access */
1.335     mlelstv  1953:        rf_lock_mutex2(raidPtr->mutex);
                   1954:        raidPtr->openings--;
1.291     mrg      1955:        rf_unlock_mutex2(raidPtr->mutex);
1.20      oster    1956:
1.335     mlelstv  1957:        /*
                   1958:         * Everything is async.
                   1959:         */
                   1960:        do_async = 1;
1.20      oster    1961:
1.335     mlelstv  1962:        /* don't ever condition on bp->b_flags & B_WRITE.
                   1963:         * always condition on B_READ instead */
1.7       explorer 1964:
1.335     mlelstv  1965:        rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
                   1966:                         RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
                   1967:                         do_async, raid_addr, num_blocks,
                   1968:                         bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
                   1969:
                   1970: done:
                   1971:        return rc;
                   1972: }
1.7       explorer 1973:
1.1       oster    1974: /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
                   1975: /* NOP requests are completed at once by calling KernelWakeupFunc() directly; READ/WRITE requests fill in req->bp with InitBP() and go to bdev_strategy() with the queue mutex dropped around that call.  Any other req->type panics.  Always returns 0. */
1.186     perry    1976: int
1.169     oster    1977: rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
1.1       oster    1978: {
1.9       oster    1979:        int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1.1       oster    1980:        struct buf *bp;
1.9       oster    1981:
1.1       oster    1982:        req->queue = queue;
                   1983:        bp = req->bp;
                   1984:
                   1985:        switch (req->type) {
1.9       oster    1986:        case RF_IO_TYPE_NOP:    /* used primarily to unlock a locked queue */
1.1       oster    1987:                /* XXX need to do something extra here.. */
1.9       oster    1988:                /* I'm leaving this in, as I've never actually seen it used,
                   1989:                 * and I'd like folks to report it... GO */
1.391     mrg      1990:                printf("%s: WAKEUP CALLED\n", __func__);
1.1       oster    1991:                queue->numOutstanding++;
                   1992:
1.197     oster    1993:                bp->b_flags = 0;
1.207     simonb   1994:                bp->b_private = req;
1.1       oster    1995:
1.194     oster    1996:                KernelWakeupFunc(bp);
1.1       oster    1997:                break;
1.9       oster    1998:
1.1       oster    1999:        case RF_IO_TYPE_READ:
                   2000:        case RF_IO_TYPE_WRITE:
1.175     oster    2001: #if RF_ACC_TRACE > 0
1.1       oster    2002:                if (req->tracerec) {
                   2003:                        RF_ETIMER_START(req->tracerec->timer);
                   2004:                }
1.175     oster    2005: #endif
1.194     oster    2006:                InitBP(bp, queue->rf_cinfo->ci_vp,
1.197     oster    2007:                    op, queue->rf_cinfo->ci_dev,
1.9       oster    2008:                    req->sectorOffset, req->numSector,
                   2009:                    req->buf, KernelWakeupFunc, (void *) req,
1.384     jdolecek 2010:                    queue->raidPtr->logBytesPerSector);
1.1       oster    2011:
                   2012:                if (rf_debugKernelAccess) {
1.9       oster    2013:                        db1_printf(("dispatch: bp->b_blkno = %ld\n",
                   2014:                                (long) bp->b_blkno));
1.1       oster    2015:                }
                   2016:                queue->numOutstanding++;
                   2017:                queue->last_deq_sector = req->sectorOffset;
1.9       oster    2018:                /* acc wouldn't have been let in if there were any pending
                   2019:                 * reqs at any other priority */
1.1       oster    2020:                queue->curPriority = req->priority;
                   2021:
1.166     oster    2022:                db1_printf(("Going for %c to unit %d col %d\n",
1.186     perry    2023:                            req->type, queue->raidPtr->raidid,
1.166     oster    2024:                            queue->col));
1.1       oster    2025:                db1_printf(("sector %d count %d (%d bytes) %d\n",
1.9       oster    2026:                        (int) req->sectorOffset, (int) req->numSector,
                   2027:                        (int) (req->numSector <<
                   2028:                            queue->raidPtr->logBytesPerSector),
                   2029:                        (int) queue->raidPtr->logBytesPerSector));
1.256     oster    2030:
                   2031:                /*
1.385     riastrad 2032:                 * XXX: drop lock here since this can block at
1.256     oster    2033:                 * least with backing SCSI devices.  Retake it
                   2034:                 * to minimize fuss with calling interfaces.
                   2035:                 */
                   2036:
                   2037:                RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
1.247     oster    2038:                bdev_strategy(bp);
1.256     oster    2039:                RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
1.1       oster    2040:                break;
1.9       oster    2041:
1.1       oster    2042:        default:
                   2043:                panic("bad req->type in rf_DispatchKernelIO");
                   2044:        }
                   2045:        db1_printf(("Exiting from DispatchKernelIO\n"));
1.134     oster    2046:
1.389     skrll    2047:        return 0;
1.1       oster    2048: }
1.9       oster    2049: /* this is the callback function associated with an I/O invoked from
1.1       oster    2050:    kernel code.  With iodone_lock held it may mark the component failed on error, copies bp->b_error into req->error, appends req to raidPtr->iodone and signals iodone_cv.
                   2051:  */
1.186     perry    2052: static void
1.194     oster    2053: KernelWakeupFunc(struct buf *bp)
1.9       oster    2054: {
                   2055:        RF_DiskQueueData_t *req = NULL;
                   2056:        RF_DiskQueue_t *queue;
                   2057:
                   2058:        db1_printf(("recovering the request queue:\n"));
1.285     mrg      2059:
1.207     simonb   2060:        req = bp->b_private;
1.1       oster    2061:
1.9       oster    2062:        queue = (RF_DiskQueue_t *) req->queue;
1.1       oster    2063:
1.286     mrg      2064:        rf_lock_mutex2(queue->raidPtr->iodone_lock);
1.285     mrg      2065:
1.175     oster    2066: #if RF_ACC_TRACE > 0
1.9       oster    2067:        if (req->tracerec) {
                   2068:                RF_ETIMER_STOP(req->tracerec->timer);
                   2069:                RF_ETIMER_EVAL(req->tracerec->timer);
1.288     mrg      2070:                rf_lock_mutex2(rf_tracing_mutex);
1.9       oster    2071:                req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
                   2072:                req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
                   2073:                req->tracerec->num_phys_ios++;
1.288     mrg      2074:                rf_unlock_mutex2(rf_tracing_mutex);
1.9       oster    2075:        }
1.175     oster    2076: #endif
1.1       oster    2077:
1.230     ad       2078:        /* XXX Ok, let's get aggressive... If b_error is set, let's go
1.9       oster    2079:         * ballistic, and mark the component as hosed... */
1.36      oster    2080:
1.230     ad       2081:        if (bp->b_error != 0) {
1.9       oster    2082:                /* Mark the disk as dead */
                   2083:                /* but only mark it once... */
1.186     perry    2084:                /* and only if it wouldn't leave this RAID set
1.183     oster    2085:                   completely broken */
1.193     oster    2086:                if (((queue->raidPtr->Disks[queue->col].status ==
                   2087:                      rf_ds_optimal) ||
                   2088:                     (queue->raidPtr->Disks[queue->col].status ==
1.385     riastrad 2089:                      rf_ds_used_spare)) &&
1.193     oster    2090:                     (queue->raidPtr->numFailures <
1.204     simonb   2091:                      queue->raidPtr->Layout.map->faultsTolerated)) {
1.322     prlw1    2092:                        printf("raid%d: IO Error (%d). Marking %s as failed.\n",
1.136     oster    2093:                               queue->raidPtr->raidid,
1.322     prlw1    2094:                               bp->b_error,
1.166     oster    2095:                               queue->raidPtr->Disks[queue->col].devname);
                   2096:                        queue->raidPtr->Disks[queue->col].status =
1.9       oster    2097:                            rf_ds_failed;
1.166     oster    2098:                        queue->raidPtr->status = rf_rs_degraded;
1.9       oster    2099:                        queue->raidPtr->numFailures++;
1.56      oster    2100:                        queue->raidPtr->numNewFailures++;
1.9       oster    2101:                } else {        /* Disk is already dead... (or numFailures has reached faultsTolerated) */
                   2102:                        /* printf("Disk already marked as dead!\n"); */
                   2103:                }
1.4       oster    2104:
1.9       oster    2105:        }
1.4       oster    2106:
1.143     oster    2107:        /* Fill in the error value */
1.230     ad       2108:        req->error = bp->b_error;
1.143     oster    2109:
                   2110:        /* Drop this one on the "finished" queue... */
                   2111:        TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
                   2112:
                   2113:        /* Let the raidio thread know there is work to be done. */
1.286     mrg      2114:        rf_signal_cond2(queue->raidPtr->iodone_cv);
1.143     oster    2115:
1.286     mrg      2116:        rf_unlock_mutex2(queue->raidPtr->iodone_lock);
1.1       oster    2117: }
                   2118:
                   2119:
                   2120: /*
                   2121:  * initialize a buf structure for doing an I/O in the kernel.
                   2122:  * Of the old b_flags only the bits in rf_b_pass are kept; b_bcount is numSect << logBytesPerSector and b_blkno is startSect rescaled to DEV_BSHIFT units.  Panics if the resulting byte count is zero.  The b_vp argument is not referenced in this body. */
1.186     perry    2123: static void
1.169     oster    2124: InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
1.225     christos 2125:        RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
1.384     jdolecek 2126:        void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector)
1.9       oster    2127: {
1.384     jdolecek 2128:        bp->b_flags = rw_flag | (bp->b_flags & rf_b_pass);
1.242     ad       2129:        bp->b_oflags = 0;
                   2130:        bp->b_cflags = 0;
1.9       oster    2131:        bp->b_bcount = numSect << logBytesPerSector;
                   2132:        bp->b_bufsize = bp->b_bcount;
                   2133:        bp->b_error = 0;
                   2134:        bp->b_dev = dev;
1.187     christos 2135:        bp->b_data = bf;
1.275     mrg      2136:        bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
1.9       oster    2137:        bp->b_resid = bp->b_bcount;     /* XXX is this right!??!?!! */
1.1       oster    2138:        if (bp->b_bcount == 0) {
1.141     provos   2139:                panic("bp->b_bcount is zero in InitBP!!");
1.1       oster    2140:        }
1.9       oster    2141:        bp->b_iodone = cbFunc;
1.207     simonb   2142:        bp->b_private = cbArg;
1.1       oster    2143: }
                   2144:
                   2145: /*
                   2146:  * Wait interruptibly for an exclusive lock.
                   2147:  * Returns 0 with RAIDF_LOCKED set in rs->sc_flags, or the non-zero cv_wait_sig() error without taking the lock.
                   2148:  * XXX
                   2149:  * Several drivers do this; it should be abstracted and made MP-safe.
                   2150:  * (Hmm... where have we seen this warning before :->  GO )
                   2151:  */
                   2152: static int
1.169     oster    2153: raidlock(struct raid_softc *rs)
1.1       oster    2154: {
1.9       oster    2155:        int     error;
1.1       oster    2156:
1.335     mlelstv  2157:        error = 0;
1.327     pgoyette 2158:        mutex_enter(&rs->sc_mutex);
1.1       oster    2159:        while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
                   2160:                rs->sc_flags |= RAIDF_WANTED;
1.327     pgoyette 2161:                error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
                   2162:                if (error != 0)
1.335     mlelstv  2163:                        goto done;
1.1       oster    2164:        }
                   2165:        rs->sc_flags |= RAIDF_LOCKED;
1.335     mlelstv  2166: done:
1.327     pgoyette 2167:        mutex_exit(&rs->sc_mutex);
1.389     skrll    2168:        return error;
1.1       oster    2169: }
                   2170: /*
                   2171:  * Unlock and wake up any waiters.
                   2172:  */
                   2173: static void
1.169     oster    2174: raidunlock(struct raid_softc *rs)
1.1       oster    2175: {
                   2176:        /* clear RAIDF_LOCKED; broadcast on sc_cv only if RAIDF_WANTED was set */
1.327     pgoyette 2177:        mutex_enter(&rs->sc_mutex);
1.1       oster    2178:        rs->sc_flags &= ~RAIDF_LOCKED;
                   2179:        if ((rs->sc_flags & RAIDF_WANTED) != 0) {
                   2180:                rs->sc_flags &= ~RAIDF_WANTED;
1.327     pgoyette 2181:                cv_broadcast(&rs->sc_cv);
1.1       oster    2182:        }
1.327     pgoyette 2183:        mutex_exit(&rs->sc_mutex);
1.11      oster    2184: }
1.186     perry    2185:
1.11      oster    2186:
                   2187: #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
                   2188: #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
1.269     jld      2189: #define RF_PARITY_MAP_SIZE   RF_PARITYMAP_NBYTE /* NOTE(review): presumably bytes, like the two above -- confirm */
1.11      oster    2190: /* Returns the constant RF_COMPONENT_INFO_OFFSET. */
1.276     mrg      2191: static daddr_t
                   2192: rf_component_info_offset(void)
                   2193: {
                   2194:
                   2195:        return RF_COMPONENT_INFO_OFFSET;
                   2196: }
                   2197: /* Returns the larger of secsize and RF_COMPONENT_INFO_SIZE; secsize must be non-zero (KASSERT). */
                   2198: static daddr_t
                   2199: rf_component_info_size(unsigned secsize)
                   2200: {
                   2201:        daddr_t info_size;
                   2202:
                   2203:        KASSERT(secsize);
                   2204:        if (secsize > RF_COMPONENT_INFO_SIZE)
                   2205:                info_size = secsize;
                   2206:        else
                   2207:                info_size = RF_COMPONENT_INFO_SIZE;
                   2208:
                   2209:        return info_size;
                   2210: }
                   2211:
                   2212: static daddr_t
                   2213: rf_parity_map_offset(RF_Raid_t *raidPtr)
                   2214: {
                   2215:        daddr_t map_offset;
                   2216:
                   2217:        KASSERT(raidPtr->bytesPerSector);
                   2218:        if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
                   2219:                map_offset = raidPtr->bytesPerSector;
                   2220:        else
                   2221:                map_offset = RF_COMPONENT_INFO_SIZE;
                   2222:        map_offset += rf_component_info_offset();
                   2223:
                   2224:        return map_offset;
                   2225: }
                   2226:
                   2227: static daddr_t
                   2228: rf_parity_map_size(RF_Raid_t *raidPtr)
                   2229: {
                   2230:        daddr_t map_size;
                   2231:
                   2232:        if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
                   2233:                map_size = raidPtr->bytesPerSector;
                   2234:        else
                   2235:                map_size = RF_PARITY_MAP_SIZE;
                   2236:
                   2237:        return map_size;
                   2238: }
                   2239:
1.186     perry    2240: int
1.269     jld      2241: raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
1.12      oster    2242: {
1.269     jld      2243:        RF_ComponentLabel_t *clabel;
                   2244:
                   2245:        clabel = raidget_component_label(raidPtr, col);
                   2246:        clabel->clean = RF_RAID_CLEAN;
                   2247:        raidflush_component_label(raidPtr, col);
1.12      oster    2248:        return(0);
                   2249: }
                   2250:
                   2251:
1.186     perry    2252: int
1.269     jld      2253: raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
1.11      oster    2254: {
1.269     jld      2255:        RF_ComponentLabel_t *clabel;
                   2256:
                   2257:        clabel = raidget_component_label(raidPtr, col);
                   2258:        clabel->clean = RF_RAID_DIRTY;
                   2259:        raidflush_component_label(raidPtr, col);
1.11      oster    2260:        return(0);
                   2261: }
                   2262:
                   2263: int
1.269     jld      2264: raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
                   2265: {
1.276     mrg      2266:        KASSERT(raidPtr->bytesPerSector);
1.394   ! mrg      2267:
1.276     mrg      2268:        return raidread_component_label(raidPtr->bytesPerSector,
                   2269:            raidPtr->Disks[col].dev,
1.385     riastrad 2270:            raidPtr->raid_cinfo[col].ci_vp,
1.269     jld      2271:            &raidPtr->raid_cinfo[col].ci_label);
                   2272: }
                   2273:
                   2274: RF_ComponentLabel_t *
                   2275: raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
                   2276: {
                   2277:        return &raidPtr->raid_cinfo[col].ci_label;
                   2278: }
                   2279:
                   2280: int
                   2281: raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
                   2282: {
                   2283:        RF_ComponentLabel_t *label;
                   2284:
                   2285:        label = &raidPtr->raid_cinfo[col].ci_label;
                   2286:        label->mod_counter = raidPtr->mod_counter;
                   2287: #ifndef RF_NO_PARITY_MAP
                   2288:        label->parity_map_modcount = label->mod_counter;
                   2289: #endif
1.276     mrg      2290:        return raidwrite_component_label(raidPtr->bytesPerSector,
                   2291:            raidPtr->Disks[col].dev,
1.269     jld      2292:            raidPtr->raid_cinfo[col].ci_vp, label);
                   2293: }
                   2294:
1.394   ! mrg      2295: /*
        !          2296:  * Swap the label endianness.
        !          2297:  *
        !          2298:  * Everything in the component label is 4-byte-swapped except the version,
        !          2299:  * which is kept in the byte-swapped version at all times, and indicates
        !          2300:  * for the writer that a swap is necessary.
        !          2301:  *
        !          2302:  * For reads it is expected that out_label == clabel, but writes expect
        !          2303:  * separate labels so only the re-swapped label is written out to disk,
        !          2304:  * leaving the swapped-except-version internally.
        !          2305:  *
        !          2306:  * Only support swapping label version 2.
        !          2307:  */
        !          2308: static void
        !          2309: rf_swap_label(RF_ComponentLabel_t *clabel, RF_ComponentLabel_t *out_label)
        !          2310: {
        !          2311:        int     *in, *out, *in_last;
        !          2312:
        !          2313:        KASSERT(clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION));
        !          2314:
        !          2315:        /* Don't swap the label, but do copy it. */
        !          2316:        out_label->version = clabel->version;
        !          2317:
        !          2318:        in = &clabel->serial_number;
        !          2319:        in_last = &clabel->future_use2[42];
        !          2320:        out = &out_label->serial_number;
        !          2321:
        !          2322:        for (; in < in_last; in++, out++)
        !          2323:                *out = bswap32(*in);
        !          2324: }
1.269     jld      2325:
                   2326: static int
1.276     mrg      2327: raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
1.269     jld      2328:     RF_ComponentLabel_t *clabel)
                   2329: {
1.394   ! mrg      2330:        int error;
        !          2331:
        !          2332:        error = raidread_component_area(dev, b_vp, clabel,
1.269     jld      2333:            sizeof(RF_ComponentLabel_t),
1.276     mrg      2334:            rf_component_info_offset(),
                   2335:            rf_component_info_size(secsize));
1.394   ! mrg      2336:
        !          2337:        if (error == 0 &&
        !          2338:            clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) {
        !          2339:                rf_swap_label(clabel, clabel);
        !          2340:        }
        !          2341:
        !          2342:        return error;
1.269     jld      2343: }
                   2344:
                   2345: /* ARGSUSED */
                   2346: static int
                   2347: raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
                   2348:     size_t msize, daddr_t offset, daddr_t dsize)
1.11      oster    2349: {
                   2350:        struct buf *bp;
                   2351:        int error;
1.186     perry    2352:
1.11      oster    2353:        /* XXX should probably ensure that we don't try to do this if
1.186     perry    2354:           someone has changed rf_protected_sectors. */
1.11      oster    2355:
1.98      oster    2356:        if (b_vp == NULL) {
                   2357:                /* For whatever reason, this component is not valid.
                   2358:                   Don't try to read a component label from it. */
                   2359:                return(EINVAL);
                   2360:        }
                   2361:
1.11      oster    2362:        /* get a block of the appropriate size... */
1.269     jld      2363:        bp = geteblk((int)dsize);
1.11      oster    2364:        bp->b_dev = dev;
                   2365:
                   2366:        /* get our ducks in a row for the read */
1.269     jld      2367:        bp->b_blkno = offset / DEV_BSIZE;
                   2368:        bp->b_bcount = dsize;
1.100     chs      2369:        bp->b_flags |= B_READ;
1.269     jld      2370:        bp->b_resid = dsize;
1.11      oster    2371:
1.331     mlelstv  2372:        bdev_strategy(bp);
1.340     christos 2373:        error = biowait(bp);
1.11      oster    2374:
                   2375:        if (!error) {
1.269     jld      2376:                memcpy(data, bp->b_data, msize);
1.204     simonb   2377:        }
1.11      oster    2378:
1.233     ad       2379:        brelse(bp, 0);
1.11      oster    2380:        return(error);
                   2381: }
1.269     jld      2382:
                   2383: static int
1.276     mrg      2384: raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
                   2385:     RF_ComponentLabel_t *clabel)
1.269     jld      2386: {
1.394   ! mrg      2387:        RF_ComponentLabel_t *clabel_write = clabel;
        !          2388:        RF_ComponentLabel_t lclabel;
        !          2389:        int error;
        !          2390:
        !          2391:        if (clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) {
        !          2392:                clabel_write = &lclabel;
        !          2393:                rf_swap_label(clabel, clabel_write);
        !          2394:        }
        !          2395:        error = raidwrite_component_area(dev, b_vp, clabel_write,
1.269     jld      2396:            sizeof(RF_ComponentLabel_t),
1.276     mrg      2397:            rf_component_info_offset(),
                   2398:            rf_component_info_size(secsize), 0);
1.394   ! mrg      2399:
        !          2400:        return error;
1.269     jld      2401: }
                   2402:
1.11      oster    2403: /* ARGSUSED */
1.269     jld      2404: static int
1.385     riastrad 2405: raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
1.269     jld      2406:     size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
1.11      oster    2407: {
                   2408:        struct buf *bp;
                   2409:        int error;
                   2410:
                   2411:        /* get a block of the appropriate size... */
1.269     jld      2412:        bp = geteblk((int)dsize);
1.11      oster    2413:        bp->b_dev = dev;
                   2414:
                   2415:        /* get our ducks in a row for the write */
1.269     jld      2416:        bp->b_blkno = offset / DEV_BSIZE;
                   2417:        bp->b_bcount = dsize;
                   2418:        bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
                   2419:        bp->b_resid = dsize;
1.11      oster    2420:
1.269     jld      2421:        memset(bp->b_data, 0, dsize);
                   2422:        memcpy(bp->b_data, data, msize);
1.11      oster    2423:
1.331     mlelstv  2424:        bdev_strategy(bp);
1.269     jld      2425:        if (asyncp)
                   2426:                return 0;
1.340     christos 2427:        error = biowait(bp);
1.233     ad       2428:        brelse(bp, 0);
1.11      oster    2429:        if (error) {
1.48      oster    2430: #if 1
1.11      oster    2431:                printf("Failed to write RAID component info!\n");
1.48      oster    2432: #endif
1.11      oster    2433:        }
                   2434:
                   2435:        return(error);
1.1       oster    2436: }
1.12      oster    2437:
1.186     perry    2438: void
1.269     jld      2439: rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
                   2440: {
                   2441:        int c;
                   2442:
                   2443:        for (c = 0; c < raidPtr->numCol; c++) {
                   2444:                /* Skip dead disks. */
                   2445:                if (RF_DEAD_DISK(raidPtr->Disks[c].status))
                   2446:                        continue;
                   2447:                /* XXXjld: what if an error occurs here? */
                   2448:                raidwrite_component_area(raidPtr->Disks[c].dev,
                   2449:                    raidPtr->raid_cinfo[c].ci_vp, map,
                   2450:                    RF_PARITYMAP_NBYTE,
1.276     mrg      2451:                    rf_parity_map_offset(raidPtr),
                   2452:                    rf_parity_map_size(raidPtr), 0);
1.269     jld      2453:        }
                   2454: }
                   2455:
                   2456: void
                   2457: rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
                   2458: {
                   2459:        struct rf_paritymap_ondisk tmp;
1.272     oster    2460:        int c,first;
1.269     jld      2461:
1.272     oster    2462:        first=1;
1.269     jld      2463:        for (c = 0; c < raidPtr->numCol; c++) {
                   2464:                /* Skip dead disks. */
                   2465:                if (RF_DEAD_DISK(raidPtr->Disks[c].status))
                   2466:                        continue;
                   2467:                raidread_component_area(raidPtr->Disks[c].dev,
                   2468:                    raidPtr->raid_cinfo[c].ci_vp, &tmp,
                   2469:                    RF_PARITYMAP_NBYTE,
1.276     mrg      2470:                    rf_parity_map_offset(raidPtr),
                   2471:                    rf_parity_map_size(raidPtr));
1.272     oster    2472:                if (first) {
1.269     jld      2473:                        memcpy(map, &tmp, sizeof(*map));
1.272     oster    2474:                        first = 0;
1.269     jld      2475:                } else {
                   2476:                        rf_paritymap_merge(map, &tmp);
                   2477:                }
                   2478:        }
                   2479: }
                   2480:
                   2481: void
1.169     oster    2482: rf_markalldirty(RF_Raid_t *raidPtr)
1.12      oster    2483: {
1.269     jld      2484:        RF_ComponentLabel_t *clabel;
1.146     oster    2485:        int sparecol;
1.166     oster    2486:        int c;
                   2487:        int j;
                   2488:        int scol = -1;
1.12      oster    2489:
                   2490:        raidPtr->mod_counter++;
1.166     oster    2491:        for (c = 0; c < raidPtr->numCol; c++) {
                   2492:                /* we don't want to touch (at all) a disk that has
                   2493:                   failed */
                   2494:                if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
1.269     jld      2495:                        clabel = raidget_component_label(raidPtr, c);
                   2496:                        if (clabel->status == rf_ds_spared) {
1.186     perry    2497:                                /* XXX do something special...
                   2498:                                   but whatever you do, don't
1.166     oster    2499:                                   try to access it!! */
                   2500:                        } else {
1.269     jld      2501:                                raidmarkdirty(raidPtr, c);
1.12      oster    2502:                        }
1.166     oster    2503:                }
1.186     perry    2504:        }
1.146     oster    2505:
1.12      oster    2506:        for( c = 0; c < raidPtr->numSpare ; c++) {
                   2507:                sparecol = raidPtr->numCol + c;
1.166     oster    2508:                if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.186     perry    2509:                        /*
                   2510:
                   2511:                           we claim this disk is "optimal" if it's
                   2512:                           rf_ds_used_spare, as that means it should be
                   2513:                           directly substitutable for the disk it replaced.
1.12      oster    2514:                           We note that too...
                   2515:
                   2516:                         */
                   2517:
1.166     oster    2518:                        for(j=0;j<raidPtr->numCol;j++) {
                   2519:                                if (raidPtr->Disks[j].spareCol == sparecol) {
                   2520:                                        scol = j;
                   2521:                                        break;
1.12      oster    2522:                                }
                   2523:                        }
1.186     perry    2524:
1.269     jld      2525:                        clabel = raidget_component_label(raidPtr, sparecol);
1.12      oster    2526:                        /* make sure status is noted */
1.146     oster    2527:
1.269     jld      2528:                        raid_init_component_label(raidPtr, clabel);
1.146     oster    2529:
1.269     jld      2530:                        clabel->row = 0;
                   2531:                        clabel->column = scol;
1.146     oster    2532:                        /* Note: we *don't* change status from rf_ds_used_spare
                   2533:                           to rf_ds_optimal */
                   2534:                        /* clabel.status = rf_ds_optimal; */
1.186     perry    2535:
1.269     jld      2536:                        raidmarkdirty(raidPtr, sparecol);
1.12      oster    2537:                }
                   2538:        }
                   2539: }
                   2540:
1.13      oster    2541:
                   2542: void
1.169     oster    2543: rf_update_component_labels(RF_Raid_t *raidPtr, int final)
1.13      oster    2544: {
1.269     jld      2545:        RF_ComponentLabel_t *clabel;
1.13      oster    2546:        int sparecol;
1.166     oster    2547:        int c;
                   2548:        int j;
                   2549:        int scol;
1.341     christos 2550:        struct raid_softc *rs = raidPtr->softc;
1.13      oster    2551:
                   2552:        scol = -1;
                   2553:
1.186     perry    2554:        /* XXX should do extra checks to make sure things really are clean,
1.13      oster    2555:           rather than blindly setting the clean bit... */
                   2556:
                   2557:        raidPtr->mod_counter++;
                   2558:
1.166     oster    2559:        for (c = 0; c < raidPtr->numCol; c++) {
                   2560:                if (raidPtr->Disks[c].status == rf_ds_optimal) {
1.269     jld      2561:                        clabel = raidget_component_label(raidPtr, c);
1.201     oster    2562:                        /* make sure status is noted */
1.269     jld      2563:                        clabel->status = rf_ds_optimal;
1.385     riastrad 2564:
1.214     oster    2565:                        /* note what unit we are configured as */
1.341     christos 2566:                        if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
                   2567:                                clabel->last_unit = raidPtr->raidid;
1.214     oster    2568:
1.269     jld      2569:                        raidflush_component_label(raidPtr, c);
1.166     oster    2570:                        if (final == RF_FINAL_COMPONENT_UPDATE) {
                   2571:                                if (raidPtr->parity_good == RF_RAID_CLEAN) {
1.269     jld      2572:                                        raidmarkclean(raidPtr, c);
1.91      oster    2573:                                }
1.166     oster    2574:                        }
1.186     perry    2575:                }
1.166     oster    2576:                /* else we don't touch it.. */
1.186     perry    2577:        }
1.63      oster    2578:
                   2579:        for( c = 0; c < raidPtr->numSpare ; c++) {
                   2580:                sparecol = raidPtr->numCol + c;
1.110     oster    2581:                /* Need to ensure that the reconstruct actually completed! */
1.166     oster    2582:                if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.186     perry    2583:                        /*
                   2584:
                   2585:                           we claim this disk is "optimal" if it's
                   2586:                           rf_ds_used_spare, as that means it should be
                   2587:                           directly substitutable for the disk it replaced.
1.63      oster    2588:                           We note that too...
                   2589:
                   2590:                         */
                   2591:
1.166     oster    2592:                        for(j=0;j<raidPtr->numCol;j++) {
                   2593:                                if (raidPtr->Disks[j].spareCol == sparecol) {
                   2594:                                        scol = j;
                   2595:                                        break;
1.63      oster    2596:                                }
                   2597:                        }
1.186     perry    2598:
1.63      oster    2599:                        /* XXX shouldn't *really* need this... */
1.269     jld      2600:                        clabel = raidget_component_label(raidPtr, sparecol);
1.63      oster    2601:                        /* make sure status is noted */
                   2602:
1.269     jld      2603:                        raid_init_component_label(raidPtr, clabel);
                   2604:
                   2605:                        clabel->column = scol;
                   2606:                        clabel->status = rf_ds_optimal;
1.341     christos 2607:                        if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
                   2608:                                clabel->last_unit = raidPtr->raidid;
1.63      oster    2609:
1.269     jld      2610:                        raidflush_component_label(raidPtr, sparecol);
1.91      oster    2611:                        if (final == RF_FINAL_COMPONENT_UPDATE) {
1.13      oster    2612:                                if (raidPtr->parity_good == RF_RAID_CLEAN) {
1.269     jld      2613:                                        raidmarkclean(raidPtr, sparecol);
1.13      oster    2614:                                }
                   2615:                        }
                   2616:                }
                   2617:        }
1.68      oster    2618: }
                   2619:
                   2620: void
1.169     oster    2621: rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
1.69      oster    2622: {
                   2623:
                   2624:        if (vp != NULL) {
                   2625:                if (auto_configured == 1) {
1.96      oster    2626:                        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.238     pooka    2627:                        VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
1.69      oster    2628:                        vput(vp);
1.186     perry    2629:
                   2630:                } else {
1.244     ad       2631:                        (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
1.69      oster    2632:                }
1.186     perry    2633:        }
1.69      oster    2634: }
                   2635:
                   2636:
                   2637: void
1.169     oster    2638: rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
1.68      oster    2639: {
1.186     perry    2640:        int r,c;
1.69      oster    2641:        struct vnode *vp;
                   2642:        int acd;
1.68      oster    2643:
                   2644:
                   2645:        /* We take this opportunity to close the vnodes like we should.. */
                   2646:
1.166     oster    2647:        for (c = 0; c < raidPtr->numCol; c++) {
                   2648:                vp = raidPtr->raid_cinfo[c].ci_vp;
                   2649:                acd = raidPtr->Disks[c].auto_configured;
                   2650:                rf_close_component(raidPtr, vp, acd);
                   2651:                raidPtr->raid_cinfo[c].ci_vp = NULL;
                   2652:                raidPtr->Disks[c].auto_configured = 0;
1.68      oster    2653:        }
1.166     oster    2654:
1.68      oster    2655:        for (r = 0; r < raidPtr->numSpare; r++) {
1.166     oster    2656:                vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
                   2657:                acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
1.69      oster    2658:                rf_close_component(raidPtr, vp, acd);
1.166     oster    2659:                raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
                   2660:                raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
1.68      oster    2661:        }
1.37      oster    2662: }
1.63      oster    2663:
1.37      oster    2664:
1.393     mrg      2665: static void
1.353     mrg      2666: rf_ReconThread(struct rf_recon_req_internal *req)
1.37      oster    2667: {
                   2668:        int     s;
                   2669:        RF_Raid_t *raidPtr;
                   2670:
                   2671:        s = splbio();
                   2672:        raidPtr = (RF_Raid_t *) req->raidPtr;
                   2673:        raidPtr->recon_in_progress = 1;
                   2674:
1.166     oster    2675:        rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
1.37      oster    2676:                    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
                   2677:
                   2678:        RF_Free(req, sizeof(*req));
                   2679:
                   2680:        raidPtr->recon_in_progress = 0;
                   2681:        splx(s);
                   2682:
                   2683:        /* That's all... */
1.204     simonb   2684:        kthread_exit(0);        /* does not return */
1.37      oster    2685: }
                   2686:
1.393     mrg      2687: static void
1.169     oster    2688: rf_RewriteParityThread(RF_Raid_t *raidPtr)
1.37      oster    2689: {
                   2690:        int retcode;
                   2691:        int s;
                   2692:
1.184     oster    2693:        raidPtr->parity_rewrite_stripes_done = 0;
1.37      oster    2694:        raidPtr->parity_rewrite_in_progress = 1;
                   2695:        s = splbio();
                   2696:        retcode = rf_RewriteParity(raidPtr);
                   2697:        splx(s);
                   2698:        if (retcode) {
1.279     christos 2699:                printf("raid%d: Error re-writing parity (%d)!\n",
                   2700:                    raidPtr->raidid, retcode);
1.37      oster    2701:        } else {
                   2702:                /* set the clean bit!  If we shutdown correctly,
                   2703:                   the clean bit on each component label will get
                   2704:                   set */
                   2705:                raidPtr->parity_good = RF_RAID_CLEAN;
                   2706:        }
                   2707:        raidPtr->parity_rewrite_in_progress = 0;
1.85      oster    2708:
                   2709:        /* Anyone waiting for us to stop?  If so, inform them... */
                   2710:        if (raidPtr->waitShutdown) {
1.357     mrg      2711:                rf_lock_mutex2(raidPtr->rad_lock);
                   2712:                cv_broadcast(&raidPtr->parity_rewrite_cv);
                   2713:                rf_unlock_mutex2(raidPtr->rad_lock);
1.85      oster    2714:        }
1.37      oster    2715:
                   2716:        /* That's all... */
1.204     simonb   2717:        kthread_exit(0);        /* does not return */
1.37      oster    2718: }
                   2719:
                   2720:
1.393     mrg      2721: static void
1.169     oster    2722: rf_CopybackThread(RF_Raid_t *raidPtr)
1.37      oster    2723: {
                   2724:        int s;
                   2725:
                   2726:        raidPtr->copyback_in_progress = 1;
                   2727:        s = splbio();
                   2728:        rf_CopybackReconstructedData(raidPtr);
                   2729:        splx(s);
                   2730:        raidPtr->copyback_in_progress = 0;
                   2731:
                   2732:        /* That's all... */
1.204     simonb   2733:        kthread_exit(0);        /* does not return */
1.37      oster    2734: }
                   2735:
                   2736:
1.393     mrg      2737: static void
1.353     mrg      2738: rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
1.37      oster    2739: {
                   2740:        int s;
                   2741:        RF_Raid_t *raidPtr;
1.186     perry    2742:
1.37      oster    2743:        s = splbio();
                   2744:        raidPtr = req->raidPtr;
                   2745:        raidPtr->recon_in_progress = 1;
1.166     oster    2746:        rf_ReconstructInPlace(raidPtr, req->col);
1.37      oster    2747:        RF_Free(req, sizeof(*req));
                   2748:        raidPtr->recon_in_progress = 0;
                   2749:        splx(s);
                   2750:
                   2751:        /* That's all... */
1.204     simonb   2752:        kthread_exit(0);        /* does not return */
1.48      oster    2753: }
                   2754:
1.213     christos 2755: static RF_AutoConfig_t *
                   2756: rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
1.276     mrg      2757:     const char *cname, RF_SectorCount_t size, uint64_t numsecs,
                   2758:     unsigned secsize)
1.213     christos 2759: {
                   2760:        int good_one = 0;
1.385     riastrad 2761:        RF_ComponentLabel_t *clabel;
1.213     christos 2762:        RF_AutoConfig_t *ac;
                   2763:
1.379     chs      2764:        clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_WAITOK);
1.213     christos 2765:
1.276     mrg      2766:        if (!raidread_component_label(secsize, dev, vp, clabel)) {
                   2767:                /* Got the label.  Does it look reasonable? */
1.385     riastrad 2768:                if (rf_reasonable_label(clabel, numsecs) &&
1.282     enami    2769:                    (rf_component_label_partitionsize(clabel) <= size)) {
1.224     oster    2770: #ifdef DEBUG
1.276     mrg      2771:                        printf("Component on: %s: %llu\n",
1.213     christos 2772:                                cname, (unsigned long long)size);
1.276     mrg      2773:                        rf_print_component_label(clabel);
1.213     christos 2774: #endif
1.276     mrg      2775:                        /* if it's reasonable, add it, else ignore it. */
                   2776:                        ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
1.379     chs      2777:                                M_WAITOK);
1.276     mrg      2778:                        strlcpy(ac->devname, cname, sizeof(ac->devname));
                   2779:                        ac->dev = dev;
                   2780:                        ac->vp = vp;
                   2781:                        ac->clabel = clabel;
                   2782:                        ac->next = ac_list;
                   2783:                        ac_list = ac;
                   2784:                        good_one = 1;
                   2785:                }
1.213     christos 2786:        }
                   2787:        if (!good_one) {
                   2788:                /* cleanup */
                   2789:                free(clabel, M_RAIDFRAME);
                   2790:                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.238     pooka    2791:                VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
1.213     christos 2792:                vput(vp);
                   2793:        }
                   2794:        return ac_list;
                   2795: }
                   2796:
1.393     mrg      2797: static RF_AutoConfig_t *
1.259     cegger   2798: rf_find_raid_components(void)
1.48      oster    2799: {
                   2800:        struct vnode *vp;
                   2801:        struct disklabel label;
1.261     dyoung   2802:        device_t dv;
1.268     dyoung   2803:        deviter_t di;
1.48      oster    2804:        dev_t dev;
1.296     buhrow   2805:        int bmajor, bminor, wedge, rf_part_found;
1.48      oster    2806:        int error;
                   2807:        int i;
                   2808:        RF_AutoConfig_t *ac_list;
1.276     mrg      2809:        uint64_t numsecs;
                   2810:        unsigned secsize;
1.335     mlelstv  2811:        int dowedges;
1.48      oster    2812:
                   2813:        /* initialize the AutoConfig list */
                   2814:        ac_list = NULL;
                   2815:
1.335     mlelstv  2816:        /*
                   2817:         * we begin by trolling through *all* the devices on the system *twice*
                   2818:         * first we scan for wedges, second for other devices. This avoids
                   2819:         * using a raw partition instead of a wedge that covers the whole disk
                   2820:         */
1.48      oster    2821:
1.335     mlelstv  2822:        for (dowedges=1; dowedges>=0; --dowedges) {
                   2823:                for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
                   2824:                     dv = deviter_next(&di)) {
1.48      oster    2825:
1.393     mrg      2826:                        /* we are only interested in disks */
1.335     mlelstv  2827:                        if (device_class(dv) != DV_DISK)
                   2828:                                continue;
1.48      oster    2829:
1.393     mrg      2830:                        /* we don't care about floppies */
1.335     mlelstv  2831:                        if (device_is_a(dv, "fd")) {
                   2832:                                continue;
                   2833:                        }
1.129     oster    2834:
1.393     mrg      2835:                        /* we don't care about CDs. */
1.335     mlelstv  2836:                        if (device_is_a(dv, "cd")) {
                   2837:                                continue;
                   2838:                        }
1.129     oster    2839:
1.393     mrg      2840:                        /* we don't care about md. */
1.335     mlelstv  2841:                        if (device_is_a(dv, "md")) {
                   2842:                                continue;
                   2843:                        }
1.248     oster    2844:
1.335     mlelstv  2845:                        /* hdfd is the Atari/Hades floppy driver */
                   2846:                        if (device_is_a(dv, "hdfd")) {
                   2847:                                continue;
                   2848:                        }
1.206     thorpej  2849:
1.335     mlelstv  2850:                        /* fdisa is the Atari/Milan floppy driver */
                   2851:                        if (device_is_a(dv, "fdisa")) {
                   2852:                                continue;
                   2853:                        }
1.186     perry    2854:
1.393     mrg      2855:                        /* we don't care about spiflash */
                   2856:                        if (device_is_a(dv, "spiflash")) {
                   2857:                                continue;
                   2858:                        }
                   2859:
1.335     mlelstv  2860:                        /* are we in the wedges pass ? */
                   2861:                        wedge = device_is_a(dv, "dk");
                   2862:                        if (wedge != dowedges) {
                   2863:                                continue;
                   2864:                        }
1.48      oster    2865:
1.335     mlelstv  2866:                        /* need to find the device_name_to_block_device_major stuff */
                   2867:                        bmajor = devsw_name2blk(device_xname(dv), NULL, 0);
1.296     buhrow   2868:
1.335     mlelstv  2869:                        rf_part_found = 0; /*No raid partition as yet*/
1.48      oster    2870:
1.335     mlelstv  2871:                        /* get a vnode for the raw partition of this disk */
                   2872:                        bminor = minor(device_unit(dv));
                   2873:                        dev = wedge ? makedev(bmajor, bminor) :
                   2874:                            MAKEDISKDEV(bmajor, bminor, RAW_PART);
                   2875:                        if (bdevvp(dev, &vp))
                   2876:                                panic("RAID can't alloc vnode");
1.48      oster    2877:
1.375     hannken  2878:                        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.335     mlelstv  2879:                        error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
1.48      oster    2880:
1.335     mlelstv  2881:                        if (error) {
                   2882:                                /* "Who cares."  Continue looking
                   2883:                                   for something that exists*/
                   2884:                                vput(vp);
                   2885:                                continue;
                   2886:                        }
1.48      oster    2887:
1.335     mlelstv  2888:                        error = getdisksize(vp, &numsecs, &secsize);
1.213     christos 2889:                        if (error) {
1.339     mlelstv  2890:                                /*
                   2891:                                 * Pseudo devices like vnd and cgd can be
                   2892:                                 * opened but may still need some configuration.
                   2893:                                 * Ignore these quietly.
                   2894:                                 */
                   2895:                                if (error != ENXIO)
                   2896:                                        printf("RAIDframe: can't get disk size"
                   2897:                                            " for dev %s (%d)\n",
                   2898:                                            device_xname(dv), error);
1.241     oster    2899:                                VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
                   2900:                                vput(vp);
1.213     christos 2901:                                continue;
                   2902:                        }
1.335     mlelstv  2903:                        if (wedge) {
                   2904:                                struct dkwedge_info dkw;
                   2905:                                error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
                   2906:                                    NOCRED);
                   2907:                                if (error) {
                   2908:                                        printf("RAIDframe: can't get wedge info for "
                   2909:                                            "dev %s (%d)\n", device_xname(dv), error);
                   2910:                                        VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
                   2911:                                        vput(vp);
                   2912:                                        continue;
                   2913:                                }
1.213     christos 2914:
1.335     mlelstv  2915:                                if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
                   2916:                                        VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
                   2917:                                        vput(vp);
                   2918:                                        continue;
                   2919:                                }
1.385     riastrad 2920:
1.375     hannken  2921:                                VOP_UNLOCK(vp);
1.335     mlelstv  2922:                                ac_list = rf_get_component(ac_list, dev, vp,
                   2923:                                    device_xname(dv), dkw.dkw_size, numsecs, secsize);
                   2924:                                rf_part_found = 1; /*There is a raid component on this disk*/
1.228     christos 2925:                                continue;
1.241     oster    2926:                        }
1.213     christos 2927:
1.335     mlelstv  2928:                        /* Ok, the disk exists.  Go get the disklabel. */
                   2929:                        error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
                   2930:                        if (error) {
                   2931:                                /*
                   2932:                                 * XXX can't happen - open() would
                   2933:                                 * have errored out (or faked up one)
                   2934:                                 */
                   2935:                                if (error != ENOTTY)
                   2936:                                        printf("RAIDframe: can't get label for dev "
                   2937:                                            "%s (%d)\n", device_xname(dv), error);
                   2938:                        }
1.48      oster    2939:
1.335     mlelstv  2940:                        /* don't need this any more.  We'll allocate it again
                   2941:                           a little later if we really do... */
                   2942:                        VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
                   2943:                        vput(vp);
1.48      oster    2944:
1.335     mlelstv  2945:                        if (error)
1.48      oster    2946:                                continue;
                   2947:
1.335     mlelstv  2948:                        rf_part_found = 0; /*No raid partitions yet*/
                   2949:                        for (i = 0; i < label.d_npartitions; i++) {
                   2950:                                char cname[sizeof(ac_list->devname)];
                   2951:
                   2952:                                /* We only support partitions marked as RAID */
                   2953:                                if (label.d_partitions[i].p_fstype != FS_RAID)
                   2954:                                        continue;
                   2955:
                   2956:                                dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
                   2957:                                if (bdevvp(dev, &vp))
                   2958:                                        panic("RAID can't alloc vnode");
                   2959:
1.375     hannken  2960:                                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.335     mlelstv  2961:                                error = VOP_OPEN(vp, FREAD, NOCRED);
                   2962:                                if (error) {
                   2963:                                        /* Whatever... */
                   2964:                                        vput(vp);
                   2965:                                        continue;
                   2966:                                }
1.375     hannken  2967:                                VOP_UNLOCK(vp);
1.335     mlelstv  2968:                                snprintf(cname, sizeof(cname), "%s%c",
                   2969:                                    device_xname(dv), 'a' + i);
                   2970:                                ac_list = rf_get_component(ac_list, dev, vp, cname,
                   2971:                                        label.d_partitions[i].p_size, numsecs, secsize);
                   2972:                                rf_part_found = 1; /*There is at least one raid partition on this disk*/
1.48      oster    2973:                        }
1.296     buhrow   2974:
1.335     mlelstv  2975:                        /*
                   2976:                         *If there is no raid component on this disk, either in a
                   2977:                         *disklabel or inside a wedge, check the raw partition as well,
                   2978:                         *as it is possible to configure raid components on raw disk
                   2979:                         *devices.
                   2980:                         */
1.296     buhrow   2981:
1.335     mlelstv  2982:                        if (!rf_part_found) {
                   2983:                                char cname[sizeof(ac_list->devname)];
1.296     buhrow   2984:
1.335     mlelstv  2985:                                dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
                   2986:                                if (bdevvp(dev, &vp))
                   2987:                                        panic("RAID can't alloc vnode");
                   2988:
1.375     hannken  2989:                                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
                   2990:
1.335     mlelstv  2991:                                error = VOP_OPEN(vp, FREAD, NOCRED);
                   2992:                                if (error) {
                   2993:                                        /* Whatever... */
                   2994:                                        vput(vp);
                   2995:                                        continue;
                   2996:                                }
1.375     hannken  2997:                                VOP_UNLOCK(vp);
1.335     mlelstv  2998:                                snprintf(cname, sizeof(cname), "%s%c",
                   2999:                                    device_xname(dv), 'a' + RAW_PART);
                   3000:                                ac_list = rf_get_component(ac_list, dev, vp, cname,
                   3001:                                        label.d_partitions[RAW_PART].p_size, numsecs, secsize);
1.296     buhrow   3002:                        }
1.48      oster    3003:                }
1.335     mlelstv  3004:                deviter_release(&di);
1.48      oster    3005:        }
1.213     christos 3006:        return ac_list;
1.48      oster    3007: }
1.186     perry    3008:
1.292     oster    3009: int
1.284     mrg      3010: rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
1.48      oster    3011: {
1.186     perry    3012:
1.393     mrg      3013:        if ((clabel->version==RF_COMPONENT_LABEL_VERSION_1 ||
1.394   ! mrg      3014:             clabel->version==RF_COMPONENT_LABEL_VERSION ||
        !          3015:             clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) &&
1.393     mrg      3016:            (clabel->clean == RF_RAID_CLEAN ||
                   3017:             clabel->clean == RF_RAID_DIRTY) &&
1.186     perry    3018:            clabel->row >=0 &&
                   3019:            clabel->column >= 0 &&
1.48      oster    3020:            clabel->num_rows > 0 &&
                   3021:            clabel->num_columns > 0 &&
1.186     perry    3022:            clabel->row < clabel->num_rows &&
1.48      oster    3023:            clabel->column < clabel->num_columns &&
                   3024:            clabel->blockSize > 0 &&
1.282     enami    3025:            /*
                   3026:             * numBlocksHi may contain garbage, but it is ok since
                   3027:             * the type is unsigned.  If it is really garbage,
                   3028:             * rf_fix_old_label_size() will fix it.
                   3029:             */
                   3030:            rf_component_label_numblocks(clabel) > 0) {
1.284     mrg      3031:                /*
                   3032:                 * label looks reasonable enough...
                   3033:                 * let's make sure it has no old garbage.
                   3034:                 */
1.292     oster    3035:                if (numsecs)
                   3036:                        rf_fix_old_label_size(clabel, numsecs);
1.48      oster    3037:                return(1);
                   3038:        }
                   3039:        return(0);
                   3040: }
                   3041:
                   3042:
1.278     mrg      3043: /*
                   3044:  * For reasons yet unknown, some old component labels have garbage in
                   3045:  * the newer numBlocksHi region, and this causes lossage.  Since those
                   3046:  * disks will also have numsecs set to less than 32 bits of sectors,
1.299     oster    3047:  * we can determine when this corruption has occurred, and fix it.
1.284     mrg      3048:  *
                   3049:  * The exact same problem, with the same unknown reason, happens to
                   3050:  * the partitionSizeHi member as well.
1.278     mrg      3051:  */
                   3052: static void
                   3053: rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
                   3054: {
                   3055:
1.284     mrg      3056:        if (numsecs < ((uint64_t)1 << 32)) {
                   3057:                if (clabel->numBlocksHi) {
                   3058:                        printf("WARNING: total sectors < 32 bits, yet "
                   3059:                               "numBlocksHi set\n"
                   3060:                               "WARNING: resetting numBlocksHi to zero.\n");
                   3061:                        clabel->numBlocksHi = 0;
                   3062:                }
                   3063:
                   3064:                if (clabel->partitionSizeHi) {
                   3065:                        printf("WARNING: total sectors < 32 bits, yet "
                   3066:                               "partitionSizeHi set\n"
                   3067:                               "WARNING: resetting partitionSizeHi to zero.\n");
                   3068:                        clabel->partitionSizeHi = 0;
                   3069:                }
1.278     mrg      3070:        }
                   3071: }
                   3072:
                   3073:
#ifdef DEBUG
/*
 * Print the fields of a component label with printf().
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	/* indexed by the low two bits of root_partition */
	static const char *root_names[] = {
	    "No", "Force", "Soft", "*invalid*"
	};
	const char *clean_str, *autoconf_str;
	uint64_t nblocks;

	nblocks = rf_component_label_numblocks(clabel);
	clean_str = clabel->clean ? "Yes" : "No";
	autoconf_str = clabel->autoconfigure ? "Yes" : "No";

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number, clabel->mod_counter);
	printf("   Clean: %s Status: %d\n", clean_str, clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	    (char) clabel->parityConfig, clabel->blockSize, nblocks);
	printf("   Autoconfig: %s\n", autoconf_str);
	printf("   Root partition: %s\n",
	    root_names[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif
}
#endif
1.48      oster    3107:
1.393     mrg      3108: static RF_ConfigSet_t *
1.169     oster    3109: rf_create_auto_sets(RF_AutoConfig_t *ac_list)
1.48      oster    3110: {
                   3111:        RF_AutoConfig_t *ac;
                   3112:        RF_ConfigSet_t *config_sets;
                   3113:        RF_ConfigSet_t *cset;
                   3114:        RF_AutoConfig_t *ac_next;
                   3115:
                   3116:
                   3117:        config_sets = NULL;
                   3118:
                   3119:        /* Go through the AutoConfig list, and figure out which components
                   3120:           belong to what sets.  */
                   3121:        ac = ac_list;
                   3122:        while(ac!=NULL) {
                   3123:                /* we're going to putz with ac->next, so save it here
                   3124:                   for use at the end of the loop */
                   3125:                ac_next = ac->next;
                   3126:
                   3127:                if (config_sets == NULL) {
                   3128:                        /* will need at least this one... */
1.379     chs      3129:                        config_sets = malloc(sizeof(RF_ConfigSet_t),
                   3130:                                       M_RAIDFRAME, M_WAITOK);
1.48      oster    3131:                        /* this one is easy :) */
                   3132:                        config_sets->ac = ac;
                   3133:                        config_sets->next = NULL;
1.51      oster    3134:                        config_sets->rootable = 0;
1.48      oster    3135:                        ac->next = NULL;
                   3136:                } else {
                   3137:                        /* which set does this component fit into? */
                   3138:                        cset = config_sets;
                   3139:                        while(cset!=NULL) {
1.49      oster    3140:                                if (rf_does_it_fit(cset, ac)) {
1.86      oster    3141:                                        /* looks like it matches... */
                   3142:                                        ac->next = cset->ac;
                   3143:                                        cset->ac = ac;
1.48      oster    3144:                                        break;
                   3145:                                }
                   3146:                                cset = cset->next;
                   3147:                        }
                   3148:                        if (cset==NULL) {
                   3149:                                /* didn't find a match above... new set..*/
1.379     chs      3150:                                cset = malloc(sizeof(RF_ConfigSet_t),
                   3151:                                               M_RAIDFRAME, M_WAITOK);
1.48      oster    3152:                                cset->ac = ac;
                   3153:                                ac->next = NULL;
                   3154:                                cset->next = config_sets;
1.51      oster    3155:                                cset->rootable = 0;
1.48      oster    3156:                                config_sets = cset;
                   3157:                        }
                   3158:                }
                   3159:                ac = ac_next;
                   3160:        }
                   3161:
                   3162:
                   3163:        return(config_sets);
                   3164: }
                   3165:
                   3166: static int
1.169     oster    3167: rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
1.48      oster    3168: {
                   3169:        RF_ComponentLabel_t *clabel1, *clabel2;
                   3170:
                   3171:        /* If this one matches the *first* one in the set, that's good
                   3172:           enough, since the other members of the set would have been
                   3173:           through here too... */
1.60      oster    3174:        /* note that we are not checking partitionSize here..
                   3175:
                   3176:           Note that we are also not checking the mod_counters here.
1.299     oster    3177:           If everything else matches except the mod_counter, that's
1.60      oster    3178:           good enough for this test.  We will deal with the mod_counters
1.186     perry    3179:           a little later in the autoconfiguration process.
1.60      oster    3180:
                   3181:            (clabel1->mod_counter == clabel2->mod_counter) &&
1.81      oster    3182:
                   3183:           The reason we don't check for this is that failed disks
                   3184:           will have lower modification counts.  If those disks are
                   3185:           not added to the set they used to belong to, then they will
                   3186:           form their own set, which may result in 2 different sets,
                   3187:           for example, competing to be configured at raid0, and
                   3188:           perhaps competing to be the root filesystem set.  If the
                   3189:           wrong ones get configured, or both attempt to become /,
                   3190:           weird behaviour and or serious lossage will occur.  Thus we
                   3191:           need to bring them into the fold here, and kick them out at
                   3192:           a later point.
1.60      oster    3193:
                   3194:        */
1.48      oster    3195:
                   3196:        clabel1 = cset->ac->clabel;
                   3197:        clabel2 = ac->clabel;
                   3198:        if ((clabel1->version == clabel2->version) &&
                   3199:            (clabel1->serial_number == clabel2->serial_number) &&
                   3200:            (clabel1->num_rows == clabel2->num_rows) &&
                   3201:            (clabel1->num_columns == clabel2->num_columns) &&
                   3202:            (clabel1->sectPerSU == clabel2->sectPerSU) &&
                   3203:            (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
                   3204:            (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
                   3205:            (clabel1->parityConfig == clabel2->parityConfig) &&
                   3206:            (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
                   3207:            (clabel1->blockSize == clabel2->blockSize) &&
1.282     enami    3208:            rf_component_label_numblocks(clabel1) ==
                   3209:            rf_component_label_numblocks(clabel2) &&
1.48      oster    3210:            (clabel1->autoconfigure == clabel2->autoconfigure) &&
                   3211:            (clabel1->root_partition == clabel2->root_partition) &&
                   3212:            (clabel1->last_unit == clabel2->last_unit) &&
                   3213:            (clabel1->config_order == clabel2->config_order)) {
                   3214:                /* if it get's here, it almost *has* to be a match */
                   3215:        } else {
1.186     perry    3216:                /* it's not consistent with somebody in the set..
1.48      oster    3217:                   punt */
                   3218:                return(0);
                   3219:        }
                   3220:        /* all was fine.. it must fit... */
                   3221:        return(1);
                   3222: }
                   3223:
1.393     mrg      3224: static int
1.169     oster    3225: rf_have_enough_components(RF_ConfigSet_t *cset)
1.48      oster    3226: {
1.51      oster    3227:        RF_AutoConfig_t *ac;
                   3228:        RF_AutoConfig_t *auto_config;
                   3229:        RF_ComponentLabel_t *clabel;
1.166     oster    3230:        int c;
1.51      oster    3231:        int num_cols;
                   3232:        int num_missing;
1.86      oster    3233:        int mod_counter;
1.87      oster    3234:        int mod_counter_found;
1.88      oster    3235:        int even_pair_failed;
                   3236:        char parity_type;
1.186     perry    3237:
1.51      oster    3238:
1.48      oster    3239:        /* check to see that we have enough 'live' components
                   3240:           of this set.  If so, we can configure it if necessary */
                   3241:
1.51      oster    3242:        num_cols = cset->ac->clabel->num_columns;
1.88      oster    3243:        parity_type = cset->ac->clabel->parityConfig;
1.51      oster    3244:
                   3245:        /* XXX Check for duplicate components!?!?!? */
                   3246:
1.86      oster    3247:        /* Determine what the mod_counter is supposed to be for this set. */
                   3248:
1.87      oster    3249:        mod_counter_found = 0;
1.101     oster    3250:        mod_counter = 0;
1.86      oster    3251:        ac = cset->ac;
                   3252:        while(ac!=NULL) {
1.87      oster    3253:                if (mod_counter_found==0) {
1.86      oster    3254:                        mod_counter = ac->clabel->mod_counter;
1.87      oster    3255:                        mod_counter_found = 1;
                   3256:                } else {
                   3257:                        if (ac->clabel->mod_counter > mod_counter) {
                   3258:                                mod_counter = ac->clabel->mod_counter;
                   3259:                        }
1.86      oster    3260:                }
                   3261:                ac = ac->next;
                   3262:        }
                   3263:
1.51      oster    3264:        num_missing = 0;
                   3265:        auto_config = cset->ac;
                   3266:
1.166     oster    3267:        even_pair_failed = 0;
                   3268:        for(c=0; c<num_cols; c++) {
                   3269:                ac = auto_config;
                   3270:                while(ac!=NULL) {
1.186     perry    3271:                        if ((ac->clabel->column == c) &&
1.166     oster    3272:                            (ac->clabel->mod_counter == mod_counter)) {
                   3273:                                /* it's this one... */
1.224     oster    3274: #ifdef DEBUG
1.166     oster    3275:                                printf("Found: %s at %d\n",
                   3276:                                       ac->devname,c);
1.51      oster    3277: #endif
1.166     oster    3278:                                break;
1.51      oster    3279:                        }
1.166     oster    3280:                        ac=ac->next;
                   3281:                }
                   3282:                if (ac==NULL) {
1.51      oster    3283:                                /* Didn't find one here! */
1.88      oster    3284:                                /* special case for RAID 1, especially
                   3285:                                   where there are more than 2
                   3286:                                   components (where RAIDframe treats
                   3287:                                   things a little differently :( ) */
1.166     oster    3288:                        if (parity_type == '1') {
                   3289:                                if (c%2 == 0) { /* even component */
                   3290:                                        even_pair_failed = 1;
                   3291:                                } else { /* odd component.  If
                   3292:                                            we're failed, and
                   3293:                                            so is the even
                   3294:                                            component, it's
                   3295:                                            "Good Night, Charlie" */
                   3296:                                        if (even_pair_failed == 1) {
                   3297:                                                return(0);
1.88      oster    3298:                                        }
                   3299:                                }
1.166     oster    3300:                        } else {
                   3301:                                /* normal accounting */
                   3302:                                num_missing++;
1.88      oster    3303:                        }
1.166     oster    3304:                }
                   3305:                if ((parity_type == '1') && (c%2 == 1)) {
1.88      oster    3306:                                /* Just did an even component, and we didn't
1.186     perry    3307:                                   bail.. reset the even_pair_failed flag,
1.88      oster    3308:                                   and go on to the next component.... */
1.166     oster    3309:                        even_pair_failed = 0;
1.51      oster    3310:                }
                   3311:        }
                   3312:
                   3313:        clabel = cset->ac->clabel;
                   3314:
                   3315:        if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
                   3316:            ((clabel->parityConfig == '4') && (num_missing > 1)) ||
                   3317:            ((clabel->parityConfig == '5') && (num_missing > 1))) {
                   3318:                /* XXX this needs to be made *much* more general */
                   3319:                /* Too many failures */
                   3320:                return(0);
                   3321:        }
                   3322:        /* otherwise, all is well, and we've got enough to take a kick
                   3323:           at autoconfiguring this set */
                   3324:        return(1);
1.48      oster    3325: }
                   3326:
1.393     mrg      3327: static void
1.169     oster    3328: rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
1.222     christos 3329:                        RF_Raid_t *raidPtr)
1.48      oster    3330: {
                   3331:        RF_ComponentLabel_t *clabel;
1.77      oster    3332:        int i;
1.48      oster    3333:
                   3334:        clabel = ac->clabel;
                   3335:
                   3336:        /* 1. Fill in the common stuff */
                   3337:        config->numCol = clabel->num_columns;
                   3338:        config->numSpare = 0; /* XXX should this be set here? */
                   3339:        config->sectPerSU = clabel->sectPerSU;
                   3340:        config->SUsPerPU = clabel->SUsPerPU;
                   3341:        config->SUsPerRU = clabel->SUsPerRU;
                   3342:        config->parityConfig = clabel->parityConfig;
                   3343:        /* XXX... */
                   3344:        strcpy(config->diskQueueType,"fifo");
                   3345:        config->maxOutstandingDiskReqs = clabel->maxOutstanding;
                   3346:        config->layoutSpecificSize = 0; /* XXX ?? */
                   3347:
                   3348:        while(ac!=NULL) {
                   3349:                /* row/col values will be in range due to the checks
                   3350:                   in reasonable_label() */
1.166     oster    3351:                strcpy(config->devnames[0][ac->clabel->column],
1.48      oster    3352:                       ac->devname);
                   3353:                ac = ac->next;
                   3354:        }
                   3355:
1.77      oster    3356:        for(i=0;i<RF_MAXDBGV;i++) {
1.163     fvdl     3357:                config->debugVars[i][0] = 0;
1.77      oster    3358:        }
1.48      oster    3359: }
                   3360:
1.393     mrg      3361: static int
1.169     oster    3362: rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
1.48      oster    3363: {
1.269     jld      3364:        RF_ComponentLabel_t *clabel;
1.166     oster    3365:        int column;
1.148     oster    3366:        int sparecol;
1.48      oster    3367:
1.54      oster    3368:        raidPtr->autoconfigure = new_value;
1.166     oster    3369:
                   3370:        for(column=0; column<raidPtr->numCol; column++) {
                   3371:                if (raidPtr->Disks[column].status == rf_ds_optimal) {
1.269     jld      3372:                        clabel = raidget_component_label(raidPtr, column);
                   3373:                        clabel->autoconfigure = new_value;
                   3374:                        raidflush_component_label(raidPtr, column);
1.48      oster    3375:                }
                   3376:        }
1.148     oster    3377:        for(column = 0; column < raidPtr->numSpare ; column++) {
                   3378:                sparecol = raidPtr->numCol + column;
1.166     oster    3379:                if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.269     jld      3380:                        clabel = raidget_component_label(raidPtr, sparecol);
                   3381:                        clabel->autoconfigure = new_value;
                   3382:                        raidflush_component_label(raidPtr, sparecol);
1.148     oster    3383:                }
                   3384:        }
1.48      oster    3385:        return(new_value);
                   3386: }
                   3387:
1.393     mrg      3388: static int
1.169     oster    3389: rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
1.48      oster    3390: {
1.269     jld      3391:        RF_ComponentLabel_t *clabel;
1.166     oster    3392:        int column;
1.148     oster    3393:        int sparecol;
1.48      oster    3394:
1.54      oster    3395:        raidPtr->root_partition = new_value;
1.166     oster    3396:        for(column=0; column<raidPtr->numCol; column++) {
                   3397:                if (raidPtr->Disks[column].status == rf_ds_optimal) {
1.269     jld      3398:                        clabel = raidget_component_label(raidPtr, column);
                   3399:                        clabel->root_partition = new_value;
                   3400:                        raidflush_component_label(raidPtr, column);
1.148     oster    3401:                }
                   3402:        }
                   3403:        for(column = 0; column < raidPtr->numSpare ; column++) {
                   3404:                sparecol = raidPtr->numCol + column;
1.166     oster    3405:                if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.269     jld      3406:                        clabel = raidget_component_label(raidPtr, sparecol);
                   3407:                        clabel->root_partition = new_value;
                   3408:                        raidflush_component_label(raidPtr, sparecol);
1.48      oster    3409:                }
                   3410:        }
                   3411:        return(new_value);
                   3412: }
                   3413:
1.393     mrg      3414: static void
1.169     oster    3415: rf_release_all_vps(RF_ConfigSet_t *cset)
1.48      oster    3416: {
                   3417:        RF_AutoConfig_t *ac;
1.186     perry    3418:
1.48      oster    3419:        ac = cset->ac;
                   3420:        while(ac!=NULL) {
                   3421:                /* Close the vp, and give it back */
                   3422:                if (ac->vp) {
1.96      oster    3423:                        vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
1.335     mlelstv  3424:                        VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
1.48      oster    3425:                        vput(ac->vp);
1.86      oster    3426:                        ac->vp = NULL;
1.48      oster    3427:                }
                   3428:                ac = ac->next;
                   3429:        }
                   3430: }
                   3431:
                   3432:
1.393     mrg      3433: static void
1.169     oster    3434: rf_cleanup_config_set(RF_ConfigSet_t *cset)
1.48      oster    3435: {
                   3436:        RF_AutoConfig_t *ac;
                   3437:        RF_AutoConfig_t *next_ac;
1.186     perry    3438:
1.48      oster    3439:        ac = cset->ac;
                   3440:        while(ac!=NULL) {
                   3441:                next_ac = ac->next;
                   3442:                /* nuke the label */
                   3443:                free(ac->clabel, M_RAIDFRAME);
                   3444:                /* cleanup the config structure */
                   3445:                free(ac, M_RAIDFRAME);
                   3446:                /* "next.." */
                   3447:                ac = next_ac;
                   3448:        }
                   3449:        /* and, finally, nuke the config set */
                   3450:        free(cset, M_RAIDFRAME);
                   3451: }
                   3452:
                   3453:
                   3454: void
1.169     oster    3455: raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
1.48      oster    3456: {
1.394   ! mrg      3457:        /* avoid over-writing byteswapped version. */
        !          3458:        if (clabel->version != bswap32(RF_COMPONENT_LABEL_VERSION))
        !          3459:                clabel->version = RF_COMPONENT_LABEL_VERSION;
1.57      oster    3460:        clabel->serial_number = raidPtr->serial_number;
1.48      oster    3461:        clabel->mod_counter = raidPtr->mod_counter;
1.269     jld      3462:
1.166     oster    3463:        clabel->num_rows = 1;
1.48      oster    3464:        clabel->num_columns = raidPtr->numCol;
                   3465:        clabel->clean = RF_RAID_DIRTY; /* not clean */
                   3466:        clabel->status = rf_ds_optimal; /* "It's good!" */
1.186     perry    3467:
1.48      oster    3468:        clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
                   3469:        clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
                   3470:        clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
1.54      oster    3471:
                   3472:        clabel->blockSize = raidPtr->bytesPerSector;
1.282     enami    3473:        rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);
1.54      oster    3474:
1.48      oster    3475:        /* XXX not portable */
                   3476:        clabel->parityConfig = raidPtr->Layout.map->parityConfig;
1.54      oster    3477:        clabel->maxOutstanding = raidPtr->maxOutstanding;
                   3478:        clabel->autoconfigure = raidPtr->autoconfigure;
                   3479:        clabel->root_partition = raidPtr->root_partition;
1.48      oster    3480:        clabel->last_unit = raidPtr->raidid;
1.54      oster    3481:        clabel->config_order = raidPtr->config_order;
1.269     jld      3482:
                   3483: #ifndef RF_NO_PARITY_MAP
                   3484:        rf_paritymap_init_label(raidPtr->parity_map, clabel);
                   3485: #endif
1.51      oster    3486: }
                   3487:
1.393     mrg      3488: static struct raid_softc *
1.300     christos 3489: rf_auto_config_set(RF_ConfigSet_t *cset)
1.51      oster    3490: {
                   3491:        RF_Raid_t *raidPtr;
                   3492:        RF_Config_t *config;
                   3493:        int raidID;
1.300     christos 3494:        struct raid_softc *sc;
1.51      oster    3495:
1.224     oster    3496: #ifdef DEBUG
1.72      oster    3497:        printf("RAID autoconfigure\n");
1.127     oster    3498: #endif
1.51      oster    3499:
                   3500:        /* 1. Create a config structure */
1.379     chs      3501:        config = malloc(sizeof(*config), M_RAIDFRAME, M_WAITOK|M_ZERO);
1.77      oster    3502:
1.186     perry    3503:        /*
                   3504:           2. Figure out what RAID ID this one is supposed to live at
1.51      oster    3505:           See if we can get the same RAID dev that it was configured
1.186     perry    3506:           on last time..
1.51      oster    3507:        */
                   3508:
                   3509:        raidID = cset->ac->clabel->last_unit;
1.327     pgoyette 3510:        for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
                   3511:             sc = raidget(++raidID, false))
1.300     christos 3512:                continue;
1.224     oster    3513: #ifdef DEBUG
1.72      oster    3514:        printf("Configuring raid%d:\n",raidID);
1.127     oster    3515: #endif
                   3516:
1.327     pgoyette 3517:        if (sc == NULL)
                   3518:                sc = raidget(raidID, true);
1.300     christos 3519:        raidPtr = &sc->sc_r;
1.51      oster    3520:
                   3521:        /* XXX all this stuff should be done SOMEWHERE ELSE! */
1.302     christos 3522:        raidPtr->softc = sc;
1.51      oster    3523:        raidPtr->raidid = raidID;
                   3524:        raidPtr->openings = RAIDOUTSTANDING;
                   3525:
                   3526:        /* 3. Build the configuration structure */
                   3527:        rf_create_configuration(cset->ac, config, raidPtr);
                   3528:
                   3529:        /* 4. Do the configuration */
1.300     christos 3530:        if (rf_Configure(raidPtr, config, cset->ac) == 0) {
                   3531:                raidinit(sc);
1.186     perry    3532:
1.300     christos 3533:                rf_markalldirty(raidPtr);
                   3534:                raidPtr->autoconfigure = 1; /* XXX do this here? */
1.308     christos 3535:                switch (cset->ac->clabel->root_partition) {
                   3536:                case 1: /* Force Root */
                   3537:                case 2: /* Soft Root: root when boot partition part of raid */
                   3538:                        /*
                   3539:                         * everything configured just fine.  Make a note
                   3540:                         * that this set is eligible to be root,
                   3541:                         * or forced to be root
                   3542:                         */
                   3543:                        cset->rootable = cset->ac->clabel->root_partition;
1.54      oster    3544:                        /* XXX do this here? */
1.308     christos 3545:                        raidPtr->root_partition = cset->rootable;
                   3546:                        break;
                   3547:                default:
                   3548:                        break;
1.51      oster    3549:                }
1.300     christos 3550:        } else {
                   3551:                raidput(sc);
                   3552:                sc = NULL;
1.51      oster    3553:        }
                   3554:
                   3555:        /* 5. Cleanup */
                   3556:        free(config, M_RAIDFRAME);
1.300     christos 3557:        return sc;
1.99      oster    3558: }
                   3559:
                   3560: void
1.187     christos 3561: rf_pool_init(struct pool *p, size_t size, const char *w_chan,
                   3562:             size_t xmin, size_t xmax)
1.177     oster    3563: {
1.352     christos 3564:
1.227     ad       3565:        pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
1.187     christos 3566:        pool_sethiwat(p, xmax);
1.382     chs      3567:        pool_prime(p, xmin);
1.177     oster    3568: }
1.190     oster    3569:
                   3570: /*
1.335     mlelstv  3571:  * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
                   3572:  * to see if there is IO pending and if that IO could possibly be done
                   3573:  * for a given RAID set.  Returns 0 if IO is waiting and can be done, 1
1.190     oster    3574:  * otherwise.
                   3575:  *
                   3576:  */
                   3577: int
1.300     christos 3578: rf_buf_queue_check(RF_Raid_t *raidPtr)
1.190     oster    3579: {
1.335     mlelstv  3580:        struct raid_softc *rs;
                   3581:        struct dk_softc *dksc;
                   3582:
                   3583:        rs = raidPtr->softc;
                   3584:        dksc = &rs->sc_dksc;
                   3585:
                   3586:        if ((rs->sc_flags & RAIDF_INITED) == 0)
                   3587:                return 1;
                   3588:
                   3589:        if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
1.190     oster    3590:                /* there is work to do */
                   3591:                return 0;
1.335     mlelstv  3592:        }
1.190     oster    3593:        /* default is nothing to do */
                   3594:        return 1;
                   3595: }
1.213     christos 3596:
                   3597: int
1.294     oster    3598: rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
1.213     christos 3599: {
1.275     mrg      3600:        uint64_t numsecs;
                   3601:        unsigned secsize;
1.213     christos 3602:        int error;
                   3603:
1.275     mrg      3604:        error = getdisksize(vp, &numsecs, &secsize);
1.213     christos 3605:        if (error == 0) {
1.275     mrg      3606:                diskPtr->blockSize = secsize;
                   3607:                diskPtr->numBlocks = numsecs - rf_protectedSectors;
                   3608:                diskPtr->partitionSize = numsecs;
1.213     christos 3609:                return 0;
                   3610:        }
                   3611:        return error;
                   3612: }
1.217     oster    3613:
                   3614: static int
1.261     dyoung   3615: raid_match(device_t self, cfdata_t cfdata, void *aux)
1.217     oster    3616: {
                   3617:        return 1;
                   3618: }
                   3619:
                   3620: static void
1.261     dyoung   3621: raid_attach(device_t parent, device_t self, void *aux)
1.217     oster    3622: {
                   3623: }
                   3624:
                   3625:
                   3626: static int
1.261     dyoung   3627: raid_detach(device_t self, int flags)
1.217     oster    3628: {
1.266     dyoung   3629:        int error;
1.335     mlelstv  3630:        struct raid_softc *rs = raidsoftc(self);
1.303     christos 3631:
                   3632:        if (rs == NULL)
                   3633:                return ENXIO;
1.266     dyoung   3634:
                   3635:        if ((error = raidlock(rs)) != 0)
1.389     skrll    3636:                return error;
1.217     oster    3637:
1.266     dyoung   3638:        error = raid_detach_unlocked(rs);
                   3639:
1.332     mlelstv  3640:        raidunlock(rs);
                   3641:
                   3642:        /* XXX raid can be referenced here */
                   3643:
                   3644:        if (error)
                   3645:                return error;
                   3646:
                   3647:        /* Free the softc */
                   3648:        raidput(rs);
                   3649:
                   3650:        return 0;
1.217     oster    3651: }
                   3652:
1.234     oster    3653: static void
1.304     christos 3654: rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
1.234     oster    3655: {
1.335     mlelstv  3656:        struct dk_softc *dksc = &rs->sc_dksc;
                   3657:        struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;
1.304     christos 3658:
                   3659:        memset(dg, 0, sizeof(*dg));
                   3660:
                   3661:        dg->dg_secperunit = raidPtr->totalSectors;
                   3662:        dg->dg_secsize = raidPtr->bytesPerSector;
                   3663:        dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
                   3664:        dg->dg_ntracks = 4 * raidPtr->numCol;
                   3665:
1.335     mlelstv  3666:        disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
1.234     oster    3667: }
1.252     oster    3668:
1.348     jdolecek 3669: /*
                   3670:  * Get cache info for all the components (including spares).
                   3671:  * Returns intersection of all the cache flags of all disks, or first
                   3672:  * error if any encountered.
                   3673:  * XXXfua feature flags can change as spares are added - lock down somehow
                   3674:  */
                   3675: static int
                   3676: rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
                   3677: {
                   3678:        int c;
                   3679:        int error;
                   3680:        int dkwhole = 0, dkpart;
1.385     riastrad 3681:
1.348     jdolecek 3682:        for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
                   3683:                /*
                   3684:                 * Check any non-dead disk, even when currently being
                   3685:                 * reconstructed.
                   3686:                 */
                   3687:                if (!RF_DEAD_DISK(raidPtr->Disks[c].status)
                   3688:                    || raidPtr->Disks[c].status == rf_ds_reconstructing) {
                   3689:                        error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
                   3690:                            DIOCGCACHE, &dkpart, FREAD, NOCRED);
                   3691:                        if (error) {
                   3692:                                if (error != ENODEV) {
                   3693:                                        printf("raid%d: get cache for component %s failed\n",
                   3694:                                            raidPtr->raidid,
                   3695:                                            raidPtr->Disks[c].devname);
                   3696:                                }
                   3697:
                   3698:                                return error;
                   3699:                        }
                   3700:
                   3701:                        if (c == 0)
                   3702:                                dkwhole = dkpart;
                   3703:                        else
                   3704:                                dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
                   3705:                }
                   3706:        }
                   3707:
1.349     jdolecek 3708:        *data = dkwhole;
1.348     jdolecek 3709:
                   3710:        return 0;
                   3711: }
                   3712:
1.385     riastrad 3713: /*
1.252     oster    3714:  * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
                   3715:  * We end up returning whatever error was returned by the first cache flush
                   3716:  * that fails.
                   3717:  */
                   3718:
1.386     christos 3719: static int
1.390     christos 3720: rf_sync_component_cache(RF_Raid_t *raidPtr, int c, int force)
1.386     christos 3721: {
                   3722:        int e = 0;
                   3723:        for (int i = 0; i < 5; i++) {
                   3724:                e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
                   3725:                    &force, FWRITE, NOCRED);
                   3726:                if (!e || e == ENODEV)
                   3727:                        return e;
                   3728:                printf("raid%d: cache flush[%d] to component %s failed (%d)\n",
                   3729:                    raidPtr->raidid, i, raidPtr->Disks[c].devname, e);
                   3730:        }
1.387     christos 3731:        return e;
1.386     christos 3732: }
                   3733:
1.269     jld      3734: int
1.390     christos 3735: rf_sync_component_caches(RF_Raid_t *raidPtr, int force)
1.252     oster    3736: {
1.386     christos 3737:        int c, error;
1.385     riastrad 3738:
1.252     oster    3739:        error = 0;
                   3740:        for (c = 0; c < raidPtr->numCol; c++) {
                   3741:                if (raidPtr->Disks[c].status == rf_ds_optimal) {
1.390     christos 3742:                        int e = rf_sync_component_cache(raidPtr, c, force);
1.387     christos 3743:                        if (e && !error)
1.386     christos 3744:                                error = e;
1.252     oster    3745:                }
                   3746:        }
                   3747:
1.386     christos 3748:        for (c = 0; c < raidPtr->numSpare ; c++) {
                   3749:                int sparecol = raidPtr->numCol + c;
1.252     oster    3750:                /* Need to ensure that the reconstruct actually completed! */
                   3751:                if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.390     christos 3752:                        int e = rf_sync_component_cache(raidPtr, sparecol,
                   3753:                            force);
1.387     christos 3754:                        if (e && !error)
1.386     christos 3755:                                error = e;
1.252     oster    3756:                }
                   3757:        }
                   3758:        return error;
                   3759: }
1.327     pgoyette 3760:
1.353     mrg      3761: /* Fill in info with the current status */
                   3762: void
                   3763: rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
                   3764: {
                   3765:
                   3766:        if (raidPtr->status != rf_rs_reconstructing) {
                   3767:                info->total = 100;
                   3768:                info->completed = 100;
                   3769:        } else {
                   3770:                info->total = raidPtr->reconControl->numRUsTotal;
                   3771:                info->completed = raidPtr->reconControl->numRUsComplete;
                   3772:        }
                   3773:        info->remaining = info->total - info->completed;
                   3774: }
                   3775:
                   3776: /* Fill in info with the current status */
                   3777: void
                   3778: rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
                   3779: {
                   3780:
                   3781:        if (raidPtr->parity_rewrite_in_progress == 1) {
                   3782:                info->total = raidPtr->Layout.numStripe;
                   3783:                info->completed = raidPtr->parity_rewrite_stripes_done;
                   3784:        } else {
                   3785:                info->completed = 100;
                   3786:                info->total = 100;
                   3787:        }
                   3788:        info->remaining = info->total - info->completed;
                   3789: }
                   3790:
                   3791: /* Fill in info with the current status */
                   3792: void
                   3793: rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
                   3794: {
                   3795:
                   3796:        if (raidPtr->copyback_in_progress == 1) {
                   3797:                info->total = raidPtr->Layout.numStripe;
                   3798:                info->completed = raidPtr->copyback_stripes_done;
                   3799:                info->remaining = info->total - info->completed;
                   3800:        } else {
                   3801:                info->remaining = 0;
                   3802:                info->completed = 100;
                   3803:                info->total = 100;
                   3804:        }
                   3805: }
                   3806:
                   3807: /* Fill in config with the current info */
                   3808: int
                   3809: rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
                   3810: {
                   3811:        int     d, i, j;
                   3812:
                   3813:        if (!raidPtr->valid)
1.389     skrll    3814:                return ENODEV;
1.353     mrg      3815:        config->cols = raidPtr->numCol;
                   3816:        config->ndevs = raidPtr->numCol;
                   3817:        if (config->ndevs >= RF_MAX_DISKS)
1.389     skrll    3818:                return ENOMEM;
1.353     mrg      3819:        config->nspares = raidPtr->numSpare;
                   3820:        if (config->nspares >= RF_MAX_DISKS)
1.389     skrll    3821:                return ENOMEM;
1.353     mrg      3822:        config->maxqdepth = raidPtr->maxQueueDepth;
                   3823:        d = 0;
                   3824:        for (j = 0; j < config->cols; j++) {
                   3825:                config->devs[d] = raidPtr->Disks[j];
                   3826:                d++;
                   3827:        }
                   3828:        for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
                   3829:                config->spares[i] = raidPtr->Disks[j];
                   3830:                if (config->spares[i].status == rf_ds_rebuilding_spare) {
                   3831:                        /* XXX: raidctl(8) expects to see this as a used spare */
                   3832:                        config->spares[i].status = rf_ds_used_spare;
                   3833:                }
                   3834:        }
                   3835:        return 0;
                   3836: }
                   3837:
                   3838: int
                   3839: rf_get_component_label(RF_Raid_t *raidPtr, void *data)
                   3840: {
                   3841:        RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
                   3842:        RF_ComponentLabel_t *raid_clabel;
                   3843:        int column = clabel->column;
                   3844:
                   3845:        if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
                   3846:                return EINVAL;
                   3847:        raid_clabel = raidget_component_label(raidPtr, column);
                   3848:        memcpy(clabel, raid_clabel, sizeof *clabel);
1.394   ! mrg      3849:        /* Fix-up for userland. */
        !          3850:        if (clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION))
        !          3851:                clabel->version = RF_COMPONENT_LABEL_VERSION;
1.353     mrg      3852:
                   3853:        return 0;
                   3854: }
                   3855:
1.327     pgoyette 3856: /*
                   3857:  * Module interface
                   3858:  */
                   3859:
1.356     pgoyette 3860: MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");
1.327     pgoyette 3861:
                   3862: #ifdef _MODULE
                   3863: CFDRIVER_DECL(raid, DV_DISK, NULL);
                   3864: #endif
                   3865:
                   3866: static int raid_modcmd(modcmd_t, void *);
                   3867: static int raid_modcmd_init(void);
                   3868: static int raid_modcmd_fini(void);
                   3869:
                   3870: static int
                   3871: raid_modcmd(modcmd_t cmd, void *data)
                   3872: {
                   3873:        int error;
                   3874:
                   3875:        error = 0;
                   3876:        switch (cmd) {
                   3877:        case MODULE_CMD_INIT:
                   3878:                error = raid_modcmd_init();
                   3879:                break;
                   3880:        case MODULE_CMD_FINI:
                   3881:                error = raid_modcmd_fini();
                   3882:                break;
                   3883:        default:
                   3884:                error = ENOTTY;
                   3885:                break;
                   3886:        }
                   3887:        return error;
                   3888: }
                   3889:
                   3890: static int
                   3891: raid_modcmd_init(void)
                   3892: {
                   3893:        int error;
                   3894:        int bmajor, cmajor;
                   3895:
                   3896:        mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
                   3897:        mutex_enter(&raid_lock);
                   3898: #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
                   3899:        rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
                   3900:        rf_init_cond2(rf_sparet_wait_cv, "sparetw");
                   3901:        rf_init_cond2(rf_sparet_resp_cv, "rfgst");
                   3902:
                   3903:        rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
                   3904: #endif
                   3905:
                   3906:        bmajor = cmajor = -1;
                   3907:        error = devsw_attach("raid", &raid_bdevsw, &bmajor,
                   3908:            &raid_cdevsw, &cmajor);
                   3909:        if (error != 0 && error != EEXIST) {
                   3910:                aprint_error("%s: devsw_attach failed %d\n", __func__, error);
                   3911:                mutex_exit(&raid_lock);
                   3912:                return error;
                   3913:        }
                   3914: #ifdef _MODULE
                   3915:        error = config_cfdriver_attach(&raid_cd);
                   3916:        if (error != 0) {
                   3917:                aprint_error("%s: config_cfdriver_attach failed %d\n",
                   3918:                    __func__, error);
                   3919:                devsw_detach(&raid_bdevsw, &raid_cdevsw);
                   3920:                mutex_exit(&raid_lock);
                   3921:                return error;
                   3922:        }
                   3923: #endif
                   3924:        error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
                   3925:        if (error != 0) {
                   3926:                aprint_error("%s: config_cfattach_attach failed %d\n",
                   3927:                    __func__, error);
                   3928: #ifdef _MODULE
                   3929:                config_cfdriver_detach(&raid_cd);
                   3930: #endif
                   3931:                devsw_detach(&raid_bdevsw, &raid_cdevsw);
                   3932:                mutex_exit(&raid_lock);
                   3933:                return error;
                   3934:        }
                   3935:
                   3936:        raidautoconfigdone = false;
                   3937:
                   3938:        mutex_exit(&raid_lock);
                   3939:
                   3940:        if (error == 0) {
                   3941:                if (rf_BootRaidframe(true) == 0)
                   3942:                        aprint_verbose("Kernelized RAIDframe activated\n");
                   3943:                else
                   3944:                        panic("Serious error activating RAID!!");
                   3945:        }
                   3946:
                   3947:        /*
                   3948:         * Register a finalizer which will be used to auto-config RAID
                   3949:         * sets once all real hardware devices have been found.
                   3950:         */
                   3951:        error = config_finalize_register(NULL, rf_autoconfig);
                   3952:        if (error != 0) {
                   3953:                aprint_error("WARNING: unable to register RAIDframe "
                   3954:                    "finalizer\n");
1.329     pgoyette 3955:                error = 0;
1.327     pgoyette 3956:        }
                   3957:
                   3958:        return error;
                   3959: }
                   3960:
                   3961: static int
                   3962: raid_modcmd_fini(void)
                   3963: {
                   3964:        int error;
                   3965:
                   3966:        mutex_enter(&raid_lock);
                   3967:
                   3968:        /* Don't allow unload if raid device(s) exist.  */
                   3969:        if (!LIST_EMPTY(&raids)) {
                   3970:                mutex_exit(&raid_lock);
                   3971:                return EBUSY;
                   3972:        }
                   3973:
                   3974:        error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
                   3975:        if (error != 0) {
1.335     mlelstv  3976:                aprint_error("%s: cannot detach cfattach\n",__func__);
1.327     pgoyette 3977:                mutex_exit(&raid_lock);
                   3978:                return error;
                   3979:        }
                   3980: #ifdef _MODULE
                   3981:        error = config_cfdriver_detach(&raid_cd);
                   3982:        if (error != 0) {
1.335     mlelstv  3983:                aprint_error("%s: cannot detach cfdriver\n",__func__);
1.327     pgoyette 3984:                config_cfattach_attach(raid_cd.cd_name, &raid_ca);
                   3985:                mutex_exit(&raid_lock);
                   3986:                return error;
                   3987:        }
                   3988: #endif
                   3989:        error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
                   3990:        if (error != 0) {
1.335     mlelstv  3991:                aprint_error("%s: cannot detach devsw\n",__func__);
1.327     pgoyette 3992: #ifdef _MODULE
                   3993:                config_cfdriver_attach(&raid_cd);
                   3994: #endif
                   3995:                config_cfattach_attach(raid_cd.cd_name, &raid_ca);
                   3996:                mutex_exit(&raid_lock);
                   3997:                return error;
                   3998:        }
                   3999:        rf_BootRaidframe(false);
                   4000: #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
                   4001:        rf_destroy_mutex2(rf_sparet_wait_mutex);
                   4002:        rf_destroy_cond2(rf_sparet_wait_cv);
                   4003:        rf_destroy_cond2(rf_sparet_resp_cv);
                   4004: #endif
                   4005:        mutex_exit(&raid_lock);
                   4006:        mutex_destroy(&raid_lock);
                   4007:
                   4008:        return error;
                   4009: }

CVSweb <webmaster@jp.NetBSD.org>