
Annotation of src/sys/dev/raidframe/rf_netbsdkintf.c, Revision 1.303

1.303   ! christos    1: /*     $NetBSD: rf_netbsdkintf.c,v 1.302 2013/04/29 21:21:10 christos Exp $    */
1.281     rmind       2:
1.1       oster       3: /*-
1.295     erh         4:  * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
1.1       oster       5:  * All rights reserved.
                      6:  *
                      7:  * This code is derived from software contributed to The NetBSD Foundation
                      8:  * by Greg Oster; Jason R. Thorpe.
                      9:  *
                     10:  * Redistribution and use in source and binary forms, with or without
                     11:  * modification, are permitted provided that the following conditions
                     12:  * are met:
                     13:  * 1. Redistributions of source code must retain the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer.
                     15:  * 2. Redistributions in binary form must reproduce the above copyright
                     16:  *    notice, this list of conditions and the following disclaimer in the
                     17:  *    documentation and/or other materials provided with the distribution.
                     18:  *
                     19:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     20:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     21:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     22:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     23:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     24:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     25:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     26:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     27:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     28:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     29:  * POSSIBILITY OF SUCH DAMAGE.
                     30:  */
                     31:
                     32: /*
1.281     rmind      33:  * Copyright (c) 1988 University of Utah.
1.1       oster      34:  * Copyright (c) 1990, 1993
                     35:  *      The Regents of the University of California.  All rights reserved.
                     36:  *
                     37:  * This code is derived from software contributed to Berkeley by
                     38:  * the Systems Programming Group of the University of Utah Computer
                     39:  * Science Department.
                     40:  *
                     41:  * Redistribution and use in source and binary forms, with or without
                     42:  * modification, are permitted provided that the following conditions
                     43:  * are met:
                     44:  * 1. Redistributions of source code must retain the above copyright
                     45:  *    notice, this list of conditions and the following disclaimer.
                     46:  * 2. Redistributions in binary form must reproduce the above copyright
                     47:  *    notice, this list of conditions and the following disclaimer in the
                     48:  *    documentation and/or other materials provided with the distribution.
1.162     agc        49:  * 3. Neither the name of the University nor the names of its contributors
                     50:  *    may be used to endorse or promote products derived from this software
                     51:  *    without specific prior written permission.
                     52:  *
                     53:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     54:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     55:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     56:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     57:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     58:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     59:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     60:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     61:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     62:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     63:  * SUCH DAMAGE.
                     64:  *
                     65:  * from: Utah $Hdr: cd.c 1.6 90/11/28$
                     66:  *
                     67:  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
                     68:  */
                     69:
                     70: /*
1.1       oster      71:  * Copyright (c) 1995 Carnegie-Mellon University.
                     72:  * All rights reserved.
                     73:  *
                     74:  * Authors: Mark Holland, Jim Zelenka
                     75:  *
                     76:  * Permission to use, copy, modify and distribute this software and
                     77:  * its documentation is hereby granted, provided that both the copyright
                     78:  * notice and this permission notice appear in all copies of the
                     79:  * software, derivative works or modified versions, and any portions
                     80:  * thereof, and that both notices appear in supporting documentation.
                     81:  *
                     82:  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
                     83:  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
                     84:  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
                     85:  *
                     86:  * Carnegie Mellon requests users of this software to return to
                     87:  *
                     88:  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
                     89:  *  School of Computer Science
                     90:  *  Carnegie Mellon University
                     91:  *  Pittsburgh PA 15213-3890
                     92:  *
                     93:  * any improvements or extensions that they make and grant Carnegie the
                     94:  * rights to redistribute these changes.
                     95:  */
                     96:
                     97: /***********************************************************
                     98:  *
                     99:  * rf_kintf.c -- the kernel interface routines for RAIDframe
                    100:  *
                    101:  ***********************************************************/
1.112     lukem     102:
                    103: #include <sys/cdefs.h>
1.303   ! christos  104: __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.302 2013/04/29 21:21:10 christos Exp $");
1.251     ad        105:
                    106: #ifdef _KERNEL_OPT
1.254     christos  107: #include "opt_compat_netbsd.h"
1.251     ad        108: #include "opt_raid_autoconfig.h"
                    109: #endif
1.1       oster     110:
1.113     lukem     111: #include <sys/param.h>
1.1       oster     112: #include <sys/errno.h>
                    113: #include <sys/pool.h>
1.152     thorpej   114: #include <sys/proc.h>
1.1       oster     115: #include <sys/queue.h>
                    116: #include <sys/disk.h>
                    117: #include <sys/device.h>
                    118: #include <sys/stat.h>
                    119: #include <sys/ioctl.h>
                    120: #include <sys/fcntl.h>
                    121: #include <sys/systm.h>
                    122: #include <sys/vnode.h>
                    123: #include <sys/disklabel.h>
                    124: #include <sys/conf.h>
                    125: #include <sys/buf.h>
1.182     yamt      126: #include <sys/bufq.h>
1.65      oster     127: #include <sys/reboot.h>
1.208     elad      128: #include <sys/kauth.h>
1.8       oster     129:
1.234     oster     130: #include <prop/proplib.h>
                    131:
1.110     oster     132: #include <dev/raidframe/raidframevar.h>
                    133: #include <dev/raidframe/raidframeio.h>
1.269     jld       134: #include <dev/raidframe/rf_paritymap.h>
1.251     ad        135:
1.1       oster     136: #include "rf_raid.h"
1.44      oster     137: #include "rf_copyback.h"
1.1       oster     138: #include "rf_dag.h"
                    139: #include "rf_dagflags.h"
1.99      oster     140: #include "rf_desc.h"
1.1       oster     141: #include "rf_diskqueue.h"
                    142: #include "rf_etimer.h"
                    143: #include "rf_general.h"
                    144: #include "rf_kintf.h"
                    145: #include "rf_options.h"
                    146: #include "rf_driver.h"
                    147: #include "rf_parityscan.h"
                    148: #include "rf_threadstuff.h"
                    149:
1.254     christos  150: #ifdef COMPAT_50
                    151: #include "rf_compat50.h"
                    152: #endif
                    153:
1.133     oster     154: #ifdef DEBUG
1.9       oster     155: int     rf_kdebug_level = 0;
1.1       oster     156: #define db1_printf(a) if (rf_kdebug_level > 0) printf a
1.9       oster     157: #else                          /* DEBUG */
1.1       oster     158: #define db1_printf(a) { }
1.9       oster     159: #endif                         /* DEBUG */
1.1       oster     160:
1.249     oster     161: #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1.289     mrg       162: static rf_declare_mutex2(rf_sparet_wait_mutex);
1.287     mrg       163: static rf_declare_cond2(rf_sparet_wait_cv);
                    164: static rf_declare_cond2(rf_sparet_resp_cv);
1.1       oster     165:
1.10      oster     166: static RF_SparetWait_t *rf_sparet_wait_queue;  /* requests to install a
                    167:                                                 * spare table */
                    168: static RF_SparetWait_t *rf_sparet_resp_queue;  /* responses from
                    169:                                                 * installation process */
1.249     oster     170: #endif
1.153     thorpej   171:
                    172: MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
1.10      oster     173:
1.1       oster     174: /* prototypes */
1.187     christos  175: static void KernelWakeupFunc(struct buf *);
                    176: static void InitBP(struct buf *, struct vnode *, unsigned,
1.225     christos  177:     dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
1.187     christos  178:     void *, int, struct proc *);
1.300     christos  179: struct raid_softc;
                    180: static void raidinit(struct raid_softc *);
1.1       oster     181:
1.104     oster     182: void raidattach(int);
1.261     dyoung    183: static int raid_match(device_t, cfdata_t, void *);
                    184: static void raid_attach(device_t, device_t, void *);
                    185: static int raid_detach(device_t, int);
1.130     gehenna   186:
1.269     jld       187: static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
                    188:     daddr_t, daddr_t);
                    189: static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
                    190:     daddr_t, daddr_t, int);
                    191:
1.276     mrg       192: static int raidwrite_component_label(unsigned,
                    193:     dev_t, struct vnode *, RF_ComponentLabel_t *);
                    194: static int raidread_component_label(unsigned,
                    195:     dev_t, struct vnode *, RF_ComponentLabel_t *);
1.269     jld       196:
                    197:
1.130     gehenna   198: dev_type_open(raidopen);
                    199: dev_type_close(raidclose);
                    200: dev_type_read(raidread);
                    201: dev_type_write(raidwrite);
                    202: dev_type_ioctl(raidioctl);
                    203: dev_type_strategy(raidstrategy);
                    204: dev_type_dump(raiddump);
                    205: dev_type_size(raidsize);
                    206:
                    207: const struct bdevsw raid_bdevsw = {
                    208:        raidopen, raidclose, raidstrategy, raidioctl,
                    209:        raiddump, raidsize, D_DISK
                    210: };
                    211:
                    212: const struct cdevsw raid_cdevsw = {
                    213:        raidopen, raidclose, raidread, raidwrite, raidioctl,
1.144     jdolecek  214:        nostop, notty, nopoll, nommap, nokqfilter, D_DISK
1.130     gehenna   215: };
1.1       oster     216:
1.235     oster     217: static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
                    218:
1.10      oster     219: struct raid_softc {
1.261     dyoung    220:        device_t sc_dev;
1.300     christos  221:        int     sc_unit;
1.10      oster     222:        int     sc_flags;       /* flags */
                    223:        int     sc_cflags;      /* configuration flags */
1.212     oster     224:        uint64_t sc_size;       /* size of the raid device */
1.10      oster     225:        char    sc_xname[20];   /* XXX external name */
                    226:        struct disk sc_dkdev;   /* generic disk device info */
1.191     yamt      227:        struct bufq_state *buf_queue;   /* used for the device queue */
1.300     christos  228:        RF_Raid_t sc_r;
                    229:        LIST_ENTRY(raid_softc) sc_link;
1.10      oster     230: };
1.1       oster     231: /* sc_flags */
                    232: #define RAIDF_INITED   0x01    /* unit has been initialized */
                    233: #define RAIDF_WLABEL   0x02    /* label area is writable */
                    234: #define RAIDF_LABELLING        0x04    /* unit is currently being labelled */
1.266     dyoung    235: #define RAIDF_SHUTDOWN 0x08    /* unit is being shut down */
1.1       oster     236: #define RAIDF_WANTED   0x40    /* someone is waiting to obtain a lock */
                    237: #define RAIDF_LOCKED   0x80    /* unit is locked */
                    238:
                    239: #define        raidunit(x)     DISKUNIT(x)
                    240:
1.202     oster     241: extern struct cfdriver raid_cd;
1.266     dyoung    242: CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
                    243:     raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
                    244:     DVF_DETACH_SHUTDOWN);
1.202     oster     245:
1.186     perry     246: /*
                    247:  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
                    248:  * Be aware that large numbers can allow the driver to consume a lot of
1.28      oster     249:  * kernel memory, especially on writes, and in degraded mode reads.
1.186     perry     250:  *
                    251:  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
                    252:  * a single 64K write will typically require 64K for the old data,
                    253:  * 64K for the old parity, and 64K for the new parity, for a total
1.28      oster     254:  * of 192K (if the parity buffer is not re-used immediately).
1.110     oster     255:  * Even if it is used immediately, that's still 128K, which when multiplied
1.28      oster     256:  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
1.186     perry     257:  *
1.28      oster     258:  * Now in degraded mode, for example, a 64K read on the above setup may
1.186     perry     259:  * require data reconstruction, which will require *all* of the 4 remaining
1.28      oster     260:  * disks to participate -- 4 * 32K/disk == 128K again.
1.20      oster     261:  */
                    262:
                    263: #ifndef RAIDOUTSTANDING
1.28      oster     264: #define RAIDOUTSTANDING   6
1.20      oster     265: #endif
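                         /*
                          * For example, with the default of 6 and the figures above, that
                          * is roughly 6 * 128K = 768K of buffer space (parity buffer
                          * re-used), or 6 * 192K = 1152K if it is not, on top of
                          * 6 * 64K = 384K of incoming data.
                          */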
                    266:
1.1       oster     267: #define RAIDLABELDEV(dev)      \
                    268:        (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
                    269:
                    270: /* declared here, and made public, for the benefit of KVM stuff.. */
1.9       oster     271:
1.186     perry     272: static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
1.104     oster     273:                                     struct disklabel *);
                    274: static void raidgetdisklabel(dev_t);
                    275: static void raidmakedisklabel(struct raid_softc *);
1.1       oster     276:
1.104     oster     277: static int raidlock(struct raid_softc *);
                    278: static void raidunlock(struct raid_softc *);
1.1       oster     279:
1.266     dyoung    280: static int raid_detach_unlocked(struct raid_softc *);
                    281:
1.104     oster     282: static void rf_markalldirty(RF_Raid_t *);
1.234     oster     283: static void rf_set_properties(struct raid_softc *, RF_Raid_t *);
1.48      oster     284:
1.104     oster     285: void rf_ReconThread(struct rf_recon_req *);
                    286: void rf_RewriteParityThread(RF_Raid_t *raidPtr);
                    287: void rf_CopybackThread(RF_Raid_t *raidPtr);
                    288: void rf_ReconstructInPlaceThread(struct rf_recon_req *);
1.261     dyoung    289: int rf_autoconfig(device_t);
1.142     thorpej   290: void rf_buildroothack(RF_ConfigSet_t *);
1.104     oster     291:
                    292: RF_AutoConfig_t *rf_find_raid_components(void);
                    293: RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
                    294: static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
1.292     oster     295: int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
1.104     oster     296: void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
                    297: int rf_set_autoconfig(RF_Raid_t *, int);
                    298: int rf_set_rootpartition(RF_Raid_t *, int);
                    299: void rf_release_all_vps(RF_ConfigSet_t *);
                    300: void rf_cleanup_config_set(RF_ConfigSet_t *);
                    301: int rf_have_enough_components(RF_ConfigSet_t *);
1.300     christos  302: struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
1.278     mrg       303: static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
1.48      oster     304:
1.295     erh       305: /*
                    306:  * Debugging, mostly.  Set to 0 to not allow autoconfig to take place.
                    307:  * Note that this is overridden by having RAID_AUTOCONFIG as an option
                    308:  * in the kernel config file.
                    309:  */
                    310: #ifdef RAID_AUTOCONFIG
                    311: int raidautoconfig = 1;
                    312: #else
                    313: int raidautoconfig = 0;
                    314: #endif
                    315: static bool raidautoconfigdone = false;
1.37      oster     316:
1.177     oster     317: struct RF_Pools_s rf_pools;
                    318:
1.300     christos  319: static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
                    320: static kmutex_t raid_lock;
1.1       oster     321:
1.300     christos  322: static struct raid_softc *
                    323: raidcreate(int unit) {
                    324:        struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
                    325:        if (sc == NULL) {
1.1       oster     326: #ifdef DIAGNOSTIC
1.300     christos  327:                printf("%s: out of memory\n", __func__);
1.1       oster     328: #endif
1.300     christos  329:                return NULL;
1.1       oster     330:        }
1.300     christos  331:        sc->sc_unit = unit;
                    332:        bufq_alloc(&sc->buf_queue, BUFQ_DISK_DEFAULT_STRAT, BUFQ_SORT_RAWBLOCK);
                    333:        return sc;
                    334: }
1.1       oster     335:
1.300     christos  336: static void
                    337: raiddestroy(struct raid_softc *sc) {
                    338:        bufq_free(sc->buf_queue);
                    339:        kmem_free(sc, sizeof(*sc));
                    340: }
1.50      oster     341:
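                         /*
                          * raidget() returns the softc for the given unit, creating one
                          * with raidcreate() and linking it onto the global `raids' list
                          * (under raid_lock) if none exists yet; raidput() below unlinks
                          * a softc from that list and releases it with raiddestroy().
                          */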
1.300     christos  342: static struct raid_softc *
                    343: raidget(int unit) {
                    344:        struct raid_softc *sc;
                    345:        if (unit < 0) {
                    346: #ifdef DIAGNOSTIC
                    347:                panic("%s: unit %d!", __func__, unit);
                    348: #endif
                    349:                return NULL;
                    350:        }
                    351:        mutex_enter(&raid_lock);
                    352:        LIST_FOREACH(sc, &raids, sc_link) {
                    353:                if (sc->sc_unit == unit) {
                    354:                        mutex_exit(&raid_lock);
                    355:                        return sc;
                    356:                }
                    357:        }
                    358:        mutex_exit(&raid_lock);
                    359:        if ((sc = raidcreate(unit)) == NULL)
                    360:                return NULL;
                    361:        mutex_enter(&raid_lock);
                    362:        LIST_INSERT_HEAD(&raids, sc, sc_link);
                    363:        mutex_exit(&raid_lock);
                    364:        return sc;
                    365: }
                    366:
                    367: static void
                    368: raidput(struct raid_softc *sc) {
                    369:        mutex_enter(&raid_lock);
                    370:        LIST_REMOVE(sc, sc_link);
                    371:        mutex_exit(&raid_lock);
                    372:        raiddestroy(sc);
                    373: }
1.1       oster     374:
1.300     christos  375: void
                    376: raidattach(int num)
                    377: {
                    378:        mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
                    379:        /* This is where all the initialization stuff gets done. */
1.116     thorpej   380:
1.249     oster     381: #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1.289     mrg       382:        rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
                    383:        rf_init_cond2(rf_sparet_wait_cv, "sparetw");
                    384:        rf_init_cond2(rf_sparet_resp_cv, "rfgst");
1.14      oster     385:
                    386:        rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
1.249     oster     387: #endif
1.14      oster     388:
1.300     christos  389:        if (rf_BootRaidframe() == 0)
1.274     chs       390:                aprint_verbose("Kernelized RAIDframe activated\n");
1.14      oster     391:        else
1.141     provos    392:                panic("Serious error booting RAID!!");
1.14      oster     393:
1.217     oster     394:        if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
1.239     jmcneill  395:                aprint_error("raidattach: config_cfattach_attach failed?\n");
1.217     oster     396:        }
                    397:
1.295     erh       398:        raidautoconfigdone = false;
1.62      oster     399:
1.142     thorpej   400:        /*
                    401:         * Register a finalizer which will be used to auto-config RAID
                    402:         * sets once all real hardware devices have been found.
                    403:         */
                    404:        if (config_finalize_register(NULL, rf_autoconfig) != 0)
1.239     jmcneill  405:                aprint_error("WARNING: unable to register RAIDframe finalizer\n");
1.142     thorpej   406: }
                    407:
                    408: int
1.261     dyoung    409: rf_autoconfig(device_t self)
1.142     thorpej   410: {
                    411:        RF_AutoConfig_t *ac_list;
                    412:        RF_ConfigSet_t *config_sets;
                    413:
1.295     erh       414:        if (!raidautoconfig || raidautoconfigdone == true)
1.142     thorpej   415:                return (0);
                    416:
                    417:        /* XXX This code can only be run once. */
1.295     erh       418:        raidautoconfigdone = true;
1.142     thorpej   419:
1.48      oster     420:        /* 1. locate all RAID components on the system */
1.258     ad        421:        aprint_debug("Searching for RAID components...\n");
1.48      oster     422:        ac_list = rf_find_raid_components();
                    423:
1.142     thorpej   424:        /* 2. Sort them into their respective sets. */
1.48      oster     425:        config_sets = rf_create_auto_sets(ac_list);
                    426:
1.142     thorpej   427:        /*
1.299     oster     428:         * 3. Evaluate each set and configure the valid ones.
1.142     thorpej   429:         * This gets done in rf_buildroothack().
                    430:         */
                    431:        rf_buildroothack(config_sets);
1.48      oster     432:
1.213     christos  433:        return 1;
1.48      oster     434: }
                    435:
                    436: void
1.142     thorpej   437: rf_buildroothack(RF_ConfigSet_t *config_sets)
1.48      oster     438: {
                    439:        RF_ConfigSet_t *cset;
                    440:        RF_ConfigSet_t *next_cset;
1.226     oster     441:        int col;
1.51      oster     442:        int num_root;
1.226     oster     443:        char *devname;
1.300     christos  444:        struct raid_softc *sc, *rsc;
1.48      oster     445:
1.300     christos  446:        sc = rsc = NULL;
1.51      oster     447:        num_root = 0;
1.48      oster     448:        cset = config_sets;
1.271     dyoung    449:        while (cset != NULL) {
1.48      oster     450:                next_cset = cset->next;
1.186     perry     451:                if (rf_have_enough_components(cset) &&
1.300     christos  452:                    cset->ac->clabel->autoconfigure == 1) {
                    453:                        sc = rf_auto_config_set(cset);
                    454:                        if (sc != NULL) {
                    455:                                aprint_debug("raid%d: configured ok\n",
                    456:                                    sc->sc_unit);
1.51      oster     457:                                if (cset->rootable) {
1.300     christos  458:                                        rsc = sc;
1.51      oster     459:                                        num_root++;
                    460:                                }
                    461:                        } else {
                    462:                                /* The autoconfig didn't work :( */
1.300     christos  463:                                aprint_debug("Autoconfig failed\n");
1.51      oster     464:                                rf_release_all_vps(cset);
1.48      oster     465:                        }
                    466:                } else {
1.186     perry     467:                        /* we're not autoconfiguring this set...
1.48      oster     468:                           release the associated resources */
1.49      oster     469:                        rf_release_all_vps(cset);
1.48      oster     470:                }
                    471:                /* cleanup */
1.49      oster     472:                rf_cleanup_config_set(cset);
1.48      oster     473:                cset = next_cset;
                    474:        }
1.122     oster     475:
1.223     oster     476:        /* if the user has specified what the root device should be
                    477:           then we don't touch booted_device or boothowto... */
                    478:
                    479:        if (rootspec != NULL)
                    480:                return;
                    481:
1.122     oster     482:        /* we found something bootable... */
                    483:
                    484:        if (num_root == 1) {
1.300     christos  485:                if (rsc->sc_dkdev.dk_nwedges != 0) {
1.297     christos  486:                        /* XXX: How do we find the real root partition? */
                    487:                        char cname[sizeof(cset->ac->devname)];
                    488:                        snprintf(cname, sizeof(cname), "%s%c",
1.300     christos  489:                            device_xname(rsc->sc_dev), 'a');
1.297     christos  490:                        booted_device = dkwedge_find_by_wname(cname);
                    491:                } else
1.300     christos  492:                        booted_device = rsc->sc_dev;
1.122     oster     493:        } else if (num_root > 1) {
1.226     oster     494:
                    495:                /*
                    496:                 * Maybe the MD code can help. If it cannot, then
                    497:                 * setroot() will discover that we have no
                    498:                 * booted_device and will ask the user if nothing was
                    499:                 * hardwired in the kernel config file
                    500:                 */
                    501:
                    502:                if (booted_device == NULL)
                    503:                        cpu_rootconf();
                    504:                if (booted_device == NULL)
                    505:                        return;
                    506:
                    507:                num_root = 0;
1.300     christos  508:                mutex_enter(&raid_lock);
                    509:                LIST_FOREACH(sc, &raids, sc_link) {
                    510:                        RF_Raid_t *r = &sc->sc_r;
                    511:                        if (r->valid == 0)
1.226     oster     512:                                continue;
                    513:
1.300     christos  514:                        if (r->root_partition == 0)
1.226     oster     515:                                continue;
                    516:
1.300     christos  517:                        for (col = 0; col < r->numCol; col++) {
                    518:                                devname = r->Disks[col].devname;
1.226     oster     519:                                devname += sizeof("/dev/") - 1;
1.245     cegger    520:                                if (strncmp(devname, device_xname(booted_device),
                    521:                                            strlen(device_xname(booted_device))) != 0)
1.226     oster     522:                                        continue;
1.258     ad        523:                                aprint_debug("raid%d includes boot device %s\n",
1.300     christos  524:                                       sc->sc_unit, devname);
1.226     oster     525:                                num_root++;
1.300     christos  526:                                rsc = sc;
1.226     oster     527:                        }
                    528:                }
1.300     christos  529:                mutex_exit(&raid_lock);
1.295     erh       530:
1.226     oster     531:                if (num_root == 1) {
1.300     christos  532:                        booted_device = rsc->sc_dev;
1.226     oster     533:                } else {
                    534:                        /* we can't guess.. require the user to answer... */
                    535:                        boothowto |= RB_ASKNAME;
                    536:                }
1.51      oster     537:        }
1.1       oster     538: }
                    539:
                    540:
                    541: int
1.169     oster     542: raidsize(dev_t dev)
1.1       oster     543: {
                    544:        struct raid_softc *rs;
                    545:        struct disklabel *lp;
1.9       oster     546:        int     part, unit, omask, size;
1.1       oster     547:
                    548:        unit = raidunit(dev);
1.300     christos  549:        if ((rs = raidget(unit)) == NULL)
                    550:                return -1;
1.1       oster     551:        if ((rs->sc_flags & RAIDF_INITED) == 0)
                    552:                return (-1);
                    553:
                    554:        part = DISKPART(dev);
                    555:        omask = rs->sc_dkdev.dk_openmask & (1 << part);
                    556:        lp = rs->sc_dkdev.dk_label;
                    557:
1.192     christos  558:        if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
1.1       oster     559:                return (-1);
                    560:
                    561:        if (lp->d_partitions[part].p_fstype != FS_SWAP)
                    562:                size = -1;
                    563:        else
                    564:                size = lp->d_partitions[part].p_size *
                    565:                    (lp->d_secsize / DEV_BSIZE);
                    566:
1.192     christos  567:        if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
1.1       oster     568:                return (-1);
                    569:
                    570:        return (size);
                    571:
                    572: }
                    573:
                    574: int
1.231     oster     575: raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
1.1       oster     576: {
1.231     oster     577:        int     unit = raidunit(dev);
                    578:        struct raid_softc *rs;
                    579:        const struct bdevsw *bdev;
                    580:        struct disklabel *lp;
                    581:        RF_Raid_t *raidPtr;
                    582:        daddr_t offset;
                    583:        int     part, c, sparecol, j, scol, dumpto;
                    584:        int     error = 0;
                    585:
1.300     christos  586:        if ((rs = raidget(unit)) == NULL)
                    587:                return ENXIO;
1.231     oster     588:
1.300     christos  589:        raidPtr = &rs->sc_r;
1.231     oster     590:
                    591:        if ((rs->sc_flags & RAIDF_INITED) == 0)
                    592:                return ENXIO;
                    593:
                    594:        /* we only support dumping to RAID 1 sets */
                    595:        if (raidPtr->Layout.numDataCol != 1 ||
                    596:            raidPtr->Layout.numParityCol != 1)
                    597:                return EINVAL;
                    598:
                    599:
                    600:        if ((error = raidlock(rs)) != 0)
                    601:                return error;
                    602:
                    603:        if (size % DEV_BSIZE != 0) {
                    604:                error = EINVAL;
                    605:                goto out;
                    606:        }
                    607:
                    608:        if (blkno + size / DEV_BSIZE > rs->sc_size) {
                    609:                printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
                    610:                    "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
                    611:                    size / DEV_BSIZE, rs->sc_size);
                    612:                error = EINVAL;
                    613:                goto out;
                    614:        }
                    615:
                    616:        part = DISKPART(dev);
                    617:        lp = rs->sc_dkdev.dk_label;
                    618:        offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;
                    619:
                    620:        /* figure out what device is alive.. */
                    621:
                    622:        /*
                    623:           Look for a component to dump to.  The preference for the
                    624:           component to dump to is as follows:
                    625:           1) the master
                    626:           2) a used_spare of the master
                    627:           3) the slave
                    628:           4) a used_spare of the slave
                    629:        */
                    630:
                    631:        dumpto = -1;
                    632:        for (c = 0; c < raidPtr->numCol; c++) {
                    633:                if (raidPtr->Disks[c].status == rf_ds_optimal) {
                    634:                        /* this might be the one */
                    635:                        dumpto = c;
                    636:                        break;
                    637:                }
                    638:        }
                    639:
                    640:        /*
                    641:           At this point we have possibly selected a live master or a
                    642:           live slave.  We now check to see if there is a spared
                    643:           master (or a spared slave), if we didn't find a live master
                    644:           or a live slave.
                    645:        */
                    646:
                    647:        for (c = 0; c < raidPtr->numSpare; c++) {
                    648:                sparecol = raidPtr->numCol + c;
                    649:                if (raidPtr->Disks[sparecol].status ==  rf_ds_used_spare) {
                    650:                        /* How about this one? */
                    651:                        scol = -1;
                    652:                        for(j=0;j<raidPtr->numCol;j++) {
                    653:                                if (raidPtr->Disks[j].spareCol == sparecol) {
                    654:                                        scol = j;
                    655:                                        break;
                    656:                                }
                    657:                        }
                    658:                        if (scol == 0) {
                    659:                                /*
                    660:                                   We must have found a spared master!
                    661:                                   We'll take that over anything else
                    662:                                   found so far.  (We couldn't have
                    663:                                   found a real master before, since
                    664:                                   this is a used spare, and it's
                    665:                                   saying that it's replacing the
                    666:                                   master.)  On reboot (with
                    667:                                   autoconfiguration turned on)
                    668:                                   sparecol will become the 1st
                    669:                                   component (component0) of this set.
                    670:                                */
                    671:                                dumpto = sparecol;
                    672:                                break;
                    673:                        } else if (scol != -1) {
                    674:                                /*
                    675:                                   Must be a spared slave.  We'll dump
                    676:                                   to that if we haven't found anything
                    677:                                   else so far.
                    678:                                */
                    679:                                if (dumpto == -1)
                    680:                                        dumpto = sparecol;
                    681:                        }
                    682:                }
                    683:        }
                    684:
                    685:        if (dumpto == -1) {
                    686:                /* we couldn't find any live components to dump to!?!?
                    687:                 */
                    688:                error = EINVAL;
                    689:                goto out;
                    690:        }
                    691:
                    692:        bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
                    693:
                    694:        /*
                    695:           Note that blkno is relative to this particular partition.
                    696:           By adding the offset of this partition in the RAID
                    697:           set, and also adding RF_PROTECTED_SECTORS, we get a
                    698:           value that is relative to the partition used for the
                    699:           underlying component.
                    700:        */
                    701:
                    702:        error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
                    703:                                blkno + offset, va, size);
                    704:
                    705: out:
                    706:        raidunlock(rs);
                    707:
                    708:        return error;
1.1       oster     709: }
                    710: /* ARGSUSED */
                    711: int
1.222     christos  712: raidopen(dev_t dev, int flags, int fmt,
                    713:     struct lwp *l)
1.1       oster     714: {
1.9       oster     715:        int     unit = raidunit(dev);
1.1       oster     716:        struct raid_softc *rs;
                    717:        struct disklabel *lp;
1.9       oster     718:        int     part, pmask;
                    719:        int     error = 0;
                    720:
1.300     christos  721:        if ((rs = raidget(unit)) == NULL)
                    722:                return ENXIO;
1.1       oster     723:        if ((error = raidlock(rs)) != 0)
1.9       oster     724:                return (error);
1.266     dyoung    725:
                    726:        if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
                    727:                error = EBUSY;
                    728:                goto bad;
                    729:        }
                    730:
1.1       oster     731:        lp = rs->sc_dkdev.dk_label;
                    732:
                    733:        part = DISKPART(dev);
1.213     christos  734:
                    735:        /*
                    736:         * If there are wedges, and this is not RAW_PART, then we
                    737:         * need to fail.
                    738:         */
                    739:        if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
                    740:                error = EBUSY;
                    741:                goto bad;
                    742:        }
1.1       oster     743:        pmask = (1 << part);
                    744:
                    745:        if ((rs->sc_flags & RAIDF_INITED) &&
                    746:            (rs->sc_dkdev.dk_openmask == 0))
1.9       oster     747:                raidgetdisklabel(dev);
1.1       oster     748:
                    749:        /* make sure that this partition exists */
                    750:
                    751:        if (part != RAW_PART) {
                    752:                if (((rs->sc_flags & RAIDF_INITED) == 0) ||
                    753:                    ((part >= lp->d_npartitions) ||
1.9       oster     754:                        (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
1.1       oster     755:                        error = ENXIO;
1.213     christos  756:                        goto bad;
1.1       oster     757:                }
                    758:        }
                    759:        /* Prevent this unit from being unconfigured while open. */
                    760:        switch (fmt) {
                    761:        case S_IFCHR:
                    762:                rs->sc_dkdev.dk_copenmask |= pmask;
                    763:                break;
                    764:
                    765:        case S_IFBLK:
                    766:                rs->sc_dkdev.dk_bopenmask |= pmask;
                    767:                break;
                    768:        }
1.13      oster     769:
1.186     perry     770:        if ((rs->sc_dkdev.dk_openmask == 0) &&
1.13      oster     771:            ((rs->sc_flags & RAIDF_INITED) != 0)) {
                    772:                /* First one... mark things as dirty... Note that we *MUST*
                    773:                 have done a configure before this.  I DO NOT WANT TO BE
                    774:                 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
                    775:                 THAT THEY BELONG TOGETHER!!!!! */
                    776:                /* XXX should check to see if we're only open for reading
                    777:                   here... If so, we needn't do this, but then need some
                    778:                   other way of keeping track of what's happened.. */
                    779:
1.300     christos  780:                rf_markalldirty(&rs->sc_r);
1.13      oster     781:        }
                    782:
                    783:
1.1       oster     784:        rs->sc_dkdev.dk_openmask =
                    785:            rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
                    786:
1.213     christos  787: bad:
1.1       oster     788:        raidunlock(rs);
                    789:
1.9       oster     790:        return (error);
1.1       oster     791:
                    792:
                    793: }
                    794: /* ARGSUSED */
                    795: int
1.222     christos  796: raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
1.1       oster     797: {
1.9       oster     798:        int     unit = raidunit(dev);
1.1       oster     799:        struct raid_softc *rs;
1.9       oster     800:        int     error = 0;
                    801:        int     part;
1.1       oster     802:
1.300     christos  803:        if ((rs = raidget(unit)) == NULL)
                    804:                return ENXIO;
1.1       oster     805:
                    806:        if ((error = raidlock(rs)) != 0)
                    807:                return (error);
                    808:
                    809:        part = DISKPART(dev);
                    810:
                    811:        /* ...that much closer to allowing unconfiguration... */
                    812:        switch (fmt) {
                    813:        case S_IFCHR:
                    814:                rs->sc_dkdev.dk_copenmask &= ~(1 << part);
                    815:                break;
                    816:
                    817:        case S_IFBLK:
                    818:                rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
                    819:                break;
                    820:        }
                    821:        rs->sc_dkdev.dk_openmask =
                    822:            rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
1.186     perry     823:
1.13      oster     824:        if ((rs->sc_dkdev.dk_openmask == 0) &&
                    825:            ((rs->sc_flags & RAIDF_INITED) != 0)) {
1.186     perry     826:                /* Last one... device is not unconfigured yet.
                    827:                   Device shutdown has taken care of setting the
                    828:                   clean bits if RAIDF_INITED is not set;
                    829:                   otherwise mark things as clean... */
1.147     oster     830:
1.300     christos  831:                rf_update_component_labels(&rs->sc_r,
1.91      oster     832:                                                 RF_FINAL_COMPONENT_UPDATE);
1.186     perry     833:
1.266     dyoung    834:                /* If the kernel is shutting down, it will detach
                    835:                 * this RAID set soon enough.
                    836:                 */
1.13      oster     837:        }
1.1       oster     838:
                    839:        raidunlock(rs);
                    840:        return (0);
                    841:
                    842: }
                    843:
                    844: void
1.169     oster     845: raidstrategy(struct buf *bp)
1.1       oster     846: {
1.300     christos  847:        unsigned int unit = raidunit(bp->b_dev);
1.1       oster     848:        RF_Raid_t *raidPtr;
1.9       oster     849:        int     wlabel;
1.300     christos  850:        struct raid_softc *rs;
1.1       oster     851:
1.300     christos  852:        if ((rs = raidget(unit)) == NULL) {
1.30      oster     853:                bp->b_error = ENXIO;
1.196     yamt      854:                goto done;
1.30      oster     855:        }
1.300     christos  856:        if ((rs->sc_flags & RAIDF_INITED) == 0) {
                    857:                bp->b_error = ENXIO;
1.196     yamt      858:                goto done;
1.1       oster     859:        }
1.300     christos  860:        raidPtr = &rs->sc_r;
1.1       oster     861:        if (!raidPtr->valid) {
                    862:                bp->b_error = ENODEV;
1.196     yamt      863:                goto done;
1.1       oster     864:        }
                    865:        if (bp->b_bcount == 0) {
                    866:                db1_printf(("b_bcount is zero..\n"));
1.196     yamt      867:                goto done;
1.1       oster     868:        }
                    869:
                    870:        /*
                    871:         * Do bounds checking and adjust transfer.  If there's an
                    872:         * error, the bounds check will flag that for us.
                    873:         */
                    874:
1.9       oster     875:        wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
1.196     yamt      876:        if (DISKPART(bp->b_dev) == RAW_PART) {
                    877:                uint64_t size; /* device size in DEV_BSIZE unit */
                    878:
                    879:                if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
                    880:                        size = raidPtr->totalSectors <<
                    881:                            (raidPtr->logBytesPerSector - DEV_BSHIFT);
                    882:                } else {
                    883:                        size = raidPtr->totalSectors >>
                    884:                            (DEV_BSHIFT - raidPtr->logBytesPerSector);
                    885:                }
                    886:                if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
                    887:                        goto done;
                    888:                }
                    889:        } else {
1.159     thorpej   890:                if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
1.1       oster     891:                        db1_printf(("Bounds check failed!!:%d %d\n",
1.9       oster     892:                                (int) bp->b_blkno, (int) wlabel));
1.196     yamt      893:                        goto done;
1.1       oster     894:                }
1.196     yamt      895:        }
1.285     mrg       896:
1.286     mrg       897:        rf_lock_mutex2(raidPtr->iodone_lock);
1.1       oster     898:
                    899:        bp->b_resid = 0;
1.34      oster     900:
                    901:        /* stuff it onto our queue */
1.253     yamt      902:        bufq_put(rs->buf_queue, bp);
1.34      oster     903:
1.190     oster     904:        /* schedule the IO to happen at the next convenient time */
1.286     mrg       905:        rf_signal_cond2(raidPtr->iodone_cv);
                    906:        rf_unlock_mutex2(raidPtr->iodone_lock);
1.34      oster     907:
1.196     yamt      908:        return;
                    909:
                    910: done:
                    911:        bp->b_resid = bp->b_bcount;
                    912:        biodone(bp);
1.1       oster     913: }
                    914: /* ARGSUSED */
                    915: int
1.222     christos  916: raidread(dev_t dev, struct uio *uio, int flags)
1.1       oster     917: {
1.9       oster     918:        int     unit = raidunit(dev);
1.1       oster     919:        struct raid_softc *rs;
                    920:
1.300     christos  921:        if ((rs = raidget(unit)) == NULL)
                    922:                return ENXIO;
1.1       oster     923:
                    924:        if ((rs->sc_flags & RAIDF_INITED) == 0)
                    925:                return (ENXIO);
                    926:
                    927:        return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
                    928:
                    929: }
                    930: /* ARGSUSED */
                    931: int
1.222     christos  932: raidwrite(dev_t dev, struct uio *uio, int flags)
1.1       oster     933: {
1.9       oster     934:        int     unit = raidunit(dev);
1.1       oster     935:        struct raid_softc *rs;
                    936:
1.300     christos  937:        if ((rs = raidget(unit)) == NULL)
                    938:                return ENXIO;
1.1       oster     939:
                    940:        if ((rs->sc_flags & RAIDF_INITED) == 0)
                    941:                return (ENXIO);
1.147     oster     942:
1.1       oster     943:        return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
                    944:
                    945: }
                    946:
1.266     dyoung    947: static int
                    948: raid_detach_unlocked(struct raid_softc *rs)
                    949: {
                    950:        int error;
                    951:        RF_Raid_t *raidPtr;
                    952:
1.300     christos  953:        raidPtr = &rs->sc_r;
1.266     dyoung    954:
                    955:        /*
                    956:         * If somebody has a partition mounted, we shouldn't
                    957:         * shut down.
                    958:         */
                    959:        if (rs->sc_dkdev.dk_openmask != 0)
                    960:                return EBUSY;
                    961:
                    962:        if ((rs->sc_flags & RAIDF_INITED) == 0)
                    963:                ;       /* not initialized: nothing to do */
                    964:        else if ((error = rf_Shutdown(raidPtr)) != 0)
                    965:                return error;
                    966:        else
                    967:                rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);
                    968:
                    969:        /* Detach the disk. */
1.280     christos  970:        dkwedge_delall(&rs->sc_dkdev);
1.266     dyoung    971:        disk_detach(&rs->sc_dkdev);
                    972:        disk_destroy(&rs->sc_dkdev);
                    973:
1.290     mrg       974:        aprint_normal_dev(rs->sc_dev, "detached\n");
                    975:
1.266     dyoung    976:        return 0;
                    977: }
                    978:
1.1       oster     979: int
1.225     christos  980: raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1.1       oster     981: {
1.9       oster     982:        int     unit = raidunit(dev);
                    983:        int     error = 0;
1.298     buhrow    984:        int     part, pmask, s;
1.262     cegger    985:        cfdata_t cf;
1.1       oster     986:        struct raid_softc *rs;
                    987:        RF_Config_t *k_cfg, *u_cfg;
1.42      oster     988:        RF_Raid_t *raidPtr;
1.48      oster     989:        RF_RaidDisk_t *diskPtr;
1.41      oster     990:        RF_AccTotals_t *totals;
                    991:        RF_DeviceConfig_t *d_cfg, **ucfgp;
1.1       oster     992:        u_char *specific_buf;
1.11      oster     993:        int retcode = 0;
                    994:        int column;
1.269     jld       995: /*     int raidid; */
1.1       oster     996:        struct rf_recon_req *rrcopy, *rr;
1.48      oster     997:        RF_ComponentLabel_t *clabel;
1.209     oster     998:        RF_ComponentLabel_t *ci_label;
1.48      oster     999:        RF_ComponentLabel_t **clabel_ptr;
1.12      oster    1000:        RF_SingleComponent_t *sparePtr,*componentPtr;
                   1001:        RF_SingleComponent_t component;
1.83      oster    1002:        RF_ProgressInfo_t progressInfo, **progressInfoPtr;
1.41      oster    1003:        int i, j, d;
1.102     fvdl     1004: #ifdef __HAVE_OLD_DISKLABEL
                   1005:        struct disklabel newlabel;
                   1006: #endif
1.213     christos 1007:        struct dkwedge_info *dkw;
1.1       oster    1008:
1.300     christos 1009:        if ((rs = raidget(unit)) == NULL)
                   1010:                return ENXIO;
                   1011:        raidPtr = &rs->sc_r;
1.1       oster    1012:
1.276     mrg      1013:        db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
                   1014:                (int) DISKPART(dev), (int) unit, cmd));
1.1       oster    1015:
                   1016:        /* Must be open for writes for these commands... */
                   1017:        switch (cmd) {
1.213     christos 1018: #ifdef DIOCGSECTORSIZE
                   1019:        case DIOCGSECTORSIZE:
                   1020:                *(u_int *)data = raidPtr->bytesPerSector;
                   1021:                return 0;
                   1022:        case DIOCGMEDIASIZE:
                   1023:                *(off_t *)data =
                   1024:                    (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
                   1025:                return 0;
                   1026: #endif
1.1       oster    1027:        case DIOCSDINFO:
                   1028:        case DIOCWDINFO:
1.102     fvdl     1029: #ifdef __HAVE_OLD_DISKLABEL
                   1030:        case ODIOCWDINFO:
                   1031:        case ODIOCSDINFO:
                   1032: #endif
1.1       oster    1033:        case DIOCWLABEL:
1.213     christos 1034:        case DIOCAWEDGE:
                   1035:        case DIOCDWEDGE:
1.298     buhrow   1036:        case DIOCSSTRATEGY:
1.1       oster    1037:                if ((flag & FWRITE) == 0)
                   1038:                        return (EBADF);
                   1039:        }
                   1040:
                   1041:        /* Must be initialized for these... */
                   1042:        switch (cmd) {
                   1043:        case DIOCGDINFO:
                   1044:        case DIOCSDINFO:
                   1045:        case DIOCWDINFO:
1.102     fvdl     1046: #ifdef __HAVE_OLD_DISKLABEL
                   1047:        case ODIOCGDINFO:
                   1048:        case ODIOCWDINFO:
                   1049:        case ODIOCSDINFO:
                   1050:        case ODIOCGDEFLABEL:
                   1051: #endif
1.1       oster    1052:        case DIOCGPART:
                   1053:        case DIOCWLABEL:
                   1054:        case DIOCGDEFLABEL:
1.213     christos 1055:        case DIOCAWEDGE:
                   1056:        case DIOCDWEDGE:
                   1057:        case DIOCLWEDGES:
1.252     oster    1058:        case DIOCCACHESYNC:
1.1       oster    1059:        case RAIDFRAME_SHUTDOWN:
                   1060:        case RAIDFRAME_REWRITEPARITY:
                   1061:        case RAIDFRAME_GET_INFO:
                   1062:        case RAIDFRAME_RESET_ACCTOTALS:
                   1063:        case RAIDFRAME_GET_ACCTOTALS:
                   1064:        case RAIDFRAME_KEEP_ACCTOTALS:
                   1065:        case RAIDFRAME_GET_SIZE:
                   1066:        case RAIDFRAME_FAIL_DISK:
                   1067:        case RAIDFRAME_COPYBACK:
1.37      oster    1068:        case RAIDFRAME_CHECK_RECON_STATUS:
1.83      oster    1069:        case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1.11      oster    1070:        case RAIDFRAME_GET_COMPONENT_LABEL:
                   1071:        case RAIDFRAME_SET_COMPONENT_LABEL:
                   1072:        case RAIDFRAME_ADD_HOT_SPARE:
                   1073:        case RAIDFRAME_REMOVE_HOT_SPARE:
                   1074:        case RAIDFRAME_INIT_LABELS:
1.12      oster    1075:        case RAIDFRAME_REBUILD_IN_PLACE:
1.23      oster    1076:        case RAIDFRAME_CHECK_PARITY:
1.37      oster    1077:        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1.83      oster    1078:        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1.37      oster    1079:        case RAIDFRAME_CHECK_COPYBACK_STATUS:
1.83      oster    1080:        case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1.48      oster    1081:        case RAIDFRAME_SET_AUTOCONFIG:
                   1082:        case RAIDFRAME_SET_ROOT:
1.73      oster    1083:        case RAIDFRAME_DELETE_COMPONENT:
                   1084:        case RAIDFRAME_INCORPORATE_HOT_SPARE:
1.269     jld      1085:        case RAIDFRAME_PARITYMAP_STATUS:
                   1086:        case RAIDFRAME_PARITYMAP_GET_DISABLE:
                   1087:        case RAIDFRAME_PARITYMAP_SET_DISABLE:
                   1088:        case RAIDFRAME_PARITYMAP_SET_PARAMS:
1.298     buhrow   1089:        case DIOCGSTRATEGY:
                   1090:        case DIOCSSTRATEGY:
1.1       oster    1091:                if ((rs->sc_flags & RAIDF_INITED) == 0)
                   1092:                        return (ENXIO);
                   1093:        }
1.9       oster    1094:
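                                 /*
                                  * RAIDframe-specific commands are handled in the switch
                                  * below; anything left unhandled falls through to the
                                  * generic disk ioctls at the end of this function.
                                  */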
1.1       oster    1095:        switch (cmd) {
1.254     christos 1096: #ifdef COMPAT_50
                   1097:        case RAIDFRAME_GET_INFO50:
                   1098:                return rf_get_info50(raidPtr, data);
                   1099:
                   1100:        case RAIDFRAME_CONFIGURE50:
                   1101:                if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
                   1102:                        return retcode;
                   1103:                goto config;
                   1104: #endif
1.1       oster    1105:                /* configure the system */
                   1106:        case RAIDFRAME_CONFIGURE:
1.48      oster    1107:
                   1108:                if (raidPtr->valid) {
                   1109:                        /* There is a valid RAID set running on this unit! */
                   1110:                        printf("raid%d: Device already configured!\n",unit);
1.66      oster    1111:                        return(EINVAL);
1.48      oster    1112:                }
                   1113:
1.1       oster    1114:                /* copy-in the configuration information */
                   1115:                /* data points to a pointer to the configuration structure */
1.43      oster    1116:
1.9       oster    1117:                u_cfg = *((RF_Config_t **) data);
                   1118:                RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1.1       oster    1119:                if (k_cfg == NULL) {
1.9       oster    1120:                        return (ENOMEM);
1.1       oster    1121:                }
1.156     dsl      1122:                retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1.1       oster    1123:                if (retcode) {
1.33      oster    1124:                        RF_Free(k_cfg, sizeof(RF_Config_t));
1.46      oster    1125:                        db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1.9       oster    1126:                                retcode));
                   1127:                        return (retcode);
1.1       oster    1128:                }
1.254     christos 1129:                goto config;
                   1130:        config:
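                                         /*
                                          * Common configuration path: both RAIDFRAME_CONFIGURE
                                          * above and the COMPAT_50 variant arrive here with a
                                          * kernel copy of the configuration in k_cfg.
                                          */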
1.9       oster    1131:                /* allocate a buffer for the layout-specific data, and copy it
                   1132:                 * in */
1.1       oster    1133:                if (k_cfg->layoutSpecificSize) {
1.9       oster    1134:                        if (k_cfg->layoutSpecificSize > 10000) {
1.1       oster    1135:                                /* sanity check */
1.33      oster    1136:                                RF_Free(k_cfg, sizeof(RF_Config_t));
1.9       oster    1137:                                return (EINVAL);
1.1       oster    1138:                        }
1.9       oster    1139:                        RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
                   1140:                            (u_char *));
1.1       oster    1141:                        if (specific_buf == NULL) {
1.9       oster    1142:                                RF_Free(k_cfg, sizeof(RF_Config_t));
                   1143:                                return (ENOMEM);
1.1       oster    1144:                        }
1.156     dsl      1145:                        retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1.9       oster    1146:                            k_cfg->layoutSpecificSize);
1.1       oster    1147:                        if (retcode) {
1.33      oster    1148:                                RF_Free(k_cfg, sizeof(RF_Config_t));
1.186     perry    1149:                                RF_Free(specific_buf,
1.42      oster    1150:                                        k_cfg->layoutSpecificSize);
1.46      oster    1151:                                db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1.9       oster    1152:                                        retcode));
                   1153:                                return (retcode);
1.1       oster    1154:                        }
1.9       oster    1155:                } else
                   1156:                        specific_buf = NULL;
1.1       oster    1157:                k_cfg->layoutSpecific = specific_buf;
1.9       oster    1158:
                   1159:                /* should do some kind of sanity check on the configuration.
                   1160:                 * Store the sum of all the bytes in the last byte? */
1.1       oster    1161:
                   1162:                /* configure the system */
                   1163:
1.48      oster    1164:                /*
                   1165:                 * Clear the entire RAID descriptor, just to make sure
1.186     perry    1166:                 *  there is no stale data left in the case of a
                   1167:                 *  reconfiguration
1.48      oster    1168:                 */
1.277     christos 1169:                memset(raidPtr, 0, sizeof(*raidPtr));
1.302     christos 1170:                raidPtr->softc = rs;
1.42      oster    1171:                raidPtr->raidid = unit;
1.20      oster    1172:
1.48      oster    1173:                retcode = rf_Configure(raidPtr, k_cfg, NULL);
1.1       oster    1174:
1.40      oster    1175:                if (retcode == 0) {
1.37      oster    1176:
1.186     perry    1177:                        /* allow this many simultaneous I/Os to
1.40      oster    1178:                           this RAID device */
1.42      oster    1179:                        raidPtr->openings = RAIDOUTSTANDING;
1.186     perry    1180:
1.300     christos 1181:                        raidinit(rs);
1.59      oster    1182:                        rf_markalldirty(raidPtr);
1.9       oster    1183:                }
1.1       oster    1184:                /* free the buffers.  No return code here. */
                   1185:                if (k_cfg->layoutSpecificSize) {
1.9       oster    1186:                        RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1.1       oster    1187:                }
1.9       oster    1188:                RF_Free(k_cfg, sizeof(RF_Config_t));
                   1189:
                   1190:                return (retcode);
                   1191:
                   1192:                /* shutdown the system */
1.1       oster    1193:        case RAIDFRAME_SHUTDOWN:
1.9       oster    1194:
1.266     dyoung   1195:                part = DISKPART(dev);
                   1196:                pmask = (1 << part);
                   1197:
1.9       oster    1198:                if ((error = raidlock(rs)) != 0)
                   1199:                        return (error);
1.1       oster    1200:
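                                         /*
                                          * The unit is busy if any other partition is open, or
                                          * if the partition used for this ioctl is open through
                                          * both the block and character device.
                                          */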
1.9       oster    1201:                if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
                   1202:                    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1.266     dyoung   1203:                        (rs->sc_dkdev.dk_copenmask & pmask)))
                   1204:                        retcode = EBUSY;
                   1205:                else {
                   1206:                        rs->sc_flags |= RAIDF_SHUTDOWN;
                   1207:                        rs->sc_dkdev.dk_copenmask &= ~pmask;
                   1208:                        rs->sc_dkdev.dk_bopenmask &= ~pmask;
                   1209:                        rs->sc_dkdev.dk_openmask &= ~pmask;
                   1210:                        retcode = 0;
1.9       oster    1211:                }
1.11      oster    1212:
1.266     dyoung   1213:                raidunlock(rs);
1.1       oster    1214:
1.266     dyoung   1215:                if (retcode != 0)
                   1216:                        return retcode;
1.16      oster    1217:
1.217     oster    1218:                /* free the pseudo device attach bits */
                   1219:
                   1220:                cf = device_cfdata(rs->sc_dev);
1.266     dyoung   1221:                if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
                   1222:                        free(cf, M_RAIDFRAME);
1.1       oster    1223:
1.9       oster    1224:                return (retcode);
1.11      oster    1225:        case RAIDFRAME_GET_COMPONENT_LABEL:
1.48      oster    1226:                clabel_ptr = (RF_ComponentLabel_t **) data;
1.11      oster    1227:                /* need to read the component label for the disk indicated
1.48      oster    1228:                   by row,column in clabel */
1.11      oster    1229:
1.269     jld      1230:                /*
                   1231:                 * Perhaps there should be an option to skip the in-core
                   1232:                 * copy and hit the disk, as with disklabel(8).
                   1233:                 */
                   1234:                RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
1.11      oster    1235:
1.277     christos 1236:                retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));
1.11      oster    1237:
                   1238:                if (retcode) {
1.277     christos 1239:                        RF_Free(clabel, sizeof(*clabel));
                   1240:                        return retcode;
1.11      oster    1241:                }
                   1242:
1.166     oster    1243:                clabel->row = 0; /* Don't allow looking at anything else.*/
                   1244:
1.48      oster    1245:                column = clabel->column;
1.26      oster    1246:
1.166     oster    1247:                if ((column < 0) || (column >= raidPtr->numCol +
1.277     christos 1248:                    raidPtr->numSpare)) {
                   1249:                        RF_Free(clabel, sizeof(*clabel));
                   1250:                        return EINVAL;
1.11      oster    1251:                }
                   1252:
1.269     jld      1253:                RF_Free(clabel, sizeof(*clabel));
                   1254:
                   1255:                clabel = raidget_component_label(raidPtr, column);
1.11      oster    1256:
1.277     christos 1257:                return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));
1.11      oster    1258:
1.269     jld      1259: #if 0
1.11      oster    1260:        case RAIDFRAME_SET_COMPONENT_LABEL:
1.48      oster    1261:                clabel = (RF_ComponentLabel_t *) data;
1.11      oster    1262:
                   1263:                /* XXX check the label for valid stuff... */
                   1264:                /* Note that some things *should not* get modified --
1.186     perry    1265:                   the user should be re-initing the labels instead of
1.11      oster    1266:                   trying to patch things.
                   1267:                   */
                   1268:
1.123     oster    1269:                raidid = raidPtr->raidid;
1.224     oster    1270: #ifdef DEBUG
1.123     oster    1271:                printf("raid%d: Got component label:\n", raidid);
                   1272:                printf("raid%d: Version: %d\n", raidid, clabel->version);
                   1273:                printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
                   1274:                printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
                   1275:                printf("raid%d: Column: %d\n", raidid, clabel->column);
                   1276:                printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
                   1277:                printf("raid%d: Clean: %d\n", raidid, clabel->clean);
                   1278:                printf("raid%d: Status: %d\n", raidid, clabel->status);
1.174     oster    1279: #endif
1.166     oster    1280:                clabel->row = 0;
1.48      oster    1281:                column = clabel->column;
1.12      oster    1282:
1.166     oster    1283:                if ((column < 0) || (column >= raidPtr->numCol)) {
1.12      oster    1284:                        return(EINVAL);
1.11      oster    1285:                }
1.12      oster    1286:
                   1287:                /* XXX this isn't allowed to do anything for now :-) */
1.48      oster    1288:
                   1289:                /* XXX and before it is, we need to fill in the rest
                   1290:                   of the fields!?!?!?! */
1.269     jld      1291:                memcpy(raidget_component_label(raidPtr, column),
                   1292:                    clabel, sizeof(*clabel));
                   1293:                raidflush_component_label(raidPtr, column);
                   1294:                return (0);
1.12      oster    1295: #endif
1.11      oster    1296:
1.186     perry    1297:        case RAIDFRAME_INIT_LABELS:
1.48      oster    1298:                clabel = (RF_ComponentLabel_t *) data;
1.186     perry    1299:                /*
1.11      oster    1300:                   we only want the serial number from
                   1301:                   the above.  We get all the rest of the information
                   1302:                   from the config that was used to create this RAID
1.186     perry    1303:                   set.
1.11      oster    1304:                   */
1.12      oster    1305:
1.48      oster    1306:                raidPtr->serial_number = clabel->serial_number;
1.186     perry    1307:
1.166     oster    1308:                for(column=0;column<raidPtr->numCol;column++) {
                   1309:                        diskPtr = &raidPtr->Disks[column];
                   1310:                        if (!RF_DEAD_DISK(diskPtr->status)) {
1.269     jld      1311:                                ci_label = raidget_component_label(raidPtr,
                   1312:                                    column);
                   1313:                                /* Zeroing this is important. */
                   1314:                                memset(ci_label, 0, sizeof(*ci_label));
                   1315:                                raid_init_component_label(raidPtr, ci_label);
                   1316:                                ci_label->serial_number =
                   1317:                                    raidPtr->serial_number;
                    1318:                                ci_label->row = 0; /* we don't pretend to support more */
1.282     enami    1319:                                rf_component_label_set_partitionsize(ci_label,
                   1320:                                    diskPtr->partitionSize);
1.209     oster    1321:                                ci_label->column = column;
1.269     jld      1322:                                raidflush_component_label(raidPtr, column);
1.11      oster    1323:                        }
1.269     jld      1324:                        /* XXXjld what about the spares? */
1.11      oster    1325:                }
1.209     oster    1326:
1.11      oster    1327:                return (retcode);
1.48      oster    1328:        case RAIDFRAME_SET_AUTOCONFIG:
1.78      minoura  1329:                d = rf_set_autoconfig(raidPtr, *(int *) data);
1.186     perry    1330:                printf("raid%d: New autoconfig value is: %d\n",
1.123     oster    1331:                       raidPtr->raidid, d);
1.78      minoura  1332:                *(int *) data = d;
1.48      oster    1333:                return (retcode);
                   1334:
                   1335:        case RAIDFRAME_SET_ROOT:
1.78      minoura  1336:                d = rf_set_rootpartition(raidPtr, *(int *) data);
1.186     perry    1337:                printf("raid%d: New rootpartition value is: %d\n",
1.123     oster    1338:                       raidPtr->raidid, d);
1.78      minoura  1339:                *(int *) data = d;
1.48      oster    1340:                return (retcode);
1.9       oster    1341:
1.1       oster    1342:                /* initialize all parity */
                   1343:        case RAIDFRAME_REWRITEPARITY:
                   1344:
1.42      oster    1345:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.17      oster    1346:                        /* Parity for RAID 0 is trivially correct */
1.42      oster    1347:                        raidPtr->parity_good = RF_RAID_CLEAN;
1.17      oster    1348:                        return(0);
                   1349:                }
1.186     perry    1350:
1.42      oster    1351:                if (raidPtr->parity_rewrite_in_progress == 1) {
1.37      oster    1352:                        /* Re-write is already in progress! */
                   1353:                        return(EINVAL);
                   1354:                }
1.27      oster    1355:
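                                         /*
                                          * The parity rewrite runs asynchronously in its own
                                          * kernel thread; progress can be polled with
                                          * RAIDFRAME_CHECK_PARITYREWRITE_STATUS.
                                          */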
1.42      oster    1356:                retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1.37      oster    1357:                                           rf_RewriteParityThread,
1.42      oster    1358:                                           raidPtr,"raid_parity");
1.9       oster    1359:                return (retcode);
                   1360:
1.11      oster    1361:
                   1362:        case RAIDFRAME_ADD_HOT_SPARE:
1.12      oster    1363:                sparePtr = (RF_SingleComponent_t *) data;
1.209     oster    1364:                memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
                   1365:                retcode = rf_add_hot_spare(raidPtr, &component);
1.11      oster    1366:                return(retcode);
                   1367:
                   1368:        case RAIDFRAME_REMOVE_HOT_SPARE:
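                                         /*
                                          * Removing a hot spare is not implemented here; this
                                          * just returns the (still zero) retcode.
                                          */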
1.73      oster    1369:                return(retcode);
                   1370:
                   1371:        case RAIDFRAME_DELETE_COMPONENT:
                   1372:                componentPtr = (RF_SingleComponent_t *)data;
1.186     perry    1373:                memcpy( &component, componentPtr,
1.73      oster    1374:                        sizeof(RF_SingleComponent_t));
                   1375:                retcode = rf_delete_component(raidPtr, &component);
                   1376:                return(retcode);
                   1377:
                   1378:        case RAIDFRAME_INCORPORATE_HOT_SPARE:
                   1379:                componentPtr = (RF_SingleComponent_t *)data;
1.186     perry    1380:                memcpy( &component, componentPtr,
1.73      oster    1381:                        sizeof(RF_SingleComponent_t));
                   1382:                retcode = rf_incorporate_hot_spare(raidPtr, &component);
1.11      oster    1383:                return(retcode);
                   1384:
1.12      oster    1385:        case RAIDFRAME_REBUILD_IN_PLACE:
1.24      oster    1386:
1.42      oster    1387:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.24      oster    1388:                        /* Can't do this on a RAID 0!! */
                   1389:                        return(EINVAL);
                   1390:                }
                   1391:
1.42      oster    1392:                if (raidPtr->recon_in_progress == 1) {
1.37      oster    1393:                        /* a reconstruct is already in progress! */
                   1394:                        return(EINVAL);
                   1395:                }
                   1396:
1.12      oster    1397:                componentPtr = (RF_SingleComponent_t *) data;
1.186     perry    1398:                memcpy( &component, componentPtr,
1.12      oster    1399:                        sizeof(RF_SingleComponent_t));
1.166     oster    1400:                component.row = 0; /* we don't support any more */
1.12      oster    1401:                column = component.column;
1.147     oster    1402:
1.166     oster    1403:                if ((column < 0) || (column >= raidPtr->numCol)) {
1.12      oster    1404:                        return(EINVAL);
                   1405:                }
1.37      oster    1406:
1.291     mrg      1407:                rf_lock_mutex2(raidPtr->mutex);
1.166     oster    1408:                if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1.186     perry    1409:                    (raidPtr->numFailures > 0)) {
1.149     oster    1410:                        /* XXX 0 above shouldn't be constant!!! */
                   1411:                        /* some component other than this has failed.
                   1412:                           Let's not make things worse than they already
                   1413:                           are... */
                   1414:                        printf("raid%d: Unable to reconstruct to disk at:\n",
                   1415:                               raidPtr->raidid);
1.166     oster    1416:                        printf("raid%d:     Col: %d   Too many failures.\n",
                   1417:                               raidPtr->raidid, column);
1.291     mrg      1418:                        rf_unlock_mutex2(raidPtr->mutex);
1.149     oster    1419:                        return (EINVAL);
                   1420:                }
1.186     perry    1421:                if (raidPtr->Disks[column].status ==
1.149     oster    1422:                    rf_ds_reconstructing) {
                   1423:                        printf("raid%d: Unable to reconstruct to disk at:\n",
                   1424:                               raidPtr->raidid);
1.299     oster    1425:                        printf("raid%d:    Col: %d   Reconstruction already occurring!\n", raidPtr->raidid, column);
1.186     perry    1426:
1.291     mrg      1427:                        rf_unlock_mutex2(raidPtr->mutex);
1.149     oster    1428:                        return (EINVAL);
                   1429:                }
1.166     oster    1430:                if (raidPtr->Disks[column].status == rf_ds_spared) {
1.291     mrg      1431:                        rf_unlock_mutex2(raidPtr->mutex);
1.149     oster    1432:                        return (EINVAL);
                   1433:                }
1.291     mrg      1434:                rf_unlock_mutex2(raidPtr->mutex);
1.149     oster    1435:
1.37      oster    1436:                RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1.38      oster    1437:                if (rrcopy == NULL)
                   1438:                        return(ENOMEM);
1.37      oster    1439:
1.42      oster    1440:                rrcopy->raidPtr = (void *) raidPtr;
1.37      oster    1441:                rrcopy->col = column;
                   1442:
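                                         /*
                                          * The rebuild itself runs in a separate kernel thread;
                                          * this ioctl only starts it.  Progress is reported via
                                          * RAIDFRAME_CHECK_RECON_STATUS.
                                          */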
1.42      oster    1443:                retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1.37      oster    1444:                                           rf_ReconstructInPlaceThread,
                   1445:                                           rrcopy,"raid_reconip");
1.12      oster    1446:                return(retcode);
                   1447:
1.1       oster    1448:        case RAIDFRAME_GET_INFO:
1.42      oster    1449:                if (!raidPtr->valid)
1.41      oster    1450:                        return (ENODEV);
                   1451:                ucfgp = (RF_DeviceConfig_t **) data;
                   1452:                RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
                   1453:                          (RF_DeviceConfig_t *));
                   1454:                if (d_cfg == NULL)
                   1455:                        return (ENOMEM);
1.166     oster    1456:                d_cfg->rows = 1; /* there is only 1 row now */
1.42      oster    1457:                d_cfg->cols = raidPtr->numCol;
1.166     oster    1458:                d_cfg->ndevs = raidPtr->numCol;
1.41      oster    1459:                if (d_cfg->ndevs >= RF_MAX_DISKS) {
                   1460:                        RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
                   1461:                        return (ENOMEM);
                   1462:                }
1.42      oster    1463:                d_cfg->nspares = raidPtr->numSpare;
1.41      oster    1464:                if (d_cfg->nspares >= RF_MAX_DISKS) {
                   1465:                        RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
                   1466:                        return (ENOMEM);
                   1467:                }
1.42      oster    1468:                d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1.41      oster    1469:                d = 0;
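                                         /*
                                          * Copy the per-component and per-spare disk structures
                                          * into the device config that will be copied out to
                                          * userland.
                                          */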
1.166     oster    1470:                for (j = 0; j < d_cfg->cols; j++) {
                   1471:                        d_cfg->devs[d] = raidPtr->Disks[j];
                   1472:                        d++;
1.41      oster    1473:                }
                   1474:                for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1.166     oster    1475:                        d_cfg->spares[i] = raidPtr->Disks[j];
1.41      oster    1476:                }
1.156     dsl      1477:                retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1.41      oster    1478:                RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
                   1479:
                   1480:                return (retcode);
1.9       oster    1481:
1.22      oster    1482:        case RAIDFRAME_CHECK_PARITY:
1.42      oster    1483:                *(int *) data = raidPtr->parity_good;
1.22      oster    1484:                return (0);
1.41      oster    1485:
1.269     jld      1486:        case RAIDFRAME_PARITYMAP_STATUS:
1.273     jld      1487:                if (rf_paritymap_ineligible(raidPtr))
                   1488:                        return EINVAL;
1.269     jld      1489:                rf_paritymap_status(raidPtr->parity_map,
                   1490:                    (struct rf_pmstat *)data);
                   1491:                return 0;
                   1492:
                   1493:        case RAIDFRAME_PARITYMAP_SET_PARAMS:
1.273     jld      1494:                if (rf_paritymap_ineligible(raidPtr))
                   1495:                        return EINVAL;
1.269     jld      1496:                if (raidPtr->parity_map == NULL)
                   1497:                        return ENOENT; /* ??? */
                   1498:                if (0 != rf_paritymap_set_params(raidPtr->parity_map,
                   1499:                        (struct rf_pmparams *)data, 1))
                   1500:                        return EINVAL;
                   1501:                return 0;
                   1502:
                   1503:        case RAIDFRAME_PARITYMAP_GET_DISABLE:
1.273     jld      1504:                if (rf_paritymap_ineligible(raidPtr))
                   1505:                        return EINVAL;
1.269     jld      1506:                *(int *) data = rf_paritymap_get_disable(raidPtr);
                   1507:                return 0;
                   1508:
                   1509:        case RAIDFRAME_PARITYMAP_SET_DISABLE:
1.273     jld      1510:                if (rf_paritymap_ineligible(raidPtr))
                   1511:                        return EINVAL;
1.269     jld      1512:                rf_paritymap_set_disable(raidPtr, *(int *)data);
                   1513:                /* XXX should errors be passed up? */
                   1514:                return 0;
                   1515:
1.1       oster    1516:        case RAIDFRAME_RESET_ACCTOTALS:
1.108     thorpej  1517:                memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1.41      oster    1518:                return (0);
1.9       oster    1519:
1.1       oster    1520:        case RAIDFRAME_GET_ACCTOTALS:
1.41      oster    1521:                totals = (RF_AccTotals_t *) data;
1.42      oster    1522:                *totals = raidPtr->acc_totals;
1.41      oster    1523:                return (0);
1.9       oster    1524:
1.1       oster    1525:        case RAIDFRAME_KEEP_ACCTOTALS:
1.42      oster    1526:                raidPtr->keep_acc_totals = *(int *)data;
1.41      oster    1527:                return (0);
1.9       oster    1528:
1.1       oster    1529:        case RAIDFRAME_GET_SIZE:
1.42      oster    1530:                *(int *) data = raidPtr->totalSectors;
1.9       oster    1531:                return (0);
1.1       oster    1532:
                   1533:                /* fail a disk & optionally start reconstruction */
                   1534:        case RAIDFRAME_FAIL_DISK:
1.24      oster    1535:
1.42      oster    1536:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.24      oster    1537:                        /* Can't do this on a RAID 0!! */
                   1538:                        return(EINVAL);
                   1539:                }
                   1540:
1.1       oster    1541:                rr = (struct rf_recon_req *) data;
1.166     oster    1542:                rr->row = 0;
                   1543:                if (rr->col < 0 || rr->col >= raidPtr->numCol)
1.9       oster    1544:                        return (EINVAL);
1.149     oster    1545:
                   1546:
1.291     mrg      1547:                rf_lock_mutex2(raidPtr->mutex);
1.185     oster    1548:                if (raidPtr->status == rf_rs_reconstructing) {
                   1549:                        /* you can't fail a disk while we're reconstructing! */
                   1550:                        /* XXX wrong for RAID6 */
1.291     mrg      1551:                        rf_unlock_mutex2(raidPtr->mutex);
1.185     oster    1552:                        return (EINVAL);
                   1553:                }
1.186     perry    1554:                if ((raidPtr->Disks[rr->col].status ==
                   1555:                     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1.149     oster    1556:                        /* some other component has failed.  Let's not make
                   1557:                           things worse. XXX wrong for RAID6 */
1.291     mrg      1558:                        rf_unlock_mutex2(raidPtr->mutex);
1.149     oster    1559:                        return (EINVAL);
                   1560:                }
1.166     oster    1561:                if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1.149     oster    1562:                        /* Can't fail a spared disk! */
1.291     mrg      1563:                        rf_unlock_mutex2(raidPtr->mutex);
1.149     oster    1564:                        return (EINVAL);
                   1565:                }
1.291     mrg      1566:                rf_unlock_mutex2(raidPtr->mutex);
1.1       oster    1567:
1.9       oster    1568:                /* make a copy of the recon request so that we don't rely on
                   1569:                 * the user's buffer */
1.1       oster    1570:                RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1.38      oster    1571:                if (rrcopy == NULL)
                   1572:                        return(ENOMEM);
1.118     wiz      1573:                memcpy(rrcopy, rr, sizeof(*rr));
1.42      oster    1574:                rrcopy->raidPtr = (void *) raidPtr;
1.1       oster    1575:
1.42      oster    1576:                retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1.37      oster    1577:                                           rf_ReconThread,
                   1578:                                           rrcopy,"raid_recon");
1.9       oster    1579:                return (0);
                   1580:
                   1581:                /* invoke a copyback operation after recon on whatever disk
                   1582:                 * needs it, if any */
                   1583:        case RAIDFRAME_COPYBACK:
1.24      oster    1584:
1.42      oster    1585:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.24      oster    1586:                        /* This makes no sense on a RAID 0!! */
                   1587:                        return(EINVAL);
                   1588:                }
                   1589:
1.42      oster    1590:                if (raidPtr->copyback_in_progress == 1) {
1.37      oster    1591:                        /* Copyback is already in progress! */
                   1592:                        return(EINVAL);
                   1593:                }
1.27      oster    1594:
1.42      oster    1595:                retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1.37      oster    1596:                                           rf_CopybackThread,
1.42      oster    1597:                                           raidPtr,"raid_copyback");
1.37      oster    1598:                return (retcode);
1.9       oster    1599:
1.1       oster    1600:                /* return the percentage completion of reconstruction */
1.37      oster    1601:        case RAIDFRAME_CHECK_RECON_STATUS:
1.42      oster    1602:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.71      oster    1603:                        /* This makes no sense on a RAID 0, so tell the
                   1604:                           user it's done. */
                   1605:                        *(int *) data = 100;
                   1606:                        return(0);
1.24      oster    1607:                }
1.166     oster    1608:                if (raidPtr->status != rf_rs_reconstructing)
1.1       oster    1609:                        *(int *) data = 100;
1.171     oster    1610:                else {
                   1611:                        if (raidPtr->reconControl->numRUsTotal > 0) {
                   1612:                                *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
                   1613:                        } else {
                   1614:                                *(int *) data = 0;
                   1615:                        }
                   1616:                }
1.9       oster    1617:                return (0);
1.83      oster    1618:        case RAIDFRAME_CHECK_RECON_STATUS_EXT:
                   1619:                progressInfoPtr = (RF_ProgressInfo_t **) data;
1.166     oster    1620:                if (raidPtr->status != rf_rs_reconstructing) {
1.83      oster    1621:                        progressInfo.remaining = 0;
                   1622:                        progressInfo.completed = 100;
                   1623:                        progressInfo.total = 100;
                   1624:                } else {
1.186     perry    1625:                        progressInfo.total =
1.166     oster    1626:                                raidPtr->reconControl->numRUsTotal;
1.186     perry    1627:                        progressInfo.completed =
1.166     oster    1628:                                raidPtr->reconControl->numRUsComplete;
1.83      oster    1629:                        progressInfo.remaining = progressInfo.total -
                   1630:                                progressInfo.completed;
                   1631:                }
1.156     dsl      1632:                retcode = copyout(&progressInfo, *progressInfoPtr,
1.83      oster    1633:                                  sizeof(RF_ProgressInfo_t));
                   1634:                return (retcode);
1.9       oster    1635:
1.37      oster    1636:        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1.42      oster    1637:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.80      oster    1638:                        /* This makes no sense on a RAID 0, so tell the
                   1639:                           user it's done. */
                   1640:                        *(int *) data = 100;
                   1641:                        return(0);
1.37      oster    1642:                }
1.42      oster    1643:                if (raidPtr->parity_rewrite_in_progress == 1) {
1.186     perry    1644:                        *(int *) data = 100 *
                   1645:                                raidPtr->parity_rewrite_stripes_done /
1.83      oster    1646:                                raidPtr->Layout.numStripe;
1.37      oster    1647:                } else {
                   1648:                        *(int *) data = 100;
                   1649:                }
                   1650:                return (0);
                   1651:
1.83      oster    1652:        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
                   1653:                progressInfoPtr = (RF_ProgressInfo_t **) data;
                   1654:                if (raidPtr->parity_rewrite_in_progress == 1) {
                   1655:                        progressInfo.total = raidPtr->Layout.numStripe;
1.186     perry    1656:                        progressInfo.completed =
1.83      oster    1657:                                raidPtr->parity_rewrite_stripes_done;
                   1658:                        progressInfo.remaining = progressInfo.total -
                   1659:                                progressInfo.completed;
                   1660:                } else {
                   1661:                        progressInfo.remaining = 0;
                   1662:                        progressInfo.completed = 100;
                   1663:                        progressInfo.total = 100;
                   1664:                }
1.156     dsl      1665:                retcode = copyout(&progressInfo, *progressInfoPtr,
1.83      oster    1666:                                  sizeof(RF_ProgressInfo_t));
                   1667:                return (retcode);
                   1668:
1.37      oster    1669:        case RAIDFRAME_CHECK_COPYBACK_STATUS:
1.42      oster    1670:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.37      oster    1671:                        /* This makes no sense on a RAID 0 */
1.83      oster    1672:                        *(int *) data = 100;
                   1673:                        return(0);
1.37      oster    1674:                }
1.42      oster    1675:                if (raidPtr->copyback_in_progress == 1) {
                   1676:                        *(int *) data = 100 * raidPtr->copyback_stripes_done /
                   1677:                                raidPtr->Layout.numStripe;
1.37      oster    1678:                } else {
                   1679:                        *(int *) data = 100;
                   1680:                }
                   1681:                return (0);
                   1682:
1.83      oster    1683:        case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1.93      oster    1684:                progressInfoPtr = (RF_ProgressInfo_t **) data;
1.83      oster    1685:                if (raidPtr->copyback_in_progress == 1) {
                   1686:                        progressInfo.total = raidPtr->Layout.numStripe;
1.186     perry    1687:                        progressInfo.completed =
1.93      oster    1688:                                raidPtr->copyback_stripes_done;
1.83      oster    1689:                        progressInfo.remaining = progressInfo.total -
                   1690:                                progressInfo.completed;
                   1691:                } else {
                   1692:                        progressInfo.remaining = 0;
                   1693:                        progressInfo.completed = 100;
                   1694:                        progressInfo.total = 100;
                   1695:                }
1.156     dsl      1696:                retcode = copyout(&progressInfo, *progressInfoPtr,
1.83      oster    1697:                                  sizeof(RF_ProgressInfo_t));
                   1698:                return (retcode);
1.37      oster    1699:
1.9       oster    1700:                /* the sparetable daemon calls this to wait for the kernel to
                   1701:                 * need a spare table. this ioctl does not return until a
                   1702:                 * spare table is needed. XXX -- calling mpsleep here in the
                   1703:                 * ioctl code is almost certainly wrong and evil. -- XXX XXX
                   1704:                 * -- I should either compute the spare table in the kernel,
                   1705:                 * or have a different -- XXX XXX -- interface (a different
1.42      oster    1706:                 * character device) for delivering the table     -- XXX */
1.250     oster    1707: #if 0
1.1       oster    1708:        case RAIDFRAME_SPARET_WAIT:
1.287     mrg      1709:                rf_lock_mutex2(rf_sparet_wait_mutex);
1.9       oster    1710:                while (!rf_sparet_wait_queue)
1.287     mrg      1711:                        rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1.1       oster    1712:                waitreq = rf_sparet_wait_queue;
                   1713:                rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1.287     mrg      1714:                rf_unlock_mutex2(rf_sparet_wait_mutex);
1.9       oster    1715:
1.42      oster    1716:                /* structure assignment */
1.186     perry    1717:                *((RF_SparetWait_t *) data) = *waitreq;
1.9       oster    1718:
1.1       oster    1719:                RF_Free(waitreq, sizeof(*waitreq));
1.9       oster    1720:                return (0);
                   1721:
                   1722:                /* wakes up a process waiting on SPARET_WAIT and puts an error
                    1723:                 * code in it that will cause the daemon to exit */
1.1       oster    1724:        case RAIDFRAME_ABORT_SPARET_WAIT:
                   1725:                RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
                   1726:                waitreq->fcol = -1;
1.287     mrg      1727:                rf_lock_mutex2(rf_sparet_wait_mutex);
1.1       oster    1728:                waitreq->next = rf_sparet_wait_queue;
                   1729:                rf_sparet_wait_queue = waitreq;
1.287     mrg      1730:                rf_broadcast_cond2(rf_sparet_wait_cv);
                   1731:                rf_unlock_mutex2(rf_sparet_wait_mutex);
1.9       oster    1732:                return (0);
1.1       oster    1733:
1.9       oster    1734:                /* used by the spare table daemon to deliver a spare table
                   1735:                 * into the kernel */
1.1       oster    1736:        case RAIDFRAME_SEND_SPARET:
1.9       oster    1737:
1.1       oster    1738:                /* install the spare table */
1.42      oster    1739:                retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1.9       oster    1740:
                   1741:                /* respond to the requestor.  the return status of the spare
                   1742:                 * table installation is passed in the "fcol" field */
1.1       oster    1743:                RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
                   1744:                waitreq->fcol = retcode;
1.287     mrg      1745:                rf_lock_mutex2(rf_sparet_wait_mutex);
1.1       oster    1746:                waitreq->next = rf_sparet_resp_queue;
                   1747:                rf_sparet_resp_queue = waitreq;
1.287     mrg      1748:                rf_broadcast_cond2(rf_sparet_resp_cv);
                   1749:                rf_unlock_mutex2(rf_sparet_wait_mutex);
1.9       oster    1750:
                   1751:                return (retcode);
1.1       oster    1752: #endif
                   1753:
1.9       oster    1754:        default:
1.36      oster    1755:                break; /* fall through to the os-specific code below */
1.1       oster    1756:
                   1757:        }
1.9       oster    1758:
1.42      oster    1759:        if (!raidPtr->valid)
1.9       oster    1760:                return (EINVAL);
                   1761:
1.1       oster    1762:        /*
                   1763:         * Add support for "regular" device ioctls here.
                   1764:         */
1.263     haad     1765:
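                                 /*
                                  * disk_ioctl() handles the generic ioctls; EPASSTHROUGH
                                  * means it did not recognize the command, and we deal
                                  * with it in the switch below.
                                  */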
1.264     haad     1766:        error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l);
1.263     haad     1767:        if (error != EPASSTHROUGH)
                   1768:                return (error);
1.9       oster    1769:
1.1       oster    1770:        switch (cmd) {
                   1771:        case DIOCGDINFO:
1.9       oster    1772:                *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1.1       oster    1773:                break;
1.102     fvdl     1774: #ifdef __HAVE_OLD_DISKLABEL
                   1775:        case ODIOCGDINFO:
                   1776:                newlabel = *(rs->sc_dkdev.dk_label);
                   1777:                if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1.103     fvdl     1778:                        return ENOTTY;
1.102     fvdl     1779:                memcpy(data, &newlabel, sizeof (struct olddisklabel));
                   1780:                break;
                   1781: #endif
1.1       oster    1782:
                   1783:        case DIOCGPART:
1.9       oster    1784:                ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
                   1785:                ((struct partinfo *) data)->part =
1.1       oster    1786:                    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
                   1787:                break;
                   1788:
                   1789:        case DIOCWDINFO:
                   1790:        case DIOCSDINFO:
1.102     fvdl     1791: #ifdef __HAVE_OLD_DISKLABEL
                   1792:        case ODIOCWDINFO:
                   1793:        case ODIOCSDINFO:
                   1794: #endif
                   1795:        {
                   1796:                struct disklabel *lp;
                   1797: #ifdef __HAVE_OLD_DISKLABEL
                   1798:                if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
                   1799:                        memset(&newlabel, 0, sizeof newlabel);
                   1800:                        memcpy(&newlabel, data, sizeof (struct olddisklabel));
                   1801:                        lp = &newlabel;
                   1802:                } else
                   1803: #endif
                   1804:                lp = (struct disklabel *)data;
                   1805:
1.1       oster    1806:                if ((error = raidlock(rs)) != 0)
                   1807:                        return (error);
                   1808:
                   1809:                rs->sc_flags |= RAIDF_LABELLING;
                   1810:
                   1811:                error = setdisklabel(rs->sc_dkdev.dk_label,
1.102     fvdl     1812:                    lp, 0, rs->sc_dkdev.dk_cpulabel);
1.1       oster    1813:                if (error == 0) {
1.102     fvdl     1814:                        if (cmd == DIOCWDINFO
                   1815: #ifdef __HAVE_OLD_DISKLABEL
                   1816:                            || cmd == ODIOCWDINFO
                   1817: #endif
                   1818:                           )
1.1       oster    1819:                                error = writedisklabel(RAIDLABELDEV(dev),
                   1820:                                    raidstrategy, rs->sc_dkdev.dk_label,
                   1821:                                    rs->sc_dkdev.dk_cpulabel);
                   1822:                }
                   1823:                rs->sc_flags &= ~RAIDF_LABELLING;
                   1824:
                   1825:                raidunlock(rs);
                   1826:
                   1827:                if (error)
                   1828:                        return (error);
                   1829:                break;
1.102     fvdl     1830:        }
1.1       oster    1831:
                   1832:        case DIOCWLABEL:
1.9       oster    1833:                if (*(int *) data != 0)
1.1       oster    1834:                        rs->sc_flags |= RAIDF_WLABEL;
                   1835:                else
                   1836:                        rs->sc_flags &= ~RAIDF_WLABEL;
                   1837:                break;
                   1838:
                   1839:        case DIOCGDEFLABEL:
1.102     fvdl     1840:                raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1.1       oster    1841:                break;
1.102     fvdl     1842:
                   1843: #ifdef __HAVE_OLD_DISKLABEL
                   1844:        case ODIOCGDEFLABEL:
                   1845:                raidgetdefaultlabel(raidPtr, rs, &newlabel);
                   1846:                if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1.103     fvdl     1847:                        return ENOTTY;
1.102     fvdl     1848:                memcpy(data, &newlabel, sizeof (struct olddisklabel));
                   1849:                break;
                   1850: #endif
1.1       oster    1851:
1.213     christos 1852:        case DIOCAWEDGE:
                   1853:        case DIOCDWEDGE:
                   1854:                dkw = (void *)data;
                   1855:
                   1856:                /* If the ioctl happens here, the parent is us. */
                   1857:                (void)strcpy(dkw->dkw_parent, rs->sc_xname);
                   1858:                return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
                   1859:
                   1860:        case DIOCLWEDGES:
                   1861:                return dkwedge_list(&rs->sc_dkdev,
                   1862:                    (struct dkwedge_list *)data, l);
1.252     oster    1863:        case DIOCCACHESYNC:
                   1864:                return rf_sync_component_caches(raidPtr);
1.298     buhrow   1865:
                   1866:        case DIOCGSTRATEGY:
                   1867:            {
                   1868:                struct disk_strategy *dks = (void *)data;
                   1869:
                   1870:                s = splbio();
                   1871:                strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
                   1872:                    sizeof(dks->dks_name));
                   1873:                splx(s);
                   1874:                dks->dks_paramlen = 0;
                   1875:
                   1876:                return 0;
                   1877:            }
                   1878:
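                                  /*
                                   * Switch to a new buffer queue strategy: allocate the
                                   * new queue, migrate any pending buffers, then free
                                   * the old queue.
                                   */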
                   1879:        case DIOCSSTRATEGY:
                   1880:            {
                   1881:                struct disk_strategy *dks = (void *)data;
                   1882:                struct bufq_state *new;
                   1883:                struct bufq_state *old;
                   1884:
                   1885:                if (dks->dks_param != NULL) {
                   1886:                        return EINVAL;
                   1887:                }
                   1888:                dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
                   1889:                error = bufq_alloc(&new, dks->dks_name,
                   1890:                    BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
                   1891:                if (error) {
                   1892:                        return error;
                   1893:                }
                   1894:                s = splbio();
                   1895:                old = rs->buf_queue;
                   1896:                bufq_move(new, old);
                   1897:                rs->buf_queue = new;
                   1898:                splx(s);
                   1899:                bufq_free(old);
                   1900:
                   1901:                return 0;
                   1902:            }
                   1903:
1.1       oster    1904:        default:
1.39      oster    1905:                retcode = ENOTTY;
1.1       oster    1906:        }
1.9       oster    1907:        return (retcode);
1.1       oster    1908:
                   1909: }
                   1910:
                   1911:
1.9       oster    1912: /* raidinit -- complete the rest of the initialization for the
1.1       oster    1913:    RAIDframe device.  */
                   1914:
                   1915:
1.59      oster    1916: static void
1.300     christos 1917: raidinit(struct raid_softc *rs)
1.1       oster    1918: {
1.262     cegger   1919:        cfdata_t cf;
1.59      oster    1920:        int     unit;
1.300     christos 1921:        RF_Raid_t *raidPtr = &rs->sc_r;
1.1       oster    1922:
1.59      oster    1923:        unit = raidPtr->raidid;
1.1       oster    1924:
                   1925:
                   1926:        /* XXX should check return code first... */
                   1927:        rs->sc_flags |= RAIDF_INITED;
                   1928:
1.179     itojun   1929:        /* XXX doesn't check bounds. */
                   1930:        snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1.1       oster    1931:
1.217     oster    1932:        /* attach the pseudo device */
                   1933:        cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
                   1934:        cf->cf_name = raid_cd.cd_name;
                   1935:        cf->cf_atname = raid_cd.cd_name;
                   1936:        cf->cf_unit = unit;
                   1937:        cf->cf_fstate = FSTATE_STAR;
                   1938:
                   1939:        rs->sc_dev = config_attach_pseudo(cf);
                   1940:
1.270     christos 1941:        if (rs->sc_dev == NULL) {
1.217     oster    1942:                printf("raid%d: config_attach_pseudo failed\n",
1.270     christos 1943:                    raidPtr->raidid);
1.265     pooka    1944:                rs->sc_flags &= ~RAIDF_INITED;
                   1945:                free(cf, M_RAIDFRAME);
                   1946:                return;
1.217     oster    1947:        }
                   1948:
1.1       oster    1949:        /* disk_attach actually creates space for the CPU disklabel, among
1.9       oster    1950:         * other things, so it's critical to call this *BEFORE* we try putzing
                   1951:         * with disklabels. */
1.11      oster    1952:
1.235     oster    1953:        disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
1.219     oster    1954:        disk_attach(&rs->sc_dkdev);
1.275     mrg      1955:        disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector);
1.1       oster    1956:
                   1957:        /* XXX There may be a weird interaction here between this, and
1.9       oster    1958:         * protectedSectors, as used in RAIDframe.  */
1.11      oster    1959:
1.9       oster    1960:        rs->sc_size = raidPtr->totalSectors;
1.234     oster    1961:
                   1962:        dkwedge_discover(&rs->sc_dkdev);
                   1963:
                   1964:        rf_set_properties(rs, raidPtr);
                   1965:
1.1       oster    1966: }
1.150     oster    1967: #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1.1       oster    1968: /* wake up the daemon & tell it to get us a spare table
                   1969:  * XXX
1.9       oster    1970:  * the entries in the queues should be tagged with the raidPtr
1.186     perry    1971:  * so that in the extremely rare case that two recons happen at once,
1.11      oster    1972:  * we know for which device we're requesting a spare table
1.1       oster    1973:  * XXX
1.186     perry    1974:  *
1.39      oster    1975:  * XXX This code is not currently used. GO
1.1       oster    1976:  */
1.186     perry    1977: int
1.169     oster    1978: rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
1.9       oster    1979: {
                   1980:        int     retcode;
                   1981:
1.287     mrg      1982:        rf_lock_mutex2(rf_sparet_wait_mutex);
1.9       oster    1983:        req->next = rf_sparet_wait_queue;
                   1984:        rf_sparet_wait_queue = req;
1.289     mrg      1985:        rf_broadcast_cond2(rf_sparet_wait_cv);
1.9       oster    1986:
                   1987:        /* mpsleep unlocks the mutex */
                   1988:        while (!rf_sparet_resp_queue) {
1.289     mrg      1989:                rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
1.9       oster    1990:        }
                   1991:        req = rf_sparet_resp_queue;
                   1992:        rf_sparet_resp_queue = req->next;
1.287     mrg      1993:        rf_unlock_mutex2(rf_sparet_wait_mutex);
1.9       oster    1994:
                   1995:        retcode = req->fcol;
                   1996:        RF_Free(req, sizeof(*req));     /* this is not the same req as we
                   1997:                                         * alloc'd */
                   1998:        return (retcode);
1.1       oster    1999: }
1.150     oster    2000: #endif
1.39      oster    2001:
1.186     perry    2002: /* a wrapper around rf_DoAccess that extracts appropriate info from the
1.11      oster    2003:  * bp & passes it down.
1.1       oster    2004:  * any calls originating in the kernel must use non-blocking I/O;
                   2005:  * do some extra sanity checking to return "appropriate" error values for
                   2006:  * certain conditions (to make some standard utilities work)
1.186     perry    2007:  *
1.34      oster    2008:  * Formerly known as: rf_DoAccessKernel
1.1       oster    2009:  */
1.34      oster    2010: void
1.169     oster    2011: raidstart(RF_Raid_t *raidPtr)
1.1       oster    2012: {
                   2013:        RF_SectorCount_t num_blocks, pb, sum;
                   2014:        RF_RaidAddr_t raid_addr;
                   2015:        struct partition *pp;
1.9       oster    2016:        daddr_t blocknum;
1.1       oster    2017:        struct raid_softc *rs;
1.9       oster    2018:        int     do_async;
1.34      oster    2019:        struct buf *bp;
1.180     oster    2020:        int rc;
1.1       oster    2021:
1.300     christos 2022:        rs = raidPtr->softc;
1.56      oster    2023:        /* quick check to see if anything has died recently */
1.291     mrg      2024:        rf_lock_mutex2(raidPtr->mutex);
1.56      oster    2025:        if (raidPtr->numNewFailures > 0) {
1.291     mrg      2026:                rf_unlock_mutex2(raidPtr->mutex);
1.186     perry    2027:                rf_update_component_labels(raidPtr,
1.91      oster    2028:                                           RF_NORMAL_COMPONENT_UPDATE);
1.291     mrg      2029:                rf_lock_mutex2(raidPtr->mutex);
1.56      oster    2030:                raidPtr->numNewFailures--;
                   2031:        }
                   2032:
1.34      oster    2033:        /* Check to see if we're at the limit... */
                   2034:        while (raidPtr->openings > 0) {
1.291     mrg      2035:                rf_unlock_mutex2(raidPtr->mutex);
1.34      oster    2036:
                   2037:                /* get the next item, if any, from the queue */
1.253     yamt     2038:                if ((bp = bufq_get(rs->buf_queue)) == NULL) {
1.34      oster    2039:                        /* nothing more to do */
                   2040:                        return;
                   2041:                }
                   2042:
                   2043:                /* Ok, for the bp we have here, bp->b_blkno is relative to the
1.186     perry    2044:                 * partition.. Need to make it absolute to the underlying
1.34      oster    2045:                 * device.. */
1.1       oster    2046:
1.275     mrg      2047:                blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
1.34      oster    2048:                if (DISKPART(bp->b_dev) != RAW_PART) {
                   2049:                        pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
                   2050:                        blocknum += pp->p_offset;
                   2051:                }
1.1       oster    2052:
1.186     perry    2053:                db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1.34      oster    2054:                            (int) blocknum));
1.186     perry    2055:
1.34      oster    2056:                db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
                   2057:                db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1.186     perry    2058:
                   2059:                /* *THIS* is where we adjust what block we're going to...
1.34      oster    2060:                 * but DO NOT TOUCH bp->b_blkno!!! */
                   2061:                raid_addr = blocknum;
1.186     perry    2062:
1.34      oster    2063:                num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
                   2064:                pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
                   2065:                sum = raid_addr + num_blocks + pb;
                   2066:                if (1 || rf_debugKernelAccess) {
                   2067:                        db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
                   2068:                                    (int) raid_addr, (int) sum, (int) num_blocks,
                   2069:                                    (int) pb, (int) bp->b_resid));
                   2070:                }
                   2071:                if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
                   2072:                    || (sum < num_blocks) || (sum < pb)) {
                   2073:                        bp->b_error = ENOSPC;
                   2074:                        bp->b_resid = bp->b_bcount;
                   2075:                        biodone(bp);
1.291     mrg      2076:                        rf_lock_mutex2(raidPtr->mutex);
1.34      oster    2077:                        continue;
                   2078:                }
                   2079:                /*
                   2080:                 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
                   2081:                 */
1.186     perry    2082:
1.34      oster    2083:                if (bp->b_bcount & raidPtr->sectorMask) {
                   2084:                        bp->b_error = EINVAL;
                   2085:                        bp->b_resid = bp->b_bcount;
                   2086:                        biodone(bp);
1.291     mrg      2087:                        rf_lock_mutex2(raidPtr->mutex);
1.34      oster    2088:                        continue;
1.186     perry    2089:
1.34      oster    2090:                }
                   2091:                db1_printf(("Calling DoAccess..\n"));
1.186     perry    2092:
1.1       oster    2093:
1.291     mrg      2094:                rf_lock_mutex2(raidPtr->mutex);
1.34      oster    2095:                raidPtr->openings--;
1.291     mrg      2096:                rf_unlock_mutex2(raidPtr->mutex);
1.1       oster    2097:
1.34      oster    2098:                /*
                   2099:                 * Everything is async.
                   2100:                 */
                   2101:                do_async = 1;
1.186     perry    2102:
1.99      oster    2103:                disk_busy(&rs->sc_dkdev);
                   2104:
1.186     perry    2105:                /* XXX we're still at splbio() here... do we *really*
1.34      oster    2106:                   need to be? */
1.20      oster    2107:
1.186     perry    2108:                /* don't ever condition on bp->b_flags & B_WRITE.
1.99      oster    2109:                 * always condition on B_READ instead */
1.186     perry    2110:
1.180     oster    2111:                rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
                   2112:                                 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
                   2113:                                 do_async, raid_addr, num_blocks,
                   2114:                                 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1.151     oster    2115:
1.180     oster    2116:                if (rc) {
                   2117:                        bp->b_error = rc;
                   2118:                        bp->b_resid = bp->b_bcount;
                   2119:                        biodone(bp);
                   2120:                        /* continue loop */
1.186     perry    2121:                }
1.20      oster    2122:
1.291     mrg      2123:                rf_lock_mutex2(raidPtr->mutex);
1.20      oster    2124:        }
1.291     mrg      2125:        rf_unlock_mutex2(raidPtr->mutex);
1.34      oster    2126: }
1.20      oster    2127:
                   2128:
1.7       explorer 2129:
                   2130:
1.1       oster    2131: /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
                   2132:
1.186     perry    2133: int
1.169     oster    2134: rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
1.1       oster    2135: {
1.9       oster    2136:        int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1.1       oster    2137:        struct buf *bp;
1.9       oster    2138:
1.1       oster    2139:        req->queue = queue;
                   2140:        bp = req->bp;
                   2141:
                   2142:        switch (req->type) {
1.9       oster    2143:        case RF_IO_TYPE_NOP:    /* used primarily to unlock a locked queue */
1.1       oster    2144:                /* XXX need to do something extra here.. */
1.9       oster    2145:                /* I'm leaving this in, as I've never actually seen it used,
                   2146:                 * and I'd like folks to report it... GO */
1.1       oster    2147:                printf("WAKEUP CALLED\n");
                   2148:                queue->numOutstanding++;
                   2149:
1.197     oster    2150:                bp->b_flags = 0;
1.207     simonb   2151:                bp->b_private = req;
1.1       oster    2152:
1.194     oster    2153:                KernelWakeupFunc(bp);
1.1       oster    2154:                break;
1.9       oster    2155:
1.1       oster    2156:        case RF_IO_TYPE_READ:
                   2157:        case RF_IO_TYPE_WRITE:
1.175     oster    2158: #if RF_ACC_TRACE > 0
1.1       oster    2159:                if (req->tracerec) {
                   2160:                        RF_ETIMER_START(req->tracerec->timer);
                   2161:                }
1.175     oster    2162: #endif
1.194     oster    2163:                InitBP(bp, queue->rf_cinfo->ci_vp,
1.197     oster    2164:                    op, queue->rf_cinfo->ci_dev,
1.9       oster    2165:                    req->sectorOffset, req->numSector,
                   2166:                    req->buf, KernelWakeupFunc, (void *) req,
                   2167:                    queue->raidPtr->logBytesPerSector, req->b_proc);
1.1       oster    2168:
                   2169:                if (rf_debugKernelAccess) {
1.9       oster    2170:                        db1_printf(("dispatch: bp->b_blkno = %ld\n",
                   2171:                                (long) bp->b_blkno));
1.1       oster    2172:                }
                   2173:                queue->numOutstanding++;
                   2174:                queue->last_deq_sector = req->sectorOffset;
1.9       oster    2175:                /* acc wouldn't have been let in if there were any pending
                   2176:                 * reqs at any other priority */
1.1       oster    2177:                queue->curPriority = req->priority;
                   2178:
1.166     oster    2179:                db1_printf(("Going for %c to unit %d col %d\n",
1.186     perry    2180:                            req->type, queue->raidPtr->raidid,
1.166     oster    2181:                            queue->col));
1.1       oster    2182:                db1_printf(("sector %d count %d (%d bytes) %d\n",
1.9       oster    2183:                        (int) req->sectorOffset, (int) req->numSector,
                   2184:                        (int) (req->numSector <<
                   2185:                            queue->raidPtr->logBytesPerSector),
                   2186:                        (int) queue->raidPtr->logBytesPerSector));
1.256     oster    2187:
                   2188:                /*
                   2189:                 * XXX: drop lock here since this can block at
                   2190:                 * least with backing SCSI devices.  Retake it
                   2191:                 * to minimize fuss with calling interfaces.
                   2192:                 */
                   2193:
                   2194:                RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
1.247     oster    2195:                bdev_strategy(bp);
1.256     oster    2196:                RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
1.1       oster    2197:                break;
1.9       oster    2198:
1.1       oster    2199:        default:
                   2200:                panic("bad req->type in rf_DispatchKernelIO");
                   2201:        }
                   2202:        db1_printf(("Exiting from DispatchKernelIO\n"));
1.134     oster    2203:
1.9       oster    2204:        return (0);
1.1       oster    2205: }
1.9       oster    2206: /* this is the callback function associated with an I/O invoked from
1.1       oster    2207:    kernel code.
                   2208:  */
1.186     perry    2209: static void
1.194     oster    2210: KernelWakeupFunc(struct buf *bp)
1.9       oster    2211: {
                   2212:        RF_DiskQueueData_t *req = NULL;
                   2213:        RF_DiskQueue_t *queue;
                   2214:
                   2215:        db1_printf(("recovering the request queue:\n"));
1.285     mrg      2216:
1.207     simonb   2217:        req = bp->b_private;
1.1       oster    2218:
1.9       oster    2219:        queue = (RF_DiskQueue_t *) req->queue;
1.1       oster    2220:
1.286     mrg      2221:        rf_lock_mutex2(queue->raidPtr->iodone_lock);
1.285     mrg      2222:
1.175     oster    2223: #if RF_ACC_TRACE > 0
1.9       oster    2224:        if (req->tracerec) {
                   2225:                RF_ETIMER_STOP(req->tracerec->timer);
                   2226:                RF_ETIMER_EVAL(req->tracerec->timer);
1.288     mrg      2227:                rf_lock_mutex2(rf_tracing_mutex);
1.9       oster    2228:                req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
                   2229:                req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
                   2230:                req->tracerec->num_phys_ios++;
1.288     mrg      2231:                rf_unlock_mutex2(rf_tracing_mutex);
1.9       oster    2232:        }
1.175     oster    2233: #endif
1.1       oster    2234:
1.230     ad       2235:        /* XXX Ok, let's get aggressive... If b_error is set, let's go
1.9       oster    2236:         * ballistic, and mark the component as hosed... */
1.36      oster    2237:
1.230     ad       2238:        if (bp->b_error != 0) {
1.9       oster    2239:                /* Mark the disk as dead */
                   2240:                /* but only mark it once... */
1.186     perry    2241:                /* and only if it wouldn't leave this RAID set
1.183     oster    2242:                   completely broken */
1.193     oster    2243:                if (((queue->raidPtr->Disks[queue->col].status ==
                   2244:                      rf_ds_optimal) ||
                   2245:                     (queue->raidPtr->Disks[queue->col].status ==
                   2246:                      rf_ds_used_spare)) &&
                   2247:                     (queue->raidPtr->numFailures <
1.204     simonb   2248:                      queue->raidPtr->Layout.map->faultsTolerated)) {
1.9       oster    2249:                        printf("raid%d: IO Error.  Marking %s as failed.\n",
1.136     oster    2250:                               queue->raidPtr->raidid,
1.166     oster    2251:                               queue->raidPtr->Disks[queue->col].devname);
                   2252:                        queue->raidPtr->Disks[queue->col].status =
1.9       oster    2253:                            rf_ds_failed;
1.166     oster    2254:                        queue->raidPtr->status = rf_rs_degraded;
1.9       oster    2255:                        queue->raidPtr->numFailures++;
1.56      oster    2256:                        queue->raidPtr->numNewFailures++;
1.9       oster    2257:                } else {        /* Disk is already dead... */
                   2258:                        /* printf("Disk already marked as dead!\n"); */
                   2259:                }
1.4       oster    2260:
1.9       oster    2261:        }
1.4       oster    2262:
1.143     oster    2263:        /* Fill in the error value */
1.230     ad       2264:        req->error = bp->b_error;
1.143     oster    2265:
                   2266:        /* Drop this one on the "finished" queue... */
                   2267:        TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
                   2268:
                   2269:        /* Let the raidio thread know there is work to be done. */
1.286     mrg      2270:        rf_signal_cond2(queue->raidPtr->iodone_cv);
1.143     oster    2271:
1.286     mrg      2272:        rf_unlock_mutex2(queue->raidPtr->iodone_lock);
1.1       oster    2273: }
                   2274:
                   2275:
                   2276: /*
                   2277:  * initialize a buf structure for doing an I/O in the kernel.
                   2278:  */
1.186     perry    2279: static void
1.169     oster    2280: InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
1.225     christos 2281:        RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
1.169     oster    2282:        void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
                   2283:        struct proc *b_proc)
1.9       oster    2284: {
                   2285:        /* bp->b_flags       = B_PHYS | rw_flag; */
1.242     ad       2286:        bp->b_flags = rw_flag;  /* XXX need B_PHYS here too??? */
                   2287:        bp->b_oflags = 0;
                   2288:        bp->b_cflags = 0;
1.9       oster    2289:        bp->b_bcount = numSect << logBytesPerSector;
                   2290:        bp->b_bufsize = bp->b_bcount;
                   2291:        bp->b_error = 0;
                   2292:        bp->b_dev = dev;
1.187     christos 2293:        bp->b_data = bf;
1.275     mrg      2294:        bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
1.9       oster    2295:        bp->b_resid = bp->b_bcount;     /* XXX is this right!??!?!! */
1.1       oster    2296:        if (bp->b_bcount == 0) {
1.141     provos   2297:                panic("bp->b_bcount is zero in InitBP!!");
1.1       oster    2298:        }
1.161     fvdl     2299:        bp->b_proc = b_proc;
1.9       oster    2300:        bp->b_iodone = cbFunc;
1.207     simonb   2301:        bp->b_private = cbArg;
1.1       oster    2302: }
                   2303:
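                          /*
                           * Fabricate a default disklabel: a single raw partition
                           * spanning the entire RAID set.
                           */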
                   2304: static void
1.186     perry    2305: raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
1.169     oster    2306:                    struct disklabel *lp)
1.1       oster    2307: {
1.108     thorpej  2308:        memset(lp, 0, sizeof(*lp));
1.1       oster    2309:
                   2310:        /* fabricate a label... */
                   2311:        lp->d_secperunit = raidPtr->totalSectors;
                   2312:        lp->d_secsize = raidPtr->bytesPerSector;
1.45      oster    2313:        lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1.105     oster    2314:        lp->d_ntracks = 4 * raidPtr->numCol;
1.186     perry    2315:        lp->d_ncylinders = raidPtr->totalSectors /
1.45      oster    2316:                (lp->d_nsectors * lp->d_ntracks);
1.1       oster    2317:        lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
                   2318:
                   2319:        strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1.9       oster    2320:        lp->d_type = DTYPE_RAID;
1.1       oster    2321:        strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
                   2322:        lp->d_rpm = 3600;
                   2323:        lp->d_interleave = 1;
                   2324:        lp->d_flags = 0;
                   2325:
                   2326:        lp->d_partitions[RAW_PART].p_offset = 0;
                   2327:        lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
                   2328:        lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
                   2329:        lp->d_npartitions = RAW_PART + 1;
                   2330:
                   2331:        lp->d_magic = DISKMAGIC;
                   2332:        lp->d_magic2 = DISKMAGIC;
                   2333:        lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
                   2334:
                   2335: }
                   2336: /*
                   2337:  * Read the disklabel from the raid device.  If one is not present, fake one
                   2338:  * up.
                   2339:  */
                   2340: static void
1.169     oster    2341: raidgetdisklabel(dev_t dev)
1.1       oster    2342: {
1.9       oster    2343:        int     unit = raidunit(dev);
1.300     christos 2344:        struct raid_softc *rs;
1.158     dsl      2345:        const char   *errstring;
1.300     christos 2346:        struct disklabel *lp;
                   2347:        struct cpu_disklabel *clp;
1.1       oster    2348:        RF_Raid_t *raidPtr;
                   2349:
1.300     christos 2350:        if ((rs = raidget(unit)) == NULL)
                   2351:                return;
                   2352:
                   2353:        lp = rs->sc_dkdev.dk_label;
                   2354:        clp = rs->sc_dkdev.dk_cpulabel;
                   2355:
1.1       oster    2356:        db1_printf(("Getting the disklabel...\n"));
                   2357:
1.108     thorpej  2358:        memset(clp, 0, sizeof(*clp));
1.1       oster    2359:
1.300     christos 2360:        raidPtr = &rs->sc_r;
1.1       oster    2361:
                   2362:        raidgetdefaultlabel(raidPtr, rs, lp);
                   2363:
                   2364:        /*
                   2365:         * Call the generic disklabel extraction routine.
                   2366:         */
                   2367:        errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
                   2368:            rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1.9       oster    2369:        if (errstring)
1.1       oster    2370:                raidmakedisklabel(rs);
                   2371:        else {
1.9       oster    2372:                int     i;
1.1       oster    2373:                struct partition *pp;
                   2374:
                   2375:                /*
                   2376:                 * Sanity check whether the found disklabel is valid.
                   2377:                 *
                    2378:                 * This is necessary since the total size of the raid device
                    2379:                 * may vary when the interleave is changed even though exactly
1.211     oster    2380:                 * the same components are used, and an old disklabel may be
1.1       oster    2381:                 * used if one is found.
                   2382:                 */
                   2383:                if (lp->d_secperunit != rs->sc_size)
1.123     oster    2384:                        printf("raid%d: WARNING: %s: "
1.260     sborrill 2385:                            "total sector size in disklabel (%" PRIu32 ") != "
                   2386:                            "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname,
                   2387:                            lp->d_secperunit, rs->sc_size);
1.1       oster    2388:                for (i = 0; i < lp->d_npartitions; i++) {
                   2389:                        pp = &lp->d_partitions[i];
                   2390:                        if (pp->p_offset + pp->p_size > rs->sc_size)
1.123     oster    2391:                                printf("raid%d: WARNING: %s: end of partition `%c' "
1.260     sborrill 2392:                                       "exceeds the size of raid (%" PRIu64 ")\n",
                   2393:                                       unit, rs->sc_xname, 'a' + i, rs->sc_size);
1.1       oster    2394:                }
                   2395:        }
                   2396:
                   2397: }
                   2398: /*
                   2399:  * Take care of things one might want to take care of in the event
                   2400:  * that a disklabel isn't present.
                   2401:  */
                   2402: static void
1.169     oster    2403: raidmakedisklabel(struct raid_softc *rs)
1.1       oster    2404: {
                   2405:        struct disklabel *lp = rs->sc_dkdev.dk_label;
                   2406:        db1_printf(("Making a label..\n"));
                   2407:
                   2408:        /*
                   2409:         * For historical reasons, if there's no disklabel present
                   2410:         * the raw partition must be marked FS_BSDFFS.
                   2411:         */
                   2412:
                   2413:        lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
                   2414:
                   2415:        strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
                   2416:
                   2417:        lp->d_checksum = dkcksum(lp);
                   2418: }
                   2419: /*
                   2420:  * Wait interruptibly for an exclusive lock.
                   2421:  *
                   2422:  * XXX
                   2423:  * Several drivers do this; it should be abstracted and made MP-safe.
                   2424:  * (Hmm... where have we seen this warning before :->  GO )
                   2425:  */
                   2426: static int
1.169     oster    2427: raidlock(struct raid_softc *rs)
1.1       oster    2428: {
1.9       oster    2429:        int     error;
1.1       oster    2430:
                   2431:        while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
                   2432:                rs->sc_flags |= RAIDF_WANTED;
1.9       oster    2433:                if ((error =
                   2434:                        tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1.1       oster    2435:                        return (error);
                   2436:        }
                   2437:        rs->sc_flags |= RAIDF_LOCKED;
                   2438:        return (0);
                   2439: }
                   2440: /*
                   2441:  * Unlock and wake up any waiters.
                   2442:  */
                   2443: static void
1.169     oster    2444: raidunlock(struct raid_softc *rs)
1.1       oster    2445: {
                   2446:
                   2447:        rs->sc_flags &= ~RAIDF_LOCKED;
                   2448:        if ((rs->sc_flags & RAIDF_WANTED) != 0) {
                   2449:                rs->sc_flags &= ~RAIDF_WANTED;
                   2450:                wakeup(rs);
                   2451:        }
1.11      oster    2452: }
1.186     perry    2453:
1.11      oster    2454:
                   2455: #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
                   2456: #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
1.269     jld      2457: #define RF_PARITY_MAP_SIZE   RF_PARITYMAP_NBYTE
1.11      oster    2458:
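                          /*
                           * Helpers for computing where the component label and the
                           * parity map live on each component.  The component info
                           * area starts at a fixed byte offset; both areas are padded
                           * out to at least one sector on devices with large sectors.
                           */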
1.276     mrg      2459: static daddr_t
                   2460: rf_component_info_offset(void)
                   2461: {
                   2462:
                   2463:        return RF_COMPONENT_INFO_OFFSET;
                   2464: }
                   2465:
                   2466: static daddr_t
                   2467: rf_component_info_size(unsigned secsize)
                   2468: {
                   2469:        daddr_t info_size;
                   2470:
                   2471:        KASSERT(secsize);
                   2472:        if (secsize > RF_COMPONENT_INFO_SIZE)
                   2473:                info_size = secsize;
                   2474:        else
                   2475:                info_size = RF_COMPONENT_INFO_SIZE;
                   2476:
                   2477:        return info_size;
                   2478: }
                   2479:
                   2480: static daddr_t
                   2481: rf_parity_map_offset(RF_Raid_t *raidPtr)
                   2482: {
                   2483:        daddr_t map_offset;
                   2484:
                   2485:        KASSERT(raidPtr->bytesPerSector);
                   2486:        if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
                   2487:                map_offset = raidPtr->bytesPerSector;
                   2488:        else
                   2489:                map_offset = RF_COMPONENT_INFO_SIZE;
                   2490:        map_offset += rf_component_info_offset();
                   2491:
                   2492:        return map_offset;
                   2493: }
                   2494:
                   2495: static daddr_t
                   2496: rf_parity_map_size(RF_Raid_t *raidPtr)
                   2497: {
                   2498:        daddr_t map_size;
                   2499:
                   2500:        if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
                   2501:                map_size = raidPtr->bytesPerSector;
                   2502:        else
                   2503:                map_size = RF_PARITY_MAP_SIZE;
                   2504:
                   2505:        return map_size;
                   2506: }
                   2507:
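                          /*
                           * Mark the component label for the given column as clean
                           * and write it back to the component.
                           */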
1.186     perry    2508: int
1.269     jld      2509: raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
1.12      oster    2510: {
1.269     jld      2511:        RF_ComponentLabel_t *clabel;
                   2512:
                   2513:        clabel = raidget_component_label(raidPtr, col);
                   2514:        clabel->clean = RF_RAID_CLEAN;
                   2515:        raidflush_component_label(raidPtr, col);
1.12      oster    2516:        return(0);
                   2517: }
                   2518:
                   2519:
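                          /*
                           * Mark the component label for the given column as dirty
                           * and write it back to the component.
                           */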
1.186     perry    2520: int
1.269     jld      2521: raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
1.11      oster    2522: {
1.269     jld      2523:        RF_ComponentLabel_t *clabel;
                   2524:
                   2525:        clabel = raidget_component_label(raidPtr, col);
                   2526:        clabel->clean = RF_RAID_DIRTY;
                   2527:        raidflush_component_label(raidPtr, col);
1.11      oster    2528:        return(0);
                   2529: }
                   2530:
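                          /*
                           * Component label accessors: fetch the on-disk label into
                           * the in-core copy, return a pointer to the in-core copy,
                           * or flush the in-core copy (with an updated mod counter)
                           * back out to the component.
                           */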
                   2531: int
1.269     jld      2532: raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
                   2533: {
1.276     mrg      2534:        KASSERT(raidPtr->bytesPerSector);
                   2535:        return raidread_component_label(raidPtr->bytesPerSector,
                   2536:            raidPtr->Disks[col].dev,
1.269     jld      2537:            raidPtr->raid_cinfo[col].ci_vp,
                   2538:            &raidPtr->raid_cinfo[col].ci_label);
                   2539: }
                   2540:
                   2541: RF_ComponentLabel_t *
                   2542: raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
                   2543: {
                   2544:        return &raidPtr->raid_cinfo[col].ci_label;
                   2545: }
                   2546:
                   2547: int
                   2548: raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
                   2549: {
                   2550:        RF_ComponentLabel_t *label;
                   2551:
                   2552:        label = &raidPtr->raid_cinfo[col].ci_label;
                   2553:        label->mod_counter = raidPtr->mod_counter;
                   2554: #ifndef RF_NO_PARITY_MAP
                   2555:        label->parity_map_modcount = label->mod_counter;
                   2556: #endif
1.276     mrg      2557:        return raidwrite_component_label(raidPtr->bytesPerSector,
                   2558:            raidPtr->Disks[col].dev,
1.269     jld      2559:            raidPtr->raid_cinfo[col].ci_vp, label);
                   2560: }
                   2561:
                   2562:
                   2563: static int
1.276     mrg      2564: raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
1.269     jld      2565:     RF_ComponentLabel_t *clabel)
                   2566: {
                   2567:        return raidread_component_area(dev, b_vp, clabel,
                   2568:            sizeof(RF_ComponentLabel_t),
1.276     mrg      2569:            rf_component_info_offset(),
                   2570:            rf_component_info_size(secsize));
1.269     jld      2571: }
                   2572:
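                          /*
                           * Read msize bytes of metadata from the dsize-byte area
                           * starting at byte offset 'offset' on the given component.
                           */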
                   2573: /* ARGSUSED */
                   2574: static int
                   2575: raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
                   2576:     size_t msize, daddr_t offset, daddr_t dsize)
1.11      oster    2577: {
                   2578:        struct buf *bp;
1.130     gehenna  2579:        const struct bdevsw *bdev;
1.11      oster    2580:        int error;
1.186     perry    2581:
1.11      oster    2582:        /* XXX should probably ensure that we don't try to do this if
1.186     perry    2583:           someone has changed rf_protected_sectors. */
1.11      oster    2584:
1.98      oster    2585:        if (b_vp == NULL) {
                   2586:                /* For whatever reason, this component is not valid.
                   2587:                   Don't try to read a component label from it. */
                   2588:                return(EINVAL);
                   2589:        }
                   2590:
1.11      oster    2591:        /* get a block of the appropriate size... */
1.269     jld      2592:        bp = geteblk((int)dsize);
1.11      oster    2593:        bp->b_dev = dev;
                   2594:
                   2595:        /* get our ducks in a row for the read */
1.269     jld      2596:        bp->b_blkno = offset / DEV_BSIZE;
                   2597:        bp->b_bcount = dsize;
1.100     chs      2598:        bp->b_flags |= B_READ;
1.269     jld      2599:        bp->b_resid = dsize;
1.11      oster    2600:
1.130     gehenna  2601:        bdev = bdevsw_lookup(bp->b_dev);
                   2602:        if (bdev == NULL)
                   2603:                return (ENXIO);
                   2604:        (*bdev->d_strategy)(bp);
1.11      oster    2605:
1.186     perry    2606:        error = biowait(bp);
1.11      oster    2607:
                   2608:        if (!error) {
1.269     jld      2609:                memcpy(data, bp->b_data, msize);
1.204     simonb   2610:        }
1.11      oster    2611:
1.233     ad       2612:        brelse(bp, 0);
1.11      oster    2613:        return(error);
                   2614: }
1.269     jld      2615:
                   2616:
                   2617: static int
1.276     mrg      2618: raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
                   2619:     RF_ComponentLabel_t *clabel)
1.269     jld      2620: {
                   2621:        return raidwrite_component_area(dev, b_vp, clabel,
                   2622:            sizeof(RF_ComponentLabel_t),
1.276     mrg      2623:            rf_component_info_offset(),
                   2624:            rf_component_info_size(secsize), 0);
1.269     jld      2625: }
                   2626:
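                          /*
                           * Write msize bytes of metadata (zero-padded to dsize bytes)
                           * at byte offset 'offset' on the given component, optionally
                           * asynchronously.
                           */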
1.11      oster    2627: /* ARGSUSED */
1.269     jld      2628: static int
                   2629: raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
                   2630:     size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
1.11      oster    2631: {
                   2632:        struct buf *bp;
1.130     gehenna  2633:        const struct bdevsw *bdev;
1.11      oster    2634:        int error;
                   2635:
                   2636:        /* get a block of the appropriate size... */
1.269     jld      2637:        bp = geteblk((int)dsize);
1.11      oster    2638:        bp->b_dev = dev;
                   2639:
                   2640:        /* get our ducks in a row for the write */
1.269     jld      2641:        bp->b_blkno = offset / DEV_BSIZE;
                   2642:        bp->b_bcount = dsize;
                   2643:        bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
                   2644:        bp->b_resid = dsize;
1.11      oster    2645:
1.269     jld      2646:        memset(bp->b_data, 0, dsize);
                   2647:        memcpy(bp->b_data, data, msize);
1.11      oster    2648:
1.130     gehenna  2649:        bdev = bdevsw_lookup(bp->b_dev);
                   2650:        if (bdev == NULL)
                   2651:                return (ENXIO);
                   2652:        (*bdev->d_strategy)(bp);
1.269     jld      2653:        if (asyncp)
                   2654:                return 0;
1.186     perry    2655:        error = biowait(bp);
1.233     ad       2656:        brelse(bp, 0);
1.11      oster    2657:        if (error) {
1.48      oster    2658: #if 1
1.11      oster    2659:                printf("Failed to write RAID component info!\n");
1.48      oster    2660: #endif
1.11      oster    2661:        }
                   2662:
                   2663:        return(error);
1.1       oster    2664: }
1.12      oster    2665:
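                          /*
                           * Write the on-disk parity map to each component, skipping
                           * dead disks.
                           */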
1.186     perry    2666: void
1.269     jld      2667: rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
                   2668: {
                   2669:        int c;
                   2670:
                   2671:        for (c = 0; c < raidPtr->numCol; c++) {
                   2672:                /* Skip dead disks. */
                   2673:                if (RF_DEAD_DISK(raidPtr->Disks[c].status))
                   2674:                        continue;
                   2675:                /* XXXjld: what if an error occurs here? */
                   2676:                raidwrite_component_area(raidPtr->Disks[c].dev,
                   2677:                    raidPtr->raid_cinfo[c].ci_vp, map,
                   2678:                    RF_PARITYMAP_NBYTE,
1.276     mrg      2679:                    rf_parity_map_offset(raidPtr),
                   2680:                    rf_parity_map_size(raidPtr), 0);
1.269     jld      2681:        }
                   2682: }
                   2683:
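                          /*
                           * Read the on-disk parity map from each component (skipping
                           * dead disks) and merge the copies into *map.
                           */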
                   2684: void
                   2685: rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
                   2686: {
                   2687:        struct rf_paritymap_ondisk tmp;
1.272     oster    2688:        int c,first;
1.269     jld      2689:
1.272     oster    2690:        first=1;
1.269     jld      2691:        for (c = 0; c < raidPtr->numCol; c++) {
                   2692:                /* Skip dead disks. */
                   2693:                if (RF_DEAD_DISK(raidPtr->Disks[c].status))
                   2694:                        continue;
                   2695:                raidread_component_area(raidPtr->Disks[c].dev,
                   2696:                    raidPtr->raid_cinfo[c].ci_vp, &tmp,
                   2697:                    RF_PARITYMAP_NBYTE,
1.276     mrg      2698:                    rf_parity_map_offset(raidPtr),
                   2699:                    rf_parity_map_size(raidPtr));
1.272     oster    2700:                if (first) {
1.269     jld      2701:                        memcpy(map, &tmp, sizeof(*map));
1.272     oster    2702:                        first = 0;
1.269     jld      2703:                } else {
                   2704:                        rf_paritymap_merge(map, &tmp);
                   2705:                }
                   2706:        }
                   2707: }
                   2708:
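                          /*
                           * Bump the modification counter and mark the component labels
                           * of all live components (and any in-use spares) as dirty.
                           */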
                   2709: void
1.169     oster    2710: rf_markalldirty(RF_Raid_t *raidPtr)
1.12      oster    2711: {
1.269     jld      2712:        RF_ComponentLabel_t *clabel;
1.146     oster    2713:        int sparecol;
1.166     oster    2714:        int c;
                   2715:        int j;
                   2716:        int scol = -1;
1.12      oster    2717:
                   2718:        raidPtr->mod_counter++;
1.166     oster    2719:        for (c = 0; c < raidPtr->numCol; c++) {
                   2720:                /* we don't want to touch (at all) a disk that has
                   2721:                   failed */
                   2722:                if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
1.269     jld      2723:                        clabel = raidget_component_label(raidPtr, c);
                   2724:                        if (clabel->status == rf_ds_spared) {
1.186     perry    2725:                                /* XXX do something special...
                   2726:                                   but whatever you do, don't
1.166     oster    2727:                                   try to access it!! */
                   2728:                        } else {
1.269     jld      2729:                                raidmarkdirty(raidPtr, c);
1.12      oster    2730:                        }
1.166     oster    2731:                }
1.186     perry    2732:        }
1.146     oster    2733:
1.12      oster    2734:        for( c = 0; c < raidPtr->numSpare ; c++) {
                   2735:                sparecol = raidPtr->numCol + c;
1.166     oster    2736:                if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.186     perry    2737:                        /*
                   2738:
                   2739:                           we claim this disk is "optimal" if it's
                   2740:                           rf_ds_used_spare, as that means it should be
                   2741:                           directly substitutable for the disk it replaced.
1.12      oster    2742:                           We note that too...
                   2743:
                   2744:                         */
                   2745:
1.166     oster    2746:                        for(j=0;j<raidPtr->numCol;j++) {
                   2747:                                if (raidPtr->Disks[j].spareCol == sparecol) {
                   2748:                                        scol = j;
                   2749:                                        break;
1.12      oster    2750:                                }
                   2751:                        }
1.186     perry    2752:
1.269     jld      2753:                        clabel = raidget_component_label(raidPtr, sparecol);
1.12      oster    2754:                        /* make sure status is noted */
1.146     oster    2755:
1.269     jld      2756:                        raid_init_component_label(raidPtr, clabel);
1.146     oster    2757:
1.269     jld      2758:                        clabel->row = 0;
                   2759:                        clabel->column = scol;
1.146     oster    2760:                        /* Note: we *don't* change status from rf_ds_used_spare
                   2761:                           to rf_ds_optimal */
                   2762:                        /* clabel.status = rf_ds_optimal; */
1.186     perry    2763:
1.269     jld      2764:                        raidmarkdirty(raidPtr, sparecol);
1.12      oster    2765:                }
                   2766:        }
                   2767: }
                   2768:
1.13      oster    2769:
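                          /*
                           * Update the component labels of all optimal components (and
                           * any in-use spares) with the current mod counter and unit
                           * number; on a final update, also mark them clean if parity
                           * is known to be good.
                           */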
                   2770: void
1.169     oster    2771: rf_update_component_labels(RF_Raid_t *raidPtr, int final)
1.13      oster    2772: {
1.269     jld      2773:        RF_ComponentLabel_t *clabel;
1.13      oster    2774:        int sparecol;
1.166     oster    2775:        int c;
                   2776:        int j;
                   2777:        int scol;
1.13      oster    2778:
                   2779:        scol = -1;
                   2780:
1.186     perry    2781:        /* XXX should do extra checks to make sure things really are clean,
1.13      oster    2782:           rather than blindly setting the clean bit... */
                   2783:
                   2784:        raidPtr->mod_counter++;
                   2785:
1.166     oster    2786:        for (c = 0; c < raidPtr->numCol; c++) {
                   2787:                if (raidPtr->Disks[c].status == rf_ds_optimal) {
1.269     jld      2788:                        clabel = raidget_component_label(raidPtr, c);
1.201     oster    2789:                        /* make sure status is noted */
1.269     jld      2790:                        clabel->status = rf_ds_optimal;
1.201     oster    2791:
1.214     oster    2792:                        /* note what unit we are configured as */
1.269     jld      2793:                        clabel->last_unit = raidPtr->raidid;
1.214     oster    2794:
1.269     jld      2795:                        raidflush_component_label(raidPtr, c);
1.166     oster    2796:                        if (final == RF_FINAL_COMPONENT_UPDATE) {
                   2797:                                if (raidPtr->parity_good == RF_RAID_CLEAN) {
1.269     jld      2798:                                        raidmarkclean(raidPtr, c);
1.91      oster    2799:                                }
1.166     oster    2800:                        }
1.186     perry    2801:                }
1.166     oster    2802:                /* else we don't touch it.. */
1.186     perry    2803:        }
1.63      oster    2804:
                   2805:        for( c = 0; c < raidPtr->numSpare ; c++) {
                   2806:                sparecol = raidPtr->numCol + c;
1.110     oster    2807:                /* Need to ensure that the reconstruct actually completed! */
1.166     oster    2808:                if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.186     perry    2809:                        /*
                   2810:
                   2811:                           we claim this disk is "optimal" if it's
                   2812:                           rf_ds_used_spare, as that means it should be
                   2813:                           directly substitutable for the disk it replaced.
1.63      oster    2814:                           We note that too...
                   2815:
                   2816:                         */
                   2817:
1.166     oster    2818:                        for(j=0;j<raidPtr->numCol;j++) {
                   2819:                                if (raidPtr->Disks[j].spareCol == sparecol) {
                   2820:                                        scol = j;
                   2821:                                        break;
1.63      oster    2822:                                }
                   2823:                        }
1.186     perry    2824:
1.63      oster    2825:                        /* XXX shouldn't *really* need this... */
1.269     jld      2826:                        clabel = raidget_component_label(raidPtr, sparecol);
1.63      oster    2827:                        /* make sure status is noted */
                   2828:
1.269     jld      2829:                        raid_init_component_label(raidPtr, clabel);
                   2830:
                   2831:                        clabel->column = scol;
                   2832:                        clabel->status = rf_ds_optimal;
                   2833:                        clabel->last_unit = raidPtr->raidid;
1.63      oster    2834:
1.269     jld      2835:                        raidflush_component_label(raidPtr, sparecol);
1.91      oster    2836:                        if (final == RF_FINAL_COMPONENT_UPDATE) {
1.13      oster    2837:                                if (raidPtr->parity_good == RF_RAID_CLEAN) {
1.269     jld      2838:                                        raidmarkclean(raidPtr, sparecol);
1.13      oster    2839:                                }
                   2840:                        }
                   2841:                }
                   2842:        }
1.68      oster    2843: }
                   2844:
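                          /*
                           * Close the vnode for a component: auto-configured components
                           * are closed with VOP_CLOSE()/vput(), manually configured ones
                           * with vn_close().
                           */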
                   2845: void
1.169     oster    2846: rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
1.69      oster    2847: {
                   2848:
                   2849:        if (vp != NULL) {
                   2850:                if (auto_configured == 1) {
1.96      oster    2851:                        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.238     pooka    2852:                        VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
1.69      oster    2853:                        vput(vp);
1.186     perry    2854:
                   2855:                } else {
1.244     ad       2856:                        (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
1.69      oster    2857:                }
1.186     perry    2858:        }
1.69      oster    2859: }
                   2860:
                   2861:
                   2862: void
1.169     oster    2863: rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
1.68      oster    2864: {
1.186     perry    2865:        int r,c;
1.69      oster    2866:        struct vnode *vp;
                   2867:        int acd;
1.68      oster    2868:
                   2869:
                   2870:        /* We take this opportunity to close the vnodes like we should.. */
                   2871:
1.166     oster    2872:        for (c = 0; c < raidPtr->numCol; c++) {
                   2873:                vp = raidPtr->raid_cinfo[c].ci_vp;
                   2874:                acd = raidPtr->Disks[c].auto_configured;
                   2875:                rf_close_component(raidPtr, vp, acd);
                   2876:                raidPtr->raid_cinfo[c].ci_vp = NULL;
                   2877:                raidPtr->Disks[c].auto_configured = 0;
1.68      oster    2878:        }
1.166     oster    2879:
1.68      oster    2880:        for (r = 0; r < raidPtr->numSpare; r++) {
1.166     oster    2881:                vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
                   2882:                acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
1.69      oster    2883:                rf_close_component(raidPtr, vp, acd);
1.166     oster    2884:                raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
                   2885:                raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
1.68      oster    2886:        }
1.37      oster    2887: }
1.63      oster    2888:
1.37      oster    2889:
1.186     perry    2890: void
1.169     oster    2891: rf_ReconThread(struct rf_recon_req *req)
1.37      oster    2892: {
                   2893:        int     s;
                   2894:        RF_Raid_t *raidPtr;
                   2895:
                   2896:        s = splbio();
                   2897:        raidPtr = (RF_Raid_t *) req->raidPtr;
                   2898:        raidPtr->recon_in_progress = 1;
                   2899:
1.166     oster    2900:        rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
1.37      oster    2901:                    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
                   2902:
                   2903:        RF_Free(req, sizeof(*req));
                   2904:
                   2905:        raidPtr->recon_in_progress = 0;
                   2906:        splx(s);
                   2907:
                   2908:        /* That's all... */
1.204     simonb   2909:        kthread_exit(0);        /* does not return */
1.37      oster    2910: }
                   2911:
                   2912: void
1.169     oster    2913: rf_RewriteParityThread(RF_Raid_t *raidPtr)
1.37      oster    2914: {
                   2915:        int retcode;
                   2916:        int s;
                   2917:
1.184     oster    2918:        raidPtr->parity_rewrite_stripes_done = 0;
1.37      oster    2919:        raidPtr->parity_rewrite_in_progress = 1;
                   2920:        s = splbio();
                   2921:        retcode = rf_RewriteParity(raidPtr);
                   2922:        splx(s);
                   2923:        if (retcode) {
1.279     christos 2924:                printf("raid%d: Error re-writing parity (%d)!\n",
                   2925:                    raidPtr->raidid, retcode);
1.37      oster    2926:        } else {
                   2927:                /* set the clean bit!  If we shut down correctly,
                   2928:                   the clean bit on each component label will get
                   2929:                   set */
                   2930:                raidPtr->parity_good = RF_RAID_CLEAN;
                   2931:        }
                   2932:        raidPtr->parity_rewrite_in_progress = 0;
1.85      oster    2933:
                   2934:        /* Anyone waiting for us to stop?  If so, inform them... */
                   2935:        if (raidPtr->waitShutdown) {
                   2936:                wakeup(&raidPtr->parity_rewrite_in_progress);
                   2937:        }
1.37      oster    2938:
                   2939:        /* That's all... */
1.204     simonb   2940:        kthread_exit(0);        /* does not return */
1.37      oster    2941: }
                   2942:
                   2943:
                   2944: void
1.169     oster    2945: rf_CopybackThread(RF_Raid_t *raidPtr)
1.37      oster    2946: {
                   2947:        int s;
                   2948:
                   2949:        raidPtr->copyback_in_progress = 1;
                   2950:        s = splbio();
                   2951:        rf_CopybackReconstructedData(raidPtr);
                   2952:        splx(s);
                   2953:        raidPtr->copyback_in_progress = 0;
                   2954:
                   2955:        /* That's all... */
1.204     simonb   2956:        kthread_exit(0);        /* does not return */
1.37      oster    2957: }
                   2958:
                   2959:
                   2960: void
1.169     oster    2961: rf_ReconstructInPlaceThread(struct rf_recon_req *req)
1.37      oster    2962: {
                   2963:        int s;
                   2964:        RF_Raid_t *raidPtr;
1.186     perry    2965:
1.37      oster    2966:        s = splbio();
                   2967:        raidPtr = req->raidPtr;
                   2968:        raidPtr->recon_in_progress = 1;
1.166     oster    2969:        rf_ReconstructInPlace(raidPtr, req->col);
1.37      oster    2970:        RF_Free(req, sizeof(*req));
                   2971:        raidPtr->recon_in_progress = 0;
                   2972:        splx(s);
                   2973:
                   2974:        /* That's all... */
1.204     simonb   2975:        kthread_exit(0);        /* does not return */
1.48      oster    2976: }
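/*
 * Illustrative sketch only (not part of this file): the thread entry
 * points above -- rf_ReconThread(), rf_RewriteParityThread(),
 * rf_CopybackThread() and rf_ReconstructInPlaceThread() -- are started
 * from the ioctl paths earlier in this driver.  Spawning one with
 * kthread_create(9) would look roughly like the example below; the
 * helper name and the M_WAITOK allocation are assumptions made for the
 * example, not the driver's actual code.
 */
#if 0	/* example only */
static int
example_start_recon_thread(RF_Raid_t *raidPtr, int col)
{
	struct rf_recon_req *req;
	int error;

	req = malloc(sizeof(*req), M_RAIDFRAME, M_WAITOK | M_ZERO);
	req->raidPtr = raidPtr;
	req->col = col;
	req->flags = RF_FDFLAGS_RECON;	/* fail the disk, then reconstruct */

	/* rf_ReconThread() frees 'req' and calls kthread_exit() itself. */
	error = kthread_create(PRI_NONE, 0, NULL,
	    (void (*)(void *))rf_ReconThread, req, NULL,
	    "raid%d_recon", raidPtr->raidid);
	if (error != 0)
		free(req, M_RAIDFRAME);
	return error;
}
#endif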
                   2977:
1.213     christos 2978: static RF_AutoConfig_t *
                   2979: rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
1.276     mrg      2980:     const char *cname, RF_SectorCount_t size, uint64_t numsecs,
                   2981:     unsigned secsize)
1.213     christos 2982: {
                   2983:        int good_one = 0;
                   2984:        RF_ComponentLabel_t *clabel;
                   2985:        RF_AutoConfig_t *ac;
                   2986:
                   2987:        clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
                   2988:        if (clabel == NULL) {
                   2989: oomem:
                   2990:                    while(ac_list) {
                   2991:                            ac = ac_list;
                   2992:                            if (ac->clabel)
                   2993:                                    free(ac->clabel, M_RAIDFRAME);
                   2994:                            ac_list = ac_list->next;
                   2995:                            free(ac, M_RAIDFRAME);
                   2996:                    }
                   2997:                    printf("RAID auto config: out of memory!\n");
                   2998:                    return NULL; /* XXX probably should panic? */
                   2999:        }
                   3000:
1.276     mrg      3001:        if (!raidread_component_label(secsize, dev, vp, clabel)) {
                   3002:                /* Got the label.  Does it look reasonable? */
1.284     mrg      3003:                if (rf_reasonable_label(clabel, numsecs) &&
1.282     enami    3004:                    (rf_component_label_partitionsize(clabel) <= size)) {
1.224     oster    3005: #ifdef DEBUG
1.276     mrg      3006:                        printf("Component on: %s: %llu\n",
1.213     christos 3007:                                cname, (unsigned long long)size);
1.276     mrg      3008:                        rf_print_component_label(clabel);
1.213     christos 3009: #endif
1.276     mrg      3010:                        /* if it's reasonable, add it, else ignore it. */
                   3011:                        ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
1.213     christos 3012:                                M_NOWAIT);
1.276     mrg      3013:                        if (ac == NULL) {
                   3014:                                free(clabel, M_RAIDFRAME);
                   3015:                                goto oomem;
                   3016:                        }
                   3017:                        strlcpy(ac->devname, cname, sizeof(ac->devname));
                   3018:                        ac->dev = dev;
                   3019:                        ac->vp = vp;
                   3020:                        ac->clabel = clabel;
                   3021:                        ac->next = ac_list;
                   3022:                        ac_list = ac;
                   3023:                        good_one = 1;
                   3024:                }
1.213     christos 3025:        }
                   3026:        if (!good_one) {
                   3027:                /* cleanup */
                   3028:                free(clabel, M_RAIDFRAME);
                   3029:                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.238     pooka    3030:                VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
1.213     christos 3031:                vput(vp);
                   3032:        }
                   3033:        return ac_list;
                   3034: }
                   3035:
1.48      oster    3036: RF_AutoConfig_t *
1.259     cegger   3037: rf_find_raid_components(void)
1.48      oster    3038: {
                   3039:        struct vnode *vp;
                   3040:        struct disklabel label;
1.261     dyoung   3041:        device_t dv;
1.268     dyoung   3042:        deviter_t di;
1.48      oster    3043:        dev_t dev;
1.296     buhrow   3044:        int bmajor, bminor, wedge, rf_part_found;
1.48      oster    3045:        int error;
                   3046:        int i;
                   3047:        RF_AutoConfig_t *ac_list;
1.276     mrg      3048:        uint64_t numsecs;
                   3049:        unsigned secsize;
1.48      oster    3050:
                   3051:        /* initialize the AutoConfig list */
                   3052:        ac_list = NULL;
                   3053:
                   3054:        /* we begin by trolling through *all* the devices on the system */
                   3055:
1.268     dyoung   3056:        for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
                   3057:             dv = deviter_next(&di)) {
1.48      oster    3058:
                   3059:                /* we are only interested in disks... */
1.200     thorpej  3060:                if (device_class(dv) != DV_DISK)
1.48      oster    3061:                        continue;
                   3062:
                   3063:                /* we don't care about floppies... */
1.206     thorpej  3064:                if (device_is_a(dv, "fd")) {
1.119     leo      3065:                        continue;
                   3066:                }
1.129     oster    3067:
                   3068:                /* we don't care about CD's... */
1.206     thorpej  3069:                if (device_is_a(dv, "cd")) {
1.129     oster    3070:                        continue;
                   3071:                }
                   3072:
1.248     oster    3073:                /* we don't care about md's... */
                   3074:                if (device_is_a(dv, "md")) {
                   3075:                        continue;
                   3076:                }
                   3077:
1.120     leo      3078:                /* hdfd is the Atari/Hades floppy driver */
1.206     thorpej  3079:                if (device_is_a(dv, "hdfd")) {
1.121     leo      3080:                        continue;
                   3081:                }
1.206     thorpej  3082:
1.121     leo      3083:                /* fdisa is the Atari/Milan floppy driver */
1.206     thorpej  3084:                if (device_is_a(dv, "fdisa")) {
1.48      oster    3085:                        continue;
                   3086:                }
1.186     perry    3087:
1.48      oster    3088:                /* need to find the device_name_to_block_device_major stuff */
1.245     cegger   3089:                bmajor = devsw_name2blk(device_xname(dv), NULL, 0);
1.48      oster    3090:
1.296     buhrow   3091:                rf_part_found = 0; /* No raid partition as yet */
                   3092:
1.48      oster    3093:                /* get a vnode for the raw partition of this disk */
                   3094:
1.213     christos 3095:                wedge = device_is_a(dv, "dk");
                   3096:                bminor = minor(device_unit(dv));
                   3097:                dev = wedge ? makedev(bmajor, bminor) :
                   3098:                    MAKEDISKDEV(bmajor, bminor, RAW_PART);
1.48      oster    3099:                if (bdevvp(dev, &vp))
                   3100:                        panic("RAID can't alloc vnode");
                   3101:
1.293     jmcneill 3102:                error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
1.48      oster    3103:
                   3104:                if (error) {
1.186     perry    3105:                        /* "Who cares."  Continue looking
1.48      oster    3106:                           for something that exists*/
                   3107:                        vput(vp);
                   3108:                        continue;
                   3109:                }
                   3110:
1.276     mrg      3111:                error = getdisksize(vp, &numsecs, &secsize);
                   3112:                if (error) {
                   3113:                        vput(vp);
                   3114:                        continue;
                   3115:                }
1.213     christos 3116:                if (wedge) {
                   3117:                        struct dkwedge_info dkw;
                   3118:                        error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
1.238     pooka    3119:                            NOCRED);
1.213     christos 3120:                        if (error) {
                   3121:                                printf("RAIDframe: can't get wedge info for "
1.245     cegger   3122:                                    "dev %s (%d)\n", device_xname(dv), error);
1.241     oster    3123:                                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
                   3124:                                VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
                   3125:                                vput(vp);
1.213     christos 3126:                                continue;
                   3127:                        }
                   3128:
1.241     oster    3129:                        if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
                   3130:                                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
                   3131:                                VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
                   3132:                                vput(vp);
1.228     christos 3133:                                continue;
1.241     oster    3134:                        }
1.213     christos 3135:
                   3136:                        ac_list = rf_get_component(ac_list, dev, vp,
1.276     mrg      3137:                            device_xname(dv), dkw.dkw_size, numsecs, secsize);
1.296     buhrow   3138:                        rf_part_found = 1; /* There is a raid component on this disk */
1.213     christos 3139:                        continue;
                   3140:                }
                   3141:
1.48      oster    3142:                /* Ok, the disk exists.  Go get the disklabel. */
1.238     pooka    3143:                error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
1.48      oster    3144:                if (error) {
                   3145:                        /*
                   3146:                         * XXX can't happen - open() would
                   3147:                         * have errored out (or faked up one)
                   3148:                         */
1.181     thorpej  3149:                        if (error != ENOTTY)
                   3150:                                printf("RAIDframe: can't get label for dev "
1.245     cegger   3151:                                    "%s (%d)\n", device_xname(dv), error);
1.48      oster    3152:                }
                   3153:
                   3154:                /* don't need this any more.  We'll allocate it again
                   3155:                   a little later if we really do... */
1.96      oster    3156:                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.238     pooka    3157:                VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
1.48      oster    3158:                vput(vp);
                   3159:
1.181     thorpej  3160:                if (error)
                   3161:                        continue;
                   3162:
1.296     buhrow   3163:                rf_part_found = 0; /* No raid partitions yet */
1.213     christos 3164:                for (i = 0; i < label.d_npartitions; i++) {
                   3165:                        char cname[sizeof(ac_list->devname)];
                   3166:
1.48      oster    3167:                        /* We only support partitions marked as RAID */
                   3168:                        if (label.d_partitions[i].p_fstype != FS_RAID)
                   3169:                                continue;
                   3170:
1.206     thorpej  3171:                        dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
1.48      oster    3172:                        if (bdevvp(dev, &vp))
                   3173:                                panic("RAID can't alloc vnode");
                   3174:
1.238     pooka    3175:                        error = VOP_OPEN(vp, FREAD, NOCRED);
1.48      oster    3176:                        if (error) {
                   3177:                                /* Whatever... */
                   3178:                                vput(vp);
                   3179:                                continue;
                   3180:                        }
1.213     christos 3181:                        snprintf(cname, sizeof(cname), "%s%c",
1.245     cegger   3182:                            device_xname(dv), 'a' + i);
1.213     christos 3183:                        ac_list = rf_get_component(ac_list, dev, vp, cname,
1.276     mrg      3184:                                label.d_partitions[i].p_size, numsecs, secsize);
1.296     buhrow   3185:                        rf_part_found = 1; /* There is at least one raid partition on this disk */
                   3186:                }
                   3187:
                   3188:                /*
                   3189:                 * If there is no raid component on this disk, either in a
                   3190:                 * disklabel or inside a wedge, check the raw partition as well,
                   3191:                 * as it is possible to configure raid components on raw disk
                   3192:                 * devices.
                   3193:                 */
                   3194:
                   3195:                if (!rf_part_found) {
                   3196:                        char cname[sizeof(ac_list->devname)];
                   3197:
                   3198:                        dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
                   3199:                        if (bdevvp(dev, &vp))
                   3200:                                panic("RAID can't alloc vnode");
                   3201:
                   3202:                        error = VOP_OPEN(vp, FREAD, NOCRED);
                   3203:                        if (error) {
                   3204:                                /* Whatever... */
                   3205:                                vput(vp);
                   3206:                                continue;
                   3207:                        }
                   3208:                        snprintf(cname, sizeof(cname), "%s%c",
                   3209:                            device_xname(dv), 'a' + RAW_PART);
                   3210:                        ac_list = rf_get_component(ac_list, dev, vp, cname,
                   3211:                                label.d_partitions[RAW_PART].p_size, numsecs, secsize);
1.48      oster    3212:                }
                   3213:        }
1.268     dyoung   3214:        deviter_release(&di);
1.213     christos 3215:        return ac_list;
1.48      oster    3216: }
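/*
 * Illustrative sketch of the boot-time autoconfiguration flow (the real
 * sequencing lives elsewhere in this file and may differ in detail):
 * rf_find_raid_components() above collects candidate components, which
 * are then grouped and, if complete, configured by the set-handling
 * functions that follow.
 */
#if 0	/* example only */
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *cset, *next_cset;
	struct raid_softc *sc;

	ac_list = rf_find_raid_components();
	for (cset = rf_create_auto_sets(ac_list); cset != NULL;
	     cset = next_cset) {
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			sc = rf_auto_config_set(cset);
			if (sc == NULL)	/* configuration failed */
				rf_release_all_vps(cset);
		} else {
			/* incomplete, or not marked for autoconfiguration */
			rf_release_all_vps(cset);
		}
		/* labels and the set itself are freed in either case */
		rf_cleanup_config_set(cset);
	}
#endif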
1.186     perry    3217:
1.213     christos 3218:
1.292     oster    3219: int
1.284     mrg      3220: rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
1.48      oster    3221: {
1.186     perry    3222:
1.48      oster    3223:        if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
                   3224:             (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
                   3225:            ((clabel->clean == RF_RAID_CLEAN) ||
                   3226:             (clabel->clean == RF_RAID_DIRTY)) &&
1.186     perry    3227:            clabel->row >=0 &&
                   3228:            clabel->column >= 0 &&
1.48      oster    3229:            clabel->num_rows > 0 &&
                   3230:            clabel->num_columns > 0 &&
1.186     perry    3231:            clabel->row < clabel->num_rows &&
1.48      oster    3232:            clabel->column < clabel->num_columns &&
                   3233:            clabel->blockSize > 0 &&
1.282     enami    3234:            /*
                   3235:             * numBlocksHi may contain garbage, but it is ok since
                   3236:             * the type is unsigned.  If it is really garbage,
                   3237:             * rf_fix_old_label_size() will fix it.
                   3238:             */
                   3239:            rf_component_label_numblocks(clabel) > 0) {
1.284     mrg      3240:                /*
                   3241:                 * label looks reasonable enough...
                   3242:                 * let's make sure it has no old garbage.
                   3243:                 */
1.292     oster    3244:                if (numsecs)
                   3245:                        rf_fix_old_label_size(clabel, numsecs);
1.48      oster    3246:                return(1);
                   3247:        }
                   3248:        return(0);
                   3249: }
                   3250:
                   3251:
1.278     mrg      3252: /*
                   3253:  * For reasons yet unknown, some old component labels have garbage in
                   3254:  * the newer numBlocksHi region, and this causes lossage.  Since those
                   3255:  * disks will also have numsecs set to less than 32 bits of sectors,
1.299     oster    3256:  * we can determine when this corruption has occurred, and fix it.
1.284     mrg      3257:  *
                   3258:  * The exact same problem, with the same unknown reason, happens to
                   3259:  * the partitionSizeHi member as well.
1.278     mrg      3260:  */
                   3261: static void
                   3262: rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
                   3263: {
                   3264:
1.284     mrg      3265:        if (numsecs < ((uint64_t)1 << 32)) {
                   3266:                if (clabel->numBlocksHi) {
                   3267:                        printf("WARNING: total sectors < 32 bits, yet "
                   3268:                               "numBlocksHi set\n"
                   3269:                               "WARNING: resetting numBlocksHi to zero.\n");
                   3270:                        clabel->numBlocksHi = 0;
                   3271:                }
                   3272:
                   3273:                if (clabel->partitionSizeHi) {
                   3274:                        printf("WARNING: total sectors < 32 bits, yet "
                   3275:                               "partitionSizeHi set\n"
                   3276:                               "WARNING: resetting partitionSizeHi to zero.\n");
                   3277:                        clabel->partitionSizeHi = 0;
                   3278:                }
1.278     mrg      3279:        }
                   3280: }
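/*
 * Worked example for the check above: a 1 TB component has roughly
 * 1.95 * 10^9 512-byte sectors, well under 2^32, so any nonzero
 * numBlocksHi or partitionSizeHi in its label must be stale and is
 * cleared.  A 3 TB component has roughly 5.9 * 10^9 sectors, so
 * numsecs is at least 2^32 and the "Hi" fields are left untouched,
 * since they may be legitimate.
 */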
                   3281:
                   3282:
1.224     oster    3283: #ifdef DEBUG
1.48      oster    3284: void
1.169     oster    3285: rf_print_component_label(RF_ComponentLabel_t *clabel)
1.48      oster    3286: {
1.282     enami    3287:        uint64_t numBlocks;
1.275     mrg      3288:
1.282     enami    3289:        numBlocks = rf_component_label_numblocks(clabel);
1.275     mrg      3290:
1.48      oster    3291:        printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
1.186     perry    3292:               clabel->row, clabel->column,
1.48      oster    3293:               clabel->num_rows, clabel->num_columns);
                   3294:        printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
                   3295:               clabel->version, clabel->serial_number,
                   3296:               clabel->mod_counter);
                   3297:        printf("   Clean: %s Status: %d\n",
1.271     dyoung   3298:               clabel->clean ? "Yes" : "No", clabel->status);
1.48      oster    3299:        printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
                   3300:               clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
1.275     mrg      3301:        printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
                   3302:               (char) clabel->parityConfig, clabel->blockSize, numBlocks);
1.271     dyoung   3303:        printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
1.186     perry    3304:        printf("   Contains root partition: %s\n",
1.271     dyoung   3305:               clabel->root_partition ? "Yes" : "No");
                   3306:        printf("   Last configured as: raid%d\n", clabel->last_unit);
1.51      oster    3307: #if 0
                   3308:           printf("   Config order: %d\n", clabel->config_order);
                   3309: #endif
1.186     perry    3310:
1.48      oster    3311: }
1.133     oster    3312: #endif
1.48      oster    3313:
                   3314: RF_ConfigSet_t *
1.169     oster    3315: rf_create_auto_sets(RF_AutoConfig_t *ac_list)
1.48      oster    3316: {
                   3317:        RF_AutoConfig_t *ac;
                   3318:        RF_ConfigSet_t *config_sets;
                   3319:        RF_ConfigSet_t *cset;
                   3320:        RF_AutoConfig_t *ac_next;
                   3321:
                   3322:
                   3323:        config_sets = NULL;
                   3324:
                   3325:        /* Go through the AutoConfig list, and figure out which components
                   3326:           belong to what sets.  */
                   3327:        ac = ac_list;
                   3328:        while(ac!=NULL) {
                   3329:                /* we're going to putz with ac->next, so save it here
                   3330:                   for use at the end of the loop */
                   3331:                ac_next = ac->next;
                   3332:
                   3333:                if (config_sets == NULL) {
                   3334:                        /* will need at least this one... */
                   3335:                        config_sets = (RF_ConfigSet_t *)
1.186     perry    3336:                                malloc(sizeof(RF_ConfigSet_t),
1.48      oster    3337:                                       M_RAIDFRAME, M_NOWAIT);
                   3338:                        if (config_sets == NULL) {
1.141     provos   3339:                                panic("rf_create_auto_sets: No memory!");
1.48      oster    3340:                        }
                   3341:                        /* this one is easy :) */
                   3342:                        config_sets->ac = ac;
                   3343:                        config_sets->next = NULL;
1.51      oster    3344:                        config_sets->rootable = 0;
1.48      oster    3345:                        ac->next = NULL;
                   3346:                } else {
                   3347:                        /* which set does this component fit into? */
                   3348:                        cset = config_sets;
                   3349:                        while(cset!=NULL) {
1.49      oster    3350:                                if (rf_does_it_fit(cset, ac)) {
1.86      oster    3351:                                        /* looks like it matches... */
                   3352:                                        ac->next = cset->ac;
                   3353:                                        cset->ac = ac;
1.48      oster    3354:                                        break;
                   3355:                                }
                   3356:                                cset = cset->next;
                   3357:                        }
                   3358:                        if (cset==NULL) {
                   3359:                                /* didn't find a match above... new set..*/
                   3360:                                cset = (RF_ConfigSet_t *)
1.186     perry    3361:                                        malloc(sizeof(RF_ConfigSet_t),
1.48      oster    3362:                                               M_RAIDFRAME, M_NOWAIT);
                   3363:                                if (cset == NULL) {
1.141     provos   3364:                                        panic("rf_create_auto_sets: No memory!");
1.48      oster    3365:                                }
                   3366:                                cset->ac = ac;
                   3367:                                ac->next = NULL;
                   3368:                                cset->next = config_sets;
1.51      oster    3369:                                cset->rootable = 0;
1.48      oster    3370:                                config_sets = cset;
                   3371:                        }
                   3372:                }
                   3373:                ac = ac_next;
                   3374:        }
                   3375:
                   3376:
                   3377:        return(config_sets);
                   3378: }
                   3379:
                   3380: static int
1.169     oster    3381: rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
1.48      oster    3382: {
                   3383:        RF_ComponentLabel_t *clabel1, *clabel2;
                   3384:
                   3385:        /* If this one matches the *first* one in the set, that's good
                   3386:           enough, since the other members of the set would have been
                   3387:           through here too... */
1.60      oster    3388:        /* note that we are not checking partitionSize here..
                   3389:
                   3390:           Note that we are also not checking the mod_counters here.
1.299     oster    3391:           If everything else matches except the mod_counter, that's
1.60      oster    3392:           good enough for this test.  We will deal with the mod_counters
1.186     perry    3393:           a little later in the autoconfiguration process.
1.60      oster    3394:
                   3395:            (clabel1->mod_counter == clabel2->mod_counter) &&
1.81      oster    3396:
                   3397:           The reason we don't check for this is that failed disks
                   3398:           will have lower modification counts.  If those disks are
                   3399:           not added to the set they used to belong to, then they will
                   3400:           form their own set, which may result in 2 different sets,
                   3401:           for example, competing to be configured at raid0, and
                   3402:           perhaps competing to be the root filesystem set.  If the
                   3403:           wrong ones get configured, or both attempt to become /,
                   3404:           weird behaviour and/or serious lossage will occur.  Thus we
                   3405:           need to bring them into the fold here, and kick them out at
                   3406:           a later point.
1.60      oster    3407:
                   3408:        */
1.48      oster    3409:
                   3410:        clabel1 = cset->ac->clabel;
                   3411:        clabel2 = ac->clabel;
                   3412:        if ((clabel1->version == clabel2->version) &&
                   3413:            (clabel1->serial_number == clabel2->serial_number) &&
                   3414:            (clabel1->num_rows == clabel2->num_rows) &&
                   3415:            (clabel1->num_columns == clabel2->num_columns) &&
                   3416:            (clabel1->sectPerSU == clabel2->sectPerSU) &&
                   3417:            (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
                   3418:            (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
                   3419:            (clabel1->parityConfig == clabel2->parityConfig) &&
                   3420:            (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
                   3421:            (clabel1->blockSize == clabel2->blockSize) &&
1.282     enami    3422:            rf_component_label_numblocks(clabel1) ==
                   3423:            rf_component_label_numblocks(clabel2) &&
1.48      oster    3424:            (clabel1->autoconfigure == clabel2->autoconfigure) &&
                   3425:            (clabel1->root_partition == clabel2->root_partition) &&
                   3426:            (clabel1->last_unit == clabel2->last_unit) &&
                   3427:            (clabel1->config_order == clabel2->config_order)) {
                   3428:                /* if it gets here, it almost *has* to be a match */
                   3429:        } else {
1.186     perry    3430:                /* it's not consistent with somebody in the set..
1.48      oster    3431:                   punt */
                   3432:                return(0);
                   3433:        }
                   3434:        /* all was fine.. it must fit... */
                   3435:        return(1);
                   3436: }
                   3437:
                   3438: int
1.169     oster    3439: rf_have_enough_components(RF_ConfigSet_t *cset)
1.48      oster    3440: {
1.51      oster    3441:        RF_AutoConfig_t *ac;
                   3442:        RF_AutoConfig_t *auto_config;
                   3443:        RF_ComponentLabel_t *clabel;
1.166     oster    3444:        int c;
1.51      oster    3445:        int num_cols;
                   3446:        int num_missing;
1.86      oster    3447:        int mod_counter;
1.87      oster    3448:        int mod_counter_found;
1.88      oster    3449:        int even_pair_failed;
                   3450:        char parity_type;
1.186     perry    3451:
1.51      oster    3452:
1.48      oster    3453:        /* check to see that we have enough 'live' components
                   3454:           of this set.  If so, we can configure it if necessary */
                   3455:
1.51      oster    3456:        num_cols = cset->ac->clabel->num_columns;
1.88      oster    3457:        parity_type = cset->ac->clabel->parityConfig;
1.51      oster    3458:
                   3459:        /* XXX Check for duplicate components!?!?!? */
                   3460:
1.86      oster    3461:        /* Determine what the mod_counter is supposed to be for this set. */
                   3462:
1.87      oster    3463:        mod_counter_found = 0;
1.101     oster    3464:        mod_counter = 0;
1.86      oster    3465:        ac = cset->ac;
                   3466:        while(ac!=NULL) {
1.87      oster    3467:                if (mod_counter_found==0) {
1.86      oster    3468:                        mod_counter = ac->clabel->mod_counter;
1.87      oster    3469:                        mod_counter_found = 1;
                   3470:                } else {
                   3471:                        if (ac->clabel->mod_counter > mod_counter) {
                   3472:                                mod_counter = ac->clabel->mod_counter;
                   3473:                        }
1.86      oster    3474:                }
                   3475:                ac = ac->next;
                   3476:        }
                   3477:
1.51      oster    3478:        num_missing = 0;
                   3479:        auto_config = cset->ac;
                   3480:
1.166     oster    3481:        even_pair_failed = 0;
                   3482:        for(c=0; c<num_cols; c++) {
                   3483:                ac = auto_config;
                   3484:                while(ac!=NULL) {
1.186     perry    3485:                        if ((ac->clabel->column == c) &&
1.166     oster    3486:                            (ac->clabel->mod_counter == mod_counter)) {
                   3487:                                /* it's this one... */
1.224     oster    3488: #ifdef DEBUG
1.166     oster    3489:                                printf("Found: %s at %d\n",
                   3490:                                       ac->devname,c);
1.51      oster    3491: #endif
1.166     oster    3492:                                break;
1.51      oster    3493:                        }
1.166     oster    3494:                        ac=ac->next;
                   3495:                }
                   3496:                if (ac==NULL) {
1.51      oster    3497:                                /* Didn't find one here! */
1.88      oster    3498:                                /* special case for RAID 1, especially
                   3499:                                   where there are more than 2
                   3500:                                   components (where RAIDframe treats
                   3501:                                   things a little differently :( ) */
1.166     oster    3502:                        if (parity_type == '1') {
                   3503:                                if (c%2 == 0) { /* even component */
                   3504:                                        even_pair_failed = 1;
                   3505:                                } else { /* odd component.  If
                   3506:                                            we're failed, and
                   3507:                                            so is the even
                   3508:                                            component, it's
                   3509:                                            "Good Night, Charlie" */
                   3510:                                        if (even_pair_failed == 1) {
                   3511:                                                return(0);
1.88      oster    3512:                                        }
                   3513:                                }
1.166     oster    3514:                        } else {
                   3515:                                /* normal accounting */
                   3516:                                num_missing++;
1.88      oster    3517:                        }
1.166     oster    3518:                }
                   3519:                if ((parity_type == '1') && (c%2 == 1)) {
1.88      oster    3520:                                /* Just did an even component, and we didn't
1.186     perry    3521:                                   bail.. reset the even_pair_failed flag,
1.88      oster    3522:                                   and go on to the next component.... */
1.166     oster    3523:                        even_pair_failed = 0;
1.51      oster    3524:                }
                   3525:        }
                   3526:
                   3527:        clabel = cset->ac->clabel;
                   3528:
                   3529:        if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
                   3530:            ((clabel->parityConfig == '4') && (num_missing > 1)) ||
                   3531:            ((clabel->parityConfig == '5') && (num_missing > 1))) {
                   3532:                /* XXX this needs to be made *much* more general */
                   3533:                /* Too many failures */
                   3534:                return(0);
                   3535:        }
                   3536:        /* otherwise, all is well, and we've got enough to take a kick
                   3537:           at autoconfiguring this set */
                   3538:        return(1);
1.48      oster    3539: }
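/*
 * Concrete example of the RAID 1 accounting above (illustrative): with
 * four components the mirror pairs are (0,1) and (2,3).  Losing
 * components 0 and 2 leaves one live member in each pair, so the set
 * can still be configured; losing both 0 and 1 kills a whole pair and
 * rf_have_enough_components() returns 0.
 */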
                   3540:
                   3541: void
1.169     oster    3542: rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
1.222     christos 3543:                        RF_Raid_t *raidPtr)
1.48      oster    3544: {
                   3545:        RF_ComponentLabel_t *clabel;
1.77      oster    3546:        int i;
1.48      oster    3547:
                   3548:        clabel = ac->clabel;
                   3549:
                   3550:        /* 1. Fill in the common stuff */
1.166     oster    3551:        config->numRow = clabel->num_rows = 1;
1.48      oster    3552:        config->numCol = clabel->num_columns;
                   3553:        config->numSpare = 0; /* XXX should this be set here? */
                   3554:        config->sectPerSU = clabel->sectPerSU;
                   3555:        config->SUsPerPU = clabel->SUsPerPU;
                   3556:        config->SUsPerRU = clabel->SUsPerRU;
                   3557:        config->parityConfig = clabel->parityConfig;
                   3558:        /* XXX... */
                   3559:        strcpy(config->diskQueueType,"fifo");
                   3560:        config->maxOutstandingDiskReqs = clabel->maxOutstanding;
                   3561:        config->layoutSpecificSize = 0; /* XXX ?? */
                   3562:
                   3563:        while(ac!=NULL) {
                   3564:                /* row/col values will be in range due to the checks
                   3565:                   in rf_reasonable_label() */
1.166     oster    3566:                strcpy(config->devnames[0][ac->clabel->column],
1.48      oster    3567:                       ac->devname);
                   3568:                ac = ac->next;
                   3569:        }
                   3570:
1.77      oster    3571:        for(i=0;i<RF_MAXDBGV;i++) {
1.163     fvdl     3572:                config->debugVars[i][0] = 0;
1.77      oster    3573:        }
1.48      oster    3574: }
                   3575:
                   3576: int
1.169     oster    3577: rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
1.48      oster    3578: {
1.269     jld      3579:        RF_ComponentLabel_t *clabel;
1.166     oster    3580:        int column;
1.148     oster    3581:        int sparecol;
1.48      oster    3582:
1.54      oster    3583:        raidPtr->autoconfigure = new_value;
1.166     oster    3584:
                   3585:        for(column=0; column<raidPtr->numCol; column++) {
                   3586:                if (raidPtr->Disks[column].status == rf_ds_optimal) {
1.269     jld      3587:                        clabel = raidget_component_label(raidPtr, column);
                   3588:                        clabel->autoconfigure = new_value;
                   3589:                        raidflush_component_label(raidPtr, column);
1.48      oster    3590:                }
                   3591:        }
1.148     oster    3592:        for(column = 0; column < raidPtr->numSpare ; column++) {
                   3593:                sparecol = raidPtr->numCol + column;
1.166     oster    3594:                if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.269     jld      3595:                        clabel = raidget_component_label(raidPtr, sparecol);
                   3596:                        clabel->autoconfigure = new_value;
                   3597:                        raidflush_component_label(raidPtr, sparecol);
1.148     oster    3598:                }
                   3599:        }
1.48      oster    3600:        return(new_value);
                   3601: }
                   3602:
                   3603: int
1.169     oster    3604: rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
1.48      oster    3605: {
1.269     jld      3606:        RF_ComponentLabel_t *clabel;
1.166     oster    3607:        int column;
1.148     oster    3608:        int sparecol;
1.48      oster    3609:
1.54      oster    3610:        raidPtr->root_partition = new_value;
1.166     oster    3611:        for(column=0; column<raidPtr->numCol; column++) {
                   3612:                if (raidPtr->Disks[column].status == rf_ds_optimal) {
1.269     jld      3613:                        clabel = raidget_component_label(raidPtr, column);
                   3614:                        clabel->root_partition = new_value;
                   3615:                        raidflush_component_label(raidPtr, column);
1.148     oster    3616:                }
                   3617:        }
                   3618:        for(column = 0; column < raidPtr->numSpare ; column++) {
                   3619:                sparecol = raidPtr->numCol + column;
1.166     oster    3620:                if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.269     jld      3621:                        clabel = raidget_component_label(raidPtr, sparecol);
                   3622:                        clabel->root_partition = new_value;
                   3623:                        raidflush_component_label(raidPtr, sparecol);
1.48      oster    3624:                }
                   3625:        }
                   3626:        return(new_value);
                   3627: }
                   3628:
                   3629: void
1.169     oster    3630: rf_release_all_vps(RF_ConfigSet_t *cset)
1.48      oster    3631: {
                   3632:        RF_AutoConfig_t *ac;
1.186     perry    3633:
1.48      oster    3634:        ac = cset->ac;
                   3635:        while(ac!=NULL) {
                   3636:                /* Close the vp, and give it back */
                   3637:                if (ac->vp) {
1.96      oster    3638:                        vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
1.238     pooka    3639:                        VOP_CLOSE(ac->vp, FREAD, NOCRED);
1.48      oster    3640:                        vput(ac->vp);
1.86      oster    3641:                        ac->vp = NULL;
1.48      oster    3642:                }
                   3643:                ac = ac->next;
                   3644:        }
                   3645: }
                   3646:
                   3647:
                   3648: void
1.169     oster    3649: rf_cleanup_config_set(RF_ConfigSet_t *cset)
1.48      oster    3650: {
                   3651:        RF_AutoConfig_t *ac;
                   3652:        RF_AutoConfig_t *next_ac;
1.186     perry    3653:
1.48      oster    3654:        ac = cset->ac;
                   3655:        while(ac!=NULL) {
                   3656:                next_ac = ac->next;
                   3657:                /* nuke the label */
                   3658:                free(ac->clabel, M_RAIDFRAME);
                   3659:                /* cleanup the config structure */
                   3660:                free(ac, M_RAIDFRAME);
                   3661:                /* "next.." */
                   3662:                ac = next_ac;
                   3663:        }
                   3664:        /* and, finally, nuke the config set */
                   3665:        free(cset, M_RAIDFRAME);
                   3666: }
                   3667:
                   3668:
                   3669: void
1.169     oster    3670: raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
1.48      oster    3671: {
                   3672:        /* current version number */
1.186     perry    3673:        clabel->version = RF_COMPONENT_LABEL_VERSION;
1.57      oster    3674:        clabel->serial_number = raidPtr->serial_number;
1.48      oster    3675:        clabel->mod_counter = raidPtr->mod_counter;
1.269     jld      3676:
1.166     oster    3677:        clabel->num_rows = 1;
1.48      oster    3678:        clabel->num_columns = raidPtr->numCol;
                   3679:        clabel->clean = RF_RAID_DIRTY; /* not clean */
                   3680:        clabel->status = rf_ds_optimal; /* "It's good!" */
1.186     perry    3681:
1.48      oster    3682:        clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
                   3683:        clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
                   3684:        clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
1.54      oster    3685:
                   3686:        clabel->blockSize = raidPtr->bytesPerSector;
1.282     enami    3687:        rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);
1.54      oster    3688:
1.48      oster    3689:        /* XXX not portable */
                   3690:        clabel->parityConfig = raidPtr->Layout.map->parityConfig;
1.54      oster    3691:        clabel->maxOutstanding = raidPtr->maxOutstanding;
                   3692:        clabel->autoconfigure = raidPtr->autoconfigure;
                   3693:        clabel->root_partition = raidPtr->root_partition;
1.48      oster    3694:        clabel->last_unit = raidPtr->raidid;
1.54      oster    3695:        clabel->config_order = raidPtr->config_order;
1.269     jld      3696:
                   3697: #ifndef RF_NO_PARITY_MAP
                   3698:        rf_paritymap_init_label(raidPtr->parity_map, clabel);
                   3699: #endif
1.51      oster    3700: }
                   3701:
1.300     christos 3702: struct raid_softc *
                   3703: rf_auto_config_set(RF_ConfigSet_t *cset)
1.51      oster    3704: {
                   3705:        RF_Raid_t *raidPtr;
                   3706:        RF_Config_t *config;
                   3707:        int raidID;
1.300     christos 3708:        struct raid_softc *sc;
1.51      oster    3709:
1.224     oster    3710: #ifdef DEBUG
1.72      oster    3711:        printf("RAID autoconfigure\n");
1.127     oster    3712: #endif
1.51      oster    3713:
                   3714:        /* 1. Create a config structure */
1.300     christos 3715:        config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
                   3716:        if (config == NULL) {
1.51      oster    3717:                printf("Out of mem!?!?\n");
                   3718:                                /* XXX do something more intelligent here. */
1.300     christos 3719:                return NULL;
1.51      oster    3720:        }
1.77      oster    3721:
1.186     perry    3722:        /*
                   3723:           2. Figure out what RAID ID this one is supposed to live at
1.51      oster    3724:           See if we can get the same RAID dev that it was configured
1.186     perry    3725:           on last time..
1.51      oster    3726:        */
                   3727:
                   3728:        raidID = cset->ac->clabel->last_unit;
1.300     christos 3729:        for (sc = raidget(raidID); sc->sc_r.valid != 0; sc = raidget(++raidID))
                   3730:                continue;
1.224     oster    3731: #ifdef DEBUG
1.72      oster    3732:        printf("Configuring raid%d:\n",raidID);
1.127     oster    3733: #endif
                   3734:
1.300     christos 3735:        raidPtr = &sc->sc_r;
1.51      oster    3736:
                   3737:        /* XXX all this stuff should be done SOMEWHERE ELSE! */
1.302     christos 3738:        raidPtr->softc = sc;
1.51      oster    3739:        raidPtr->raidid = raidID;
                   3740:        raidPtr->openings = RAIDOUTSTANDING;
                   3741:
                   3742:        /* 3. Build the configuration structure */
                   3743:        rf_create_configuration(cset->ac, config, raidPtr);
                   3744:
                   3745:        /* 4. Do the configuration */
1.300     christos 3746:        if (rf_Configure(raidPtr, config, cset->ac) == 0) {
                   3747:                raidinit(sc);
1.186     perry    3748:
1.300     christos 3749:                rf_markalldirty(raidPtr);
                   3750:                raidPtr->autoconfigure = 1; /* XXX do this here? */
1.51      oster    3751:                if (cset->ac->clabel->root_partition==1) {
                   3752:                        /* everything configured just fine.  Make a note
                   3753:                           that this set is eligible to be root. */
                   3754:                        cset->rootable = 1;
1.54      oster    3755:                        /* XXX do this here? */
1.300     christos 3756:                        raidPtr->root_partition = 1;
1.51      oster    3757:                }
1.300     christos 3758:        } else {
                   3759:                raidput(sc);
                   3760:                sc = NULL;
1.51      oster    3761:        }
                   3762:
                   3763:        /* 5. Cleanup */
                   3764:        free(config, M_RAIDFRAME);
1.300     christos 3765:        return sc;
1.99      oster    3766: }
                   3767:
                   3768: void
1.169     oster    3769: rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
1.99      oster    3770: {
                   3771:        struct buf *bp;
1.300     christos 3772:        struct raid_softc *rs;
1.99      oster    3773:
                   3774:        bp = (struct buf *)desc->bp;
1.300     christos 3775:        rs = desc->raidPtr->softc;
                   3776:        disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid),
                   3777:            (bp->b_flags & B_READ));
1.13      oster    3778: }
1.177     oster    3779:
                   3780: void
1.187     christos 3781: rf_pool_init(struct pool *p, size_t size, const char *w_chan,
                   3782:             size_t xmin, size_t xmax)
1.177     oster    3783: {
1.227     ad       3784:        pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
1.187     christos 3785:        pool_sethiwat(p, xmax);
                   3786:        pool_prime(p, xmin);
                   3787:        pool_setlowat(p, xmin);
1.177     oster    3788: }
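/*
 * Usage sketch for rf_pool_init() (hypothetical pool and structure
 * names): xmin items are primed and used as the low-water mark, xmax
 * becomes the high-water mark.
 */
#if 0	/* example only */
static struct pool example_pool;

static void
example_pool_setup(void)
{
	/* prime with 16 descriptors, cap the pool at 64 */
	rf_pool_init(&example_pool, sizeof(struct example_desc),
	    "rfexampl", 16, 64);
}
#endif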
1.190     oster    3789:
                   3790: /*
1.300     christos 3791:  * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buf_queue to see
1.190     oster    3792:  * if there is IO pending and if that IO could possibly be done for a
                   3793:  * given RAID set.  Returns 0 if IO is waiting and can be done, 1
                   3794:  * otherwise.
                   3795:  *
                   3796:  */
                   3797:
                   3798: int
1.300     christos 3799: rf_buf_queue_check(RF_Raid_t *raidPtr)
1.190     oster    3800: {
1.300     christos 3801:        struct raid_softc *rs = raidPtr->softc;
                   3802:        if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) {
1.190     oster    3803:                /* there is work to do */
                   3804:                return 0;
                   3805:        }
                   3806:        /* default is nothing to do */
                   3807:        return 1;
                   3808: }
1.213     christos 3809:
                   3810: int
1.294     oster    3811: rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
1.213     christos 3812: {
1.275     mrg      3813:        uint64_t numsecs;
                   3814:        unsigned secsize;
1.213     christos 3815:        int error;
                   3816:
1.275     mrg      3817:        error = getdisksize(vp, &numsecs, &secsize);
1.213     christos 3818:        if (error == 0) {
1.275     mrg      3819:                diskPtr->blockSize = secsize;
                   3820:                diskPtr->numBlocks = numsecs - rf_protectedSectors;
                   3821:                diskPtr->partitionSize = numsecs;
1.213     christos 3822:                return 0;
                   3823:        }
                   3824:        return error;
                   3825: }
1.217     oster    3826:
                   3827: static int
1.261     dyoung   3828: raid_match(device_t self, cfdata_t cfdata, void *aux)
1.217     oster    3829: {
                   3830:        return 1;
                   3831: }
                   3832:
                   3833: static void
1.261     dyoung   3834: raid_attach(device_t parent, device_t self, void *aux)
1.217     oster    3835: {
                   3836:
                   3837: }
                   3838:
                   3839:
                   3840: static int
1.261     dyoung   3841: raid_detach(device_t self, int flags)
1.217     oster    3842: {
1.266     dyoung   3843:        int error;
1.303   ! christos 3844:        struct raid_softc *rs = raidget(device_unit(self));
        !          3845:
        !          3846:        if (rs == NULL)
        !          3847:                return ENXIO;
1.266     dyoung   3848:
                   3849:        if ((error = raidlock(rs)) != 0)
                   3850:                return (error);
1.217     oster    3851:
1.266     dyoung   3852:        error = raid_detach_unlocked(rs);
                   3853:
                   3854:        raidunlock(rs);
1.217     oster    3855:
1.303   ! christos 3856:        /* XXXkd: raidput(rs) ??? */
        !          3857:
1.266     dyoung   3858:        return error;
1.217     oster    3859: }
                   3860:
1.234     oster    3861: static void
                   3862: rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr)
                   3863: {
                   3864:        prop_dictionary_t disk_info, odisk_info, geom;
                   3865:        disk_info = prop_dictionary_create();
                   3866:        geom = prop_dictionary_create();
                   3867:        prop_dictionary_set_uint64(geom, "sectors-per-unit",
                   3868:                                   raidPtr->totalSectors);
                   3869:        prop_dictionary_set_uint32(geom, "sector-size",
                   3870:                                   raidPtr->bytesPerSector);
                   3871:
                   3872:        prop_dictionary_set_uint16(geom, "sectors-per-track",
                   3873:                                   raidPtr->Layout.dataSectorsPerStripe);
                   3874:        prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
                   3875:                                   4 * raidPtr->numCol);
                   3876:
                   3877:        prop_dictionary_set_uint64(geom, "cylinders-per-unit",
                   3878:           raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe *
                   3879:           (4 * raidPtr->numCol)));
                   3880:
                   3881:        prop_dictionary_set(disk_info, "geometry", geom);
                   3882:        prop_object_release(geom);
                   3883:        prop_dictionary_set(device_properties(rs->sc_dev),
                   3884:                            "disk-info", disk_info);
                   3885:        odisk_info = rs->sc_dkdev.dk_info;
                   3886:        rs->sc_dkdev.dk_info = disk_info;
                   3887:        if (odisk_info)
                   3888:                prop_object_release(odisk_info);
                   3889: }
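/*
 * Illustrative note (all numbers hypothetical): the geometry published
 * above is synthetic.  For a set with 5 columns, 32 data sectors per
 * stripe and 409600 total sectors it would advertise 32 sectors/track,
 * 4 * 5 = 20 tracks/cylinder and 409600 / (32 * 20) = 640 cylinders.
 */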
1.252     oster    3890:
                   3891: /*
                   3892:  * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
                   3893:  * We end up returning whatever error was returned by the first cache flush
                   3894:  * that fails.
                   3895:  */
                   3896:
1.269     jld      3897: int
1.252     oster    3898: rf_sync_component_caches(RF_Raid_t *raidPtr)
                   3899: {
                   3900:        int c, sparecol;
                   3901:        int e, error;
                   3902:        int force = 1;
                   3903:
                   3904:        error = 0;
                   3905:        for (c = 0; c < raidPtr->numCol; c++) {
                   3906:                if (raidPtr->Disks[c].status == rf_ds_optimal) {
                   3907:                        e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
                   3908:                                          &force, FWRITE, NOCRED);
                   3909:                        if (e) {
1.255     oster    3910:                                if (e != ENODEV)
                   3911:                                        printf("raid%d: cache flush to component %s failed.\n",
                   3912:                                               raidPtr->raidid, raidPtr->Disks[c].devname);
1.252     oster    3913:                                if (error == 0) {
                   3914:                                        error = e;
                   3915:                                }
                   3916:                        }
                   3917:                }
                   3918:        }
                   3919:
                   3920:        for( c = 0; c < raidPtr->numSpare ; c++) {
                   3921:                sparecol = raidPtr->numCol + c;
                   3922:                /* Need to ensure that the reconstruct actually completed! */
                   3923:                if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
                   3924:                        e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
                   3925:                                          DIOCCACHESYNC, &force, FWRITE, NOCRED);
                   3926:                        if (e) {
1.255     oster    3927:                                if (e != ENODEV)
                   3928:                                        printf("raid%d: cache flush to component %s failed.\n",
                   3929:                                               raidPtr->raidid, raidPtr->Disks[sparecol].devname);
1.252     oster    3930:                                if (error == 0) {
                   3931:                                        error = e;
                   3932:                                }
                   3933:                        }
                   3934:                }
                   3935:        }
                   3936:        return error;
                   3937: }
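/*
 * Illustrative sketch (not part of the source): a DIOCCACHESYNC request
 * arriving at the raid(4) ioctl handler could be forwarded to the
 * components roughly like this; the surrounding switch and the retcode
 * variable are hypothetical here.
 */
#if 0
	case DIOCCACHESYNC:
		retcode = rf_sync_component_caches(raidPtr);
		break;
#endif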
