[BACK]Return to rf_netbsdkintf.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / dev / raidframe

Annotation of src/sys/dev/raidframe/rf_netbsdkintf.c, Revision 1.190

1.190   ! oster       1: /*     $NetBSD: rf_netbsdkintf.c,v 1.189 2005/09/24 22:51:55 oster Exp $       */
1.1       oster       2: /*-
                      3:  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
                      4:  * All rights reserved.
                      5:  *
                      6:  * This code is derived from software contributed to The NetBSD Foundation
                      7:  * by Greg Oster; Jason R. Thorpe.
                      8:  *
                      9:  * Redistribution and use in source and binary forms, with or without
                     10:  * modification, are permitted provided that the following conditions
                     11:  * are met:
                     12:  * 1. Redistributions of source code must retain the above copyright
                     13:  *    notice, this list of conditions and the following disclaimer.
                     14:  * 2. Redistributions in binary form must reproduce the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer in the
                     16:  *    documentation and/or other materials provided with the distribution.
                     17:  * 3. All advertising materials mentioning features or use of this software
                     18:  *    must display the following acknowledgement:
                     19:  *        This product includes software developed by the NetBSD
                     20:  *        Foundation, Inc. and its contributors.
                     21:  * 4. Neither the name of The NetBSD Foundation nor the names of its
                     22:  *    contributors may be used to endorse or promote products derived
                     23:  *    from this software without specific prior written permission.
                     24:  *
                     25:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     26:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     27:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     28:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     29:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     30:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     31:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     32:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     33:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     34:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     35:  * POSSIBILITY OF SUCH DAMAGE.
                     36:  */
                     37:
                     38: /*
                     39:  * Copyright (c) 1990, 1993
                     40:  *      The Regents of the University of California.  All rights reserved.
                     41:  *
                     42:  * This code is derived from software contributed to Berkeley by
                     43:  * the Systems Programming Group of the University of Utah Computer
                     44:  * Science Department.
                     45:  *
                     46:  * Redistribution and use in source and binary forms, with or without
                     47:  * modification, are permitted provided that the following conditions
                     48:  * are met:
                     49:  * 1. Redistributions of source code must retain the above copyright
                     50:  *    notice, this list of conditions and the following disclaimer.
                     51:  * 2. Redistributions in binary form must reproduce the above copyright
                     52:  *    notice, this list of conditions and the following disclaimer in the
                     53:  *    documentation and/or other materials provided with the distribution.
1.162     agc        54:  * 3. Neither the name of the University nor the names of its contributors
                     55:  *    may be used to endorse or promote products derived from this software
                     56:  *    without specific prior written permission.
                     57:  *
                     58:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     59:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     60:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     61:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     62:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     63:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     64:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     65:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     66:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     67:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     68:  * SUCH DAMAGE.
                     69:  *
                     70:  * from: Utah $Hdr: cd.c 1.6 90/11/28$
                     71:  *
                     72:  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
                     73:  */
                     74:
                     75: /*
                     76:  * Copyright (c) 1988 University of Utah.
                     77:  *
                     78:  * This code is derived from software contributed to Berkeley by
                     79:  * the Systems Programming Group of the University of Utah Computer
                     80:  * Science Department.
                     81:  *
                     82:  * Redistribution and use in source and binary forms, with or without
                     83:  * modification, are permitted provided that the following conditions
                     84:  * are met:
                     85:  * 1. Redistributions of source code must retain the above copyright
                     86:  *    notice, this list of conditions and the following disclaimer.
                     87:  * 2. Redistributions in binary form must reproduce the above copyright
                     88:  *    notice, this list of conditions and the following disclaimer in the
                     89:  *    documentation and/or other materials provided with the distribution.
1.1       oster      90:  * 3. All advertising materials mentioning features or use of this software
                     91:  *    must display the following acknowledgement:
                     92:  *      This product includes software developed by the University of
                     93:  *      California, Berkeley and its contributors.
                     94:  * 4. Neither the name of the University nor the names of its contributors
                     95:  *    may be used to endorse or promote products derived from this software
                     96:  *    without specific prior written permission.
                     97:  *
                     98:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     99:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                    100:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                    101:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                    102:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                    103:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                    104:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                    105:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                    106:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                    107:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                    108:  * SUCH DAMAGE.
                    109:  *
                    110:  * from: Utah $Hdr: cd.c 1.6 90/11/28$
                    111:  *
                    112:  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
                    113:  */
                    114:
                    115: /*
                    116:  * Copyright (c) 1995 Carnegie-Mellon University.
                    117:  * All rights reserved.
                    118:  *
                    119:  * Authors: Mark Holland, Jim Zelenka
                    120:  *
                    121:  * Permission to use, copy, modify and distribute this software and
                    122:  * its documentation is hereby granted, provided that both the copyright
                    123:  * notice and this permission notice appear in all copies of the
                    124:  * software, derivative works or modified versions, and any portions
                    125:  * thereof, and that both notices appear in supporting documentation.
                    126:  *
                    127:  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
                    128:  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
                    129:  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
                    130:  *
                    131:  * Carnegie Mellon requests users of this software to return to
                    132:  *
                    133:  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
                    134:  *  School of Computer Science
                    135:  *  Carnegie Mellon University
                    136:  *  Pittsburgh PA 15213-3890
                    137:  *
                    138:  * any improvements or extensions that they make and grant Carnegie the
                    139:  * rights to redistribute these changes.
                    140:  */
                    141:
                    142: /***********************************************************
                    143:  *
                    144:  * rf_kintf.c -- the kernel interface routines for RAIDframe
                    145:  *
                    146:  ***********************************************************/
1.112     lukem     147:
                    148: #include <sys/cdefs.h>
1.190   ! oster     149: __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.189 2005/09/24 22:51:55 oster Exp $");
1.1       oster     150:
1.113     lukem     151: #include <sys/param.h>
1.1       oster     152: #include <sys/errno.h>
                    153: #include <sys/pool.h>
1.152     thorpej   154: #include <sys/proc.h>
1.1       oster     155: #include <sys/queue.h>
                    156: #include <sys/disk.h>
                    157: #include <sys/device.h>
                    158: #include <sys/stat.h>
                    159: #include <sys/ioctl.h>
                    160: #include <sys/fcntl.h>
                    161: #include <sys/systm.h>
                    162: #include <sys/namei.h>
                    163: #include <sys/vnode.h>
                    164: #include <sys/disklabel.h>
                    165: #include <sys/conf.h>
                    166: #include <sys/lock.h>
                    167: #include <sys/buf.h>
1.182     yamt      168: #include <sys/bufq.h>
1.1       oster     169: #include <sys/user.h>
1.65      oster     170: #include <sys/reboot.h>
1.8       oster     171:
1.110     oster     172: #include <dev/raidframe/raidframevar.h>
                    173: #include <dev/raidframe/raidframeio.h>
1.8       oster     174: #include "raid.h"
1.62      oster     175: #include "opt_raid_autoconfig.h"
1.1       oster     176: #include "rf_raid.h"
1.44      oster     177: #include "rf_copyback.h"
1.1       oster     178: #include "rf_dag.h"
                    179: #include "rf_dagflags.h"
1.99      oster     180: #include "rf_desc.h"
1.1       oster     181: #include "rf_diskqueue.h"
                    182: #include "rf_etimer.h"
                    183: #include "rf_general.h"
                    184: #include "rf_kintf.h"
                    185: #include "rf_options.h"
                    186: #include "rf_driver.h"
                    187: #include "rf_parityscan.h"
                    188: #include "rf_threadstuff.h"
                    189:
/*
 * Debug tracing.
 *
 * db1_printf() takes a complete, parenthesised printf() argument list:
 *
 *	db1_printf(("unit %d\n", unit));
 *
 * With DEBUG defined the message is printed when rf_kdebug_level > 0;
 * without DEBUG the macro expands to nothing.  Both variants expand to a
 * single do { } while (0) statement so that the macro can safely be used
 * as the body of an unbraced if/else (a bare `if' or a bare `{ }' would
 * either capture the following `else' or fail to compile).
 */
#ifdef DEBUG
int	rf_kdebug_level = 0;	/* messages are printed when > 0 */
#define db1_printf(a) \
	do { if (rf_kdebug_level > 0) printf a; } while (/*CONSTCOND*/ 0)
#else	/* DEBUG */
#define db1_printf(a) \
	do { } while (/*CONSTCOND*/ 0)
#endif	/* DEBUG */
1.1       oster     196:
1.9       oster     197: static RF_Raid_t **raidPtrs;   /* global raid device descriptors */
1.1       oster     198:
1.11      oster     199: RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
1.1       oster     200:
1.10      oster     201: static RF_SparetWait_t *rf_sparet_wait_queue;  /* requests to install a
                    202:                                                 * spare table */
                    203: static RF_SparetWait_t *rf_sparet_resp_queue;  /* responses from
                    204:                                                 * installation process */
1.153     thorpej   205:
                    206: MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
1.10      oster     207:
1.1       oster     208: /* prototypes */
1.187     christos  209: static void KernelWakeupFunc(struct buf *);
                    210: static void InitBP(struct buf *, struct vnode *, unsigned,
                    211:     dev_t, RF_SectorNum_t, RF_SectorCount_t, caddr_t, void (*) (struct buf *),
                    212:     void *, int, struct proc *);
1.104     oster     213: static void raidinit(RF_Raid_t *);
1.1       oster     214:
1.104     oster     215: void raidattach(int);
1.130     gehenna   216:
                    217: dev_type_open(raidopen);
                    218: dev_type_close(raidclose);
                    219: dev_type_read(raidread);
                    220: dev_type_write(raidwrite);
                    221: dev_type_ioctl(raidioctl);
                    222: dev_type_strategy(raidstrategy);
                    223: dev_type_dump(raiddump);
                    224: dev_type_size(raidsize);
                    225:
                    226: const struct bdevsw raid_bdevsw = {
                    227:        raidopen, raidclose, raidstrategy, raidioctl,
                    228:        raiddump, raidsize, D_DISK
                    229: };
                    230:
                    231: const struct cdevsw raid_cdevsw = {
                    232:        raidopen, raidclose, raidread, raidwrite, raidioctl,
1.144     jdolecek  233:        nostop, notty, nopoll, nommap, nokqfilter, D_DISK
1.130     gehenna   234: };
1.1       oster     235:
                    236: /*
                    237:  * Pilfered from ccd.c
                    238:  */
                    239:
1.10      oster     240: struct raidbuf {
                    241:        struct buf rf_buf;      /* new I/O buf.  MUST BE FIRST!!! */
                    242:        struct buf *rf_obp;     /* ptr. to original I/O buf */
1.11      oster     243:        RF_DiskQueueData_t *req;/* the request that this was part of.. */
1.10      oster     244: };
1.1       oster     245:
1.9       oster     246: /* XXX Not sure if the following should be replacing the raidPtrs above,
1.186     perry     247:    or if it should be used in conjunction with that...
1.59      oster     248: */
1.1       oster     249:
1.10      oster     250: struct raid_softc {
                    251:        int     sc_flags;       /* flags */
                    252:        int     sc_cflags;      /* configuration flags */
1.11      oster     253:        size_t  sc_size;        /* size of the raid device */
1.10      oster     254:        char    sc_xname[20];   /* XXX external name */
                    255:        struct disk sc_dkdev;   /* generic disk device info */
1.125     hannken   256:        struct bufq_state buf_queue;    /* used for the device queue */
1.10      oster     257: };
1.1       oster     258: /* sc_flags */
                    259: #define RAIDF_INITED   0x01    /* unit has been initialized */
                    260: #define RAIDF_WLABEL   0x02    /* label area is writable */
                    261: #define RAIDF_LABELLING        0x04    /* unit is currently being labelled */
                    262: #define RAIDF_WANTED   0x40    /* someone is waiting to obtain a lock */
                    263: #define RAIDF_LOCKED   0x80    /* unit is locked */
                    264:
                    265: #define        raidunit(x)     DISKUNIT(x)
1.48      oster     266: int numraid = 0;
1.1       oster     267:
1.186     perry     268: /*
                    269:  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
                    270:  * Be aware that large numbers can allow the driver to consume a lot of
1.28      oster     271:  * kernel memory, especially on writes, and in degraded mode reads.
1.186     perry     272:  *
                    273:  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
                    274:  * a single 64K write will typically require 64K for the old data,
                    275:  * 64K for the old parity, and 64K for the new parity, for a total
1.28      oster     276:  * of 192K (if the parity buffer is not re-used immediately).
1.110     oster     277:  * Even if it is used immediately, that's still 128K, which when multiplied
1.28      oster     278:  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
1.186     perry     279:  *
1.28      oster     280:  * Now in degraded mode, for example, a 64K read on the above setup may
1.186     perry     281:  * require data reconstruction, which will require *all* of the 4 remaining
1.28      oster     282:  * disks to participate -- 4 * 32K/disk == 128K again.
1.20      oster     283:  */
                    284:
                    285: #ifndef RAIDOUTSTANDING
1.28      oster     286: #define RAIDOUTSTANDING   6
1.20      oster     287: #endif
                    288:
1.1       oster     289: #define RAIDLABELDEV(dev)      \
                    290:        (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
                    291:
                    292: /* declared here, and made public, for the benefit of KVM stuff.. */
1.10      oster     293: struct raid_softc *raid_softc;
1.9       oster     294:
1.186     perry     295: static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
1.104     oster     296:                                     struct disklabel *);
                    297: static void raidgetdisklabel(dev_t);
                    298: static void raidmakedisklabel(struct raid_softc *);
1.1       oster     299:
1.104     oster     300: static int raidlock(struct raid_softc *);
                    301: static void raidunlock(struct raid_softc *);
1.1       oster     302:
1.104     oster     303: static void rf_markalldirty(RF_Raid_t *);
1.48      oster     304:
                    305: struct device *raidrootdev;
1.1       oster     306:
1.104     oster     307: void rf_ReconThread(struct rf_recon_req *);
                    308: void rf_RewriteParityThread(RF_Raid_t *raidPtr);
                    309: void rf_CopybackThread(RF_Raid_t *raidPtr);
                    310: void rf_ReconstructInPlaceThread(struct rf_recon_req *);
1.142     thorpej   311: int rf_autoconfig(struct device *self);
                    312: void rf_buildroothack(RF_ConfigSet_t *);
1.104     oster     313:
                    314: RF_AutoConfig_t *rf_find_raid_components(void);
                    315: RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
                    316: static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
                    317: static int rf_reasonable_label(RF_ComponentLabel_t *);
                    318: void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
                    319: int rf_set_autoconfig(RF_Raid_t *, int);
                    320: int rf_set_rootpartition(RF_Raid_t *, int);
                    321: void rf_release_all_vps(RF_ConfigSet_t *);
                    322: void rf_cleanup_config_set(RF_ConfigSet_t *);
                    323: int rf_have_enough_components(RF_ConfigSet_t *);
                    324: int rf_auto_config_set(RF_ConfigSet_t *, int *);
1.48      oster     325:
                    326: static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
1.62      oster     327:                                  allow autoconfig to take place.
                    328:                                  Note that this is overridden by having
1.186     perry     329:                                  RAID_AUTOCONFIG as an option in the
1.62      oster     330:                                  kernel config file.  */
1.37      oster     331:
1.177     oster     332: struct RF_Pools_s rf_pools;
                    333:
1.10      oster     334: void
1.169     oster     335: raidattach(int num)
1.1       oster     336: {
1.14      oster     337:        int raidID;
                    338:        int i, rc;
1.1       oster     339:
                    340: #ifdef DEBUG
1.9       oster     341:        printf("raidattach: Asked for %d units\n", num);
1.1       oster     342: #endif
                    343:
                    344:        if (num <= 0) {
                    345: #ifdef DIAGNOSTIC
                    346:                panic("raidattach: count <= 0");
                    347: #endif
                    348:                return;
                    349:        }
1.9       oster     350:        /* This is where all the initialization stuff gets done. */
1.1       oster     351:
1.50      oster     352:        numraid = num;
                    353:
1.1       oster     354:        /* Make some space for requested number of units... */
                    355:
1.167     oster     356:        RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
1.1       oster     357:        if (raidPtrs == NULL) {
1.141     provos    358:                panic("raidPtrs is NULL!!");
1.1       oster     359:        }
1.116     thorpej   360:
                    361:        /* Initialize the component buffer pool. */
1.177     oster     362:        rf_pool_init(&rf_pools.cbuf, sizeof(struct raidbuf),
                    363:                     "raidpl", num * RAIDOUTSTANDING,
                    364:                     2 * num * RAIDOUTSTANDING);
1.116     thorpej   365:
1.168     oster     366:        rf_mutex_init(&rf_sparet_wait_mutex);
1.14      oster     367:
                    368:        rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
                    369:
1.58      oster     370:        for (i = 0; i < num; i++)
1.14      oster     371:                raidPtrs[i] = NULL;
                    372:        rc = rf_BootRaidframe();
                    373:        if (rc == 0)
                    374:                printf("Kernelized RAIDframe activated\n");
                    375:        else
1.141     provos    376:                panic("Serious error booting RAID!!");
1.14      oster     377:
1.9       oster     378:        /* put together some datastructures like the CCD device does.. This
                    379:         * lets us lock the device and what-not when it gets opened. */
1.1       oster     380:
                    381:        raid_softc = (struct raid_softc *)
1.48      oster     382:                malloc(num * sizeof(struct raid_softc),
                    383:                       M_RAIDFRAME, M_NOWAIT);
1.1       oster     384:        if (raid_softc == NULL) {
                    385:                printf("WARNING: no memory for RAIDframe driver\n");
                    386:                return;
                    387:        }
1.50      oster     388:
1.108     thorpej   389:        memset(raid_softc, 0, num * sizeof(struct raid_softc));
1.34      oster     390:
1.48      oster     391:        raidrootdev = (struct device *)malloc(num * sizeof(struct device),
                    392:                                              M_RAIDFRAME, M_NOWAIT);
                    393:        if (raidrootdev == NULL) {
1.141     provos    394:                panic("No memory for RAIDframe driver!!?!?!");
1.48      oster     395:        }
                    396:
1.9       oster     397:        for (raidID = 0; raidID < num; raidID++) {
1.126     hannken   398:                bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
1.188     yamt      399:                pseudo_disk_init(&raid_softc[raidID].sc_dkdev);
1.48      oster     400:
                    401:                raidrootdev[raidID].dv_class  = DV_DISK;
                    402:                raidrootdev[raidID].dv_cfdata = NULL;
                    403:                raidrootdev[raidID].dv_unit   = raidID;
                    404:                raidrootdev[raidID].dv_parent = NULL;
                    405:                raidrootdev[raidID].dv_flags  = 0;
1.179     itojun    406:                snprintf(raidrootdev[raidID].dv_xname,
                    407:                    sizeof(raidrootdev[raidID].dv_xname), "raid%d", raidID);
1.48      oster     408:
1.167     oster     409:                RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
1.11      oster     410:                          (RF_Raid_t *));
1.9       oster     411:                if (raidPtrs[raidID] == NULL) {
1.39      oster     412:                        printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
                    413:                        numraid = raidID;
                    414:                        return;
1.1       oster     415:                }
                    416:        }
1.48      oster     417:
1.114     lukem     418: #ifdef RAID_AUTOCONFIG
1.62      oster     419:        raidautoconfig = 1;
                    420: #endif
                    421:
1.142     thorpej   422:        /*
                    423:         * Register a finalizer which will be used to auto-config RAID
                    424:         * sets once all real hardware devices have been found.
                    425:         */
                    426:        if (config_finalize_register(NULL, rf_autoconfig) != 0)
                    427:                printf("WARNING: unable to register RAIDframe finalizer\n");
                    428: }
                    429:
                    430: int
                    431: rf_autoconfig(struct device *self)
                    432: {
                    433:        RF_AutoConfig_t *ac_list;
                    434:        RF_ConfigSet_t *config_sets;
                    435:
                    436:        if (raidautoconfig == 0)
                    437:                return (0);
                    438:
                    439:        /* XXX This code can only be run once. */
                    440:        raidautoconfig = 0;
                    441:
1.48      oster     442:        /* 1. locate all RAID components on the system */
1.142     thorpej   443: #ifdef DEBUG
                    444:        printf("Searching for RAID components...\n");
1.48      oster     445: #endif
                    446:        ac_list = rf_find_raid_components();
                    447:
1.142     thorpej   448:        /* 2. Sort them into their respective sets. */
1.48      oster     449:        config_sets = rf_create_auto_sets(ac_list);
                    450:
1.142     thorpej   451:        /*
                    452:         * 3. Evaluate each set andconfigure the valid ones.
                    453:         * This gets done in rf_buildroothack().
                    454:         */
                    455:        rf_buildroothack(config_sets);
1.48      oster     456:
1.142     thorpej   457:        return (1);
1.48      oster     458: }
                    459:
                    460: void
1.142     thorpej   461: rf_buildroothack(RF_ConfigSet_t *config_sets)
1.48      oster     462: {
                    463:        RF_ConfigSet_t *cset;
                    464:        RF_ConfigSet_t *next_cset;
1.51      oster     465:        int retcode;
1.48      oster     466:        int raidID;
1.51      oster     467:        int rootID;
                    468:        int num_root;
1.48      oster     469:
1.101     oster     470:        rootID = 0;
1.51      oster     471:        num_root = 0;
1.48      oster     472:        cset = config_sets;
                    473:        while(cset != NULL ) {
                    474:                next_cset = cset->next;
1.186     perry     475:                if (rf_have_enough_components(cset) &&
1.51      oster     476:                    cset->ac->clabel->autoconfigure==1) {
                    477:                        retcode = rf_auto_config_set(cset,&raidID);
                    478:                        if (!retcode) {
                    479:                                if (cset->rootable) {
                    480:                                        rootID = raidID;
                    481:                                        num_root++;
                    482:                                }
                    483:                        } else {
                    484:                                /* The autoconfig didn't work :( */
                    485: #if DEBUG
                    486:                                printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
                    487: #endif
                    488:                                rf_release_all_vps(cset);
1.48      oster     489:                        }
                    490:                } else {
1.186     perry     491:                        /* we're not autoconfiguring this set...
1.48      oster     492:                           release the associated resources */
1.49      oster     493:                        rf_release_all_vps(cset);
1.48      oster     494:                }
                    495:                /* cleanup */
1.49      oster     496:                rf_cleanup_config_set(cset);
1.48      oster     497:                cset = next_cset;
                    498:        }
1.122     oster     499:
                    500:        /* we found something bootable... */
                    501:
                    502:        if (num_root == 1) {
1.186     perry     503:                booted_device = &raidrootdev[rootID];
1.122     oster     504:        } else if (num_root > 1) {
                    505:                /* we can't guess.. require the user to answer... */
                    506:                boothowto |= RB_ASKNAME;
1.51      oster     507:        }
1.1       oster     508: }
                    509:
                    510:
                    511: int
1.169     oster     512: raidsize(dev_t dev)
1.1       oster     513: {
                    514:        struct raid_softc *rs;
                    515:        struct disklabel *lp;
1.9       oster     516:        int     part, unit, omask, size;
1.1       oster     517:
                    518:        unit = raidunit(dev);
                    519:        if (unit >= numraid)
                    520:                return (-1);
                    521:        rs = &raid_softc[unit];
                    522:
                    523:        if ((rs->sc_flags & RAIDF_INITED) == 0)
                    524:                return (-1);
                    525:
                    526:        part = DISKPART(dev);
                    527:        omask = rs->sc_dkdev.dk_openmask & (1 << part);
                    528:        lp = rs->sc_dkdev.dk_label;
                    529:
1.161     fvdl      530:        if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
1.1       oster     531:                return (-1);
                    532:
                    533:        if (lp->d_partitions[part].p_fstype != FS_SWAP)
                    534:                size = -1;
                    535:        else
                    536:                size = lp->d_partitions[part].p_size *
                    537:                    (lp->d_secsize / DEV_BSIZE);
                    538:
1.161     fvdl      539:        if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
1.1       oster     540:                return (-1);
                    541:
                    542:        return (size);
                    543:
                    544: }
                    545:
                    546: int
1.169     oster     547: raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t  size)
1.1       oster     548: {
                    549:        /* Not implemented. */
                    550:        return ENXIO;
                    551: }
                    552: /* ARGSUSED */
                    553: int
1.169     oster     554: raidopen(dev_t dev, int flags, int fmt, struct proc *p)
1.1       oster     555: {
1.9       oster     556:        int     unit = raidunit(dev);
1.1       oster     557:        struct raid_softc *rs;
                    558:        struct disklabel *lp;
1.9       oster     559:        int     part, pmask;
                    560:        int     error = 0;
                    561:
1.1       oster     562:        if (unit >= numraid)
                    563:                return (ENXIO);
                    564:        rs = &raid_softc[unit];
                    565:
                    566:        if ((error = raidlock(rs)) != 0)
1.9       oster     567:                return (error);
1.1       oster     568:        lp = rs->sc_dkdev.dk_label;
                    569:
                    570:        part = DISKPART(dev);
                    571:        pmask = (1 << part);
                    572:
                    573:        if ((rs->sc_flags & RAIDF_INITED) &&
                    574:            (rs->sc_dkdev.dk_openmask == 0))
1.9       oster     575:                raidgetdisklabel(dev);
1.1       oster     576:
                    577:        /* make sure that this partition exists */
                    578:
                    579:        if (part != RAW_PART) {
                    580:                if (((rs->sc_flags & RAIDF_INITED) == 0) ||
                    581:                    ((part >= lp->d_npartitions) ||
1.9       oster     582:                        (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
1.1       oster     583:                        error = ENXIO;
                    584:                        raidunlock(rs);
1.9       oster     585:                        return (error);
1.1       oster     586:                }
                    587:        }
                    588:        /* Prevent this unit from being unconfigured while open. */
                    589:        switch (fmt) {
                    590:        case S_IFCHR:
                    591:                rs->sc_dkdev.dk_copenmask |= pmask;
                    592:                break;
                    593:
                    594:        case S_IFBLK:
                    595:                rs->sc_dkdev.dk_bopenmask |= pmask;
                    596:                break;
                    597:        }
1.13      oster     598:
1.186     perry     599:        if ((rs->sc_dkdev.dk_openmask == 0) &&
1.13      oster     600:            ((rs->sc_flags & RAIDF_INITED) != 0)) {
                    601:                /* First one... mark things as dirty... Note that we *MUST*
                    602:                 have done a configure before this.  I DO NOT WANT TO BE
                    603:                 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
                    604:                 THAT THEY BELONG TOGETHER!!!!! */
                    605:                /* XXX should check to see if we're only open for reading
                    606:                   here... If so, we needn't do this, but then need some
                    607:                   other way of keeping track of what's happened.. */
                    608:
                    609:                rf_markalldirty( raidPtrs[unit] );
                    610:        }
                    611:
                    612:
1.1       oster     613:        rs->sc_dkdev.dk_openmask =
                    614:            rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
                    615:
                    616:        raidunlock(rs);
                    617:
1.9       oster     618:        return (error);
1.1       oster     619:
                    620:
                    621: }
                    622: /* ARGSUSED */
                    623: int
1.169     oster     624: raidclose(dev_t dev, int flags, int fmt, struct proc *p)
1.1       oster     625: {
1.9       oster     626:        int     unit = raidunit(dev);
1.1       oster     627:        struct raid_softc *rs;
1.9       oster     628:        int     error = 0;
                    629:        int     part;
1.1       oster     630:
                    631:        if (unit >= numraid)
                    632:                return (ENXIO);
                    633:        rs = &raid_softc[unit];
                    634:
                    635:        if ((error = raidlock(rs)) != 0)
                    636:                return (error);
                    637:
                    638:        part = DISKPART(dev);
                    639:
                    640:        /* ...that much closer to allowing unconfiguration... */
                    641:        switch (fmt) {
                    642:        case S_IFCHR:
                    643:                rs->sc_dkdev.dk_copenmask &= ~(1 << part);
                    644:                break;
                    645:
                    646:        case S_IFBLK:
                    647:                rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
                    648:                break;
                    649:        }
                    650:        rs->sc_dkdev.dk_openmask =
                    651:            rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
1.186     perry     652:
1.13      oster     653:        if ((rs->sc_dkdev.dk_openmask == 0) &&
                    654:            ((rs->sc_flags & RAIDF_INITED) != 0)) {
1.186     perry     655:                /* Last one... device is not unconfigured yet.
                    656:                   Device shutdown has taken care of setting the
                    657:                   clean bits if RAIDF_INITED is not set
1.13      oster     658:                   mark things as clean... */
1.147     oster     659:
1.91      oster     660:                rf_update_component_labels(raidPtrs[unit],
                    661:                                                 RF_FINAL_COMPONENT_UPDATE);
1.107     oster     662:                if (doing_shutdown) {
                    663:                        /* last one, and we're going down, so
                    664:                           lights out for this RAID set too. */
                    665:                        error = rf_Shutdown(raidPtrs[unit]);
1.186     perry     666:
1.107     oster     667:                        /* It's no longer initialized... */
                    668:                        rs->sc_flags &= ~RAIDF_INITED;
1.186     perry     669:
1.107     oster     670:                        /* Detach the disk. */
1.188     yamt      671:                        pseudo_disk_detach(&rs->sc_dkdev);
1.107     oster     672:                }
1.13      oster     673:        }
1.1       oster     674:
                    675:        raidunlock(rs);
                    676:        return (0);
                    677:
                    678: }
                    679:
                    680: void
1.169     oster     681: raidstrategy(struct buf *bp)
1.1       oster     682: {
1.74      augustss  683:        int s;
1.1       oster     684:
                    685:        unsigned int raidID = raidunit(bp->b_dev);
                    686:        RF_Raid_t *raidPtr;
                    687:        struct raid_softc *rs = &raid_softc[raidID];
1.9       oster     688:        int     wlabel;
1.1       oster     689:
1.30      oster     690:        if ((rs->sc_flags & RAIDF_INITED) ==0) {
                    691:                bp->b_error = ENXIO;
1.100     chs       692:                bp->b_flags |= B_ERROR;
1.30      oster     693:                bp->b_resid = bp->b_bcount;
                    694:                biodone(bp);
1.1       oster     695:                return;
1.30      oster     696:        }
1.1       oster     697:        if (raidID >= numraid || !raidPtrs[raidID]) {
                    698:                bp->b_error = ENODEV;
                    699:                bp->b_flags |= B_ERROR;
                    700:                bp->b_resid = bp->b_bcount;
                    701:                biodone(bp);
                    702:                return;
                    703:        }
                    704:        raidPtr = raidPtrs[raidID];
                    705:        if (!raidPtr->valid) {
                    706:                bp->b_error = ENODEV;
                    707:                bp->b_flags |= B_ERROR;
                    708:                bp->b_resid = bp->b_bcount;
                    709:                biodone(bp);
                    710:                return;
                    711:        }
                    712:        if (bp->b_bcount == 0) {
                    713:                db1_printf(("b_bcount is zero..\n"));
                    714:                biodone(bp);
                    715:                return;
                    716:        }
                    717:
                    718:        /*
                    719:         * Do bounds checking and adjust transfer.  If there's an
                    720:         * error, the bounds check will flag that for us.
                    721:         */
                    722:
1.9       oster     723:        wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
1.1       oster     724:        if (DISKPART(bp->b_dev) != RAW_PART)
1.159     thorpej   725:                if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
1.1       oster     726:                        db1_printf(("Bounds check failed!!:%d %d\n",
1.9       oster     727:                                (int) bp->b_blkno, (int) wlabel));
1.1       oster     728:                        biodone(bp);
                    729:                        return;
                    730:                }
1.34      oster     731:        s = splbio();
1.1       oster     732:
                    733:        bp->b_resid = 0;
1.34      oster     734:
                    735:        /* stuff it onto our queue */
1.125     hannken   736:        BUFQ_PUT(&rs->buf_queue, bp);
1.34      oster     737:
1.190   ! oster     738:        /* scheduled the IO to happen at the next convenient time */
        !           739:        wakeup(&(raidPtrs[raidID]->iodone));
1.34      oster     740:
1.1       oster     741:        splx(s);
                    742: }
                    743: /* ARGSUSED */
                    744: int
1.169     oster     745: raidread(dev_t dev, struct uio *uio, int flags)
1.1       oster     746: {
1.9       oster     747:        int     unit = raidunit(dev);
1.1       oster     748:        struct raid_softc *rs;
                    749:
                    750:        if (unit >= numraid)
                    751:                return (ENXIO);
                    752:        rs = &raid_softc[unit];
                    753:
                    754:        if ((rs->sc_flags & RAIDF_INITED) == 0)
                    755:                return (ENXIO);
                    756:
                    757:        return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
                    758:
                    759: }
                    760: /* ARGSUSED */
                    761: int
1.169     oster     762: raidwrite(dev_t dev, struct uio *uio, int flags)
1.1       oster     763: {
1.9       oster     764:        int     unit = raidunit(dev);
1.1       oster     765:        struct raid_softc *rs;
                    766:
                    767:        if (unit >= numraid)
                    768:                return (ENXIO);
                    769:        rs = &raid_softc[unit];
                    770:
                    771:        if ((rs->sc_flags & RAIDF_INITED) == 0)
                    772:                return (ENXIO);
1.147     oster     773:
1.1       oster     774:        return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
                    775:
                    776: }
                    777:
                    778: int
1.169     oster     779: raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
1.1       oster     780: {
1.9       oster     781:        int     unit = raidunit(dev);
                    782:        int     error = 0;
                    783:        int     part, pmask;
1.1       oster     784:        struct raid_softc *rs;
                    785:        RF_Config_t *k_cfg, *u_cfg;
1.42      oster     786:        RF_Raid_t *raidPtr;
1.48      oster     787:        RF_RaidDisk_t *diskPtr;
1.41      oster     788:        RF_AccTotals_t *totals;
                    789:        RF_DeviceConfig_t *d_cfg, **ucfgp;
1.1       oster     790:        u_char *specific_buf;
1.11      oster     791:        int retcode = 0;
                    792:        int column;
1.123     oster     793:        int raidid;
1.1       oster     794:        struct rf_recon_req *rrcopy, *rr;
1.48      oster     795:        RF_ComponentLabel_t *clabel;
1.11      oster     796:        RF_ComponentLabel_t ci_label;
1.48      oster     797:        RF_ComponentLabel_t **clabel_ptr;
1.12      oster     798:        RF_SingleComponent_t *sparePtr,*componentPtr;
                    799:        RF_SingleComponent_t hot_spare;
                    800:        RF_SingleComponent_t component;
1.83      oster     801:        RF_ProgressInfo_t progressInfo, **progressInfoPtr;
1.41      oster     802:        int i, j, d;
1.102     fvdl      803: #ifdef __HAVE_OLD_DISKLABEL
                    804:        struct disklabel newlabel;
                    805: #endif
1.1       oster     806:
                    807:        if (unit >= numraid)
                    808:                return (ENXIO);
                    809:        rs = &raid_softc[unit];
1.42      oster     810:        raidPtr = raidPtrs[unit];
1.1       oster     811:
1.9       oster     812:        db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
                    813:                (int) DISKPART(dev), (int) unit, (int) cmd));
1.1       oster     814:
                    815:        /* Must be open for writes for these commands... */
                    816:        switch (cmd) {
                    817:        case DIOCSDINFO:
                    818:        case DIOCWDINFO:
1.102     fvdl      819: #ifdef __HAVE_OLD_DISKLABEL
                    820:        case ODIOCWDINFO:
                    821:        case ODIOCSDINFO:
                    822: #endif
1.1       oster     823:        case DIOCWLABEL:
                    824:                if ((flag & FWRITE) == 0)
                    825:                        return (EBADF);
                    826:        }
                    827:
                    828:        /* Must be initialized for these... */
                    829:        switch (cmd) {
                    830:        case DIOCGDINFO:
                    831:        case DIOCSDINFO:
                    832:        case DIOCWDINFO:
1.102     fvdl      833: #ifdef __HAVE_OLD_DISKLABEL
                    834:        case ODIOCGDINFO:
                    835:        case ODIOCWDINFO:
                    836:        case ODIOCSDINFO:
                    837:        case ODIOCGDEFLABEL:
                    838: #endif
1.1       oster     839:        case DIOCGPART:
                    840:        case DIOCWLABEL:
                    841:        case DIOCGDEFLABEL:
                    842:        case RAIDFRAME_SHUTDOWN:
                    843:        case RAIDFRAME_REWRITEPARITY:
                    844:        case RAIDFRAME_GET_INFO:
                    845:        case RAIDFRAME_RESET_ACCTOTALS:
                    846:        case RAIDFRAME_GET_ACCTOTALS:
                    847:        case RAIDFRAME_KEEP_ACCTOTALS:
                    848:        case RAIDFRAME_GET_SIZE:
                    849:        case RAIDFRAME_FAIL_DISK:
                    850:        case RAIDFRAME_COPYBACK:
1.37      oster     851:        case RAIDFRAME_CHECK_RECON_STATUS:
1.83      oster     852:        case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1.11      oster     853:        case RAIDFRAME_GET_COMPONENT_LABEL:
                    854:        case RAIDFRAME_SET_COMPONENT_LABEL:
                    855:        case RAIDFRAME_ADD_HOT_SPARE:
                    856:        case RAIDFRAME_REMOVE_HOT_SPARE:
                    857:        case RAIDFRAME_INIT_LABELS:
1.12      oster     858:        case RAIDFRAME_REBUILD_IN_PLACE:
1.23      oster     859:        case RAIDFRAME_CHECK_PARITY:
1.37      oster     860:        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1.83      oster     861:        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1.37      oster     862:        case RAIDFRAME_CHECK_COPYBACK_STATUS:
1.83      oster     863:        case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1.48      oster     864:        case RAIDFRAME_SET_AUTOCONFIG:
                    865:        case RAIDFRAME_SET_ROOT:
1.73      oster     866:        case RAIDFRAME_DELETE_COMPONENT:
                    867:        case RAIDFRAME_INCORPORATE_HOT_SPARE:
1.1       oster     868:                if ((rs->sc_flags & RAIDF_INITED) == 0)
                    869:                        return (ENXIO);
                    870:        }
1.9       oster     871:
1.1       oster     872:        switch (cmd) {
                    873:
                    874:                /* configure the system */
                    875:        case RAIDFRAME_CONFIGURE:
1.48      oster     876:
                    877:                if (raidPtr->valid) {
                    878:                        /* There is a valid RAID set running on this unit! */
                    879:                        printf("raid%d: Device already configured!\n",unit);
1.66      oster     880:                        return(EINVAL);
1.48      oster     881:                }
                    882:
1.1       oster     883:                /* copy-in the configuration information */
                    884:                /* data points to a pointer to the configuration structure */
1.43      oster     885:
1.9       oster     886:                u_cfg = *((RF_Config_t **) data);
                    887:                RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1.1       oster     888:                if (k_cfg == NULL) {
1.9       oster     889:                        return (ENOMEM);
1.1       oster     890:                }
1.156     dsl       891:                retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1.1       oster     892:                if (retcode) {
1.33      oster     893:                        RF_Free(k_cfg, sizeof(RF_Config_t));
1.46      oster     894:                        db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1.9       oster     895:                                retcode));
                    896:                        return (retcode);
1.1       oster     897:                }
1.9       oster     898:                /* allocate a buffer for the layout-specific data, and copy it
                    899:                 * in */
1.1       oster     900:                if (k_cfg->layoutSpecificSize) {
1.9       oster     901:                        if (k_cfg->layoutSpecificSize > 10000) {
1.1       oster     902:                                /* sanity check */
1.33      oster     903:                                RF_Free(k_cfg, sizeof(RF_Config_t));
1.9       oster     904:                                return (EINVAL);
1.1       oster     905:                        }
1.9       oster     906:                        RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
                    907:                            (u_char *));
1.1       oster     908:                        if (specific_buf == NULL) {
1.9       oster     909:                                RF_Free(k_cfg, sizeof(RF_Config_t));
                    910:                                return (ENOMEM);
1.1       oster     911:                        }
1.156     dsl       912:                        retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1.9       oster     913:                            k_cfg->layoutSpecificSize);
1.1       oster     914:                        if (retcode) {
1.33      oster     915:                                RF_Free(k_cfg, sizeof(RF_Config_t));
1.186     perry     916:                                RF_Free(specific_buf,
1.42      oster     917:                                        k_cfg->layoutSpecificSize);
1.46      oster     918:                                db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1.9       oster     919:                                        retcode));
                    920:                                return (retcode);
1.1       oster     921:                        }
1.9       oster     922:                } else
                    923:                        specific_buf = NULL;
1.1       oster     924:                k_cfg->layoutSpecific = specific_buf;
1.9       oster     925:
                    926:                /* should do some kind of sanity check on the configuration.
                    927:                 * Store the sum of all the bytes in the last byte? */
1.1       oster     928:
                    929:                /* configure the system */
                    930:
1.48      oster     931:                /*
                    932:                 * Clear the entire RAID descriptor, just to make sure
1.186     perry     933:                 *  there is no stale data left in the case of a
                    934:                 *  reconfiguration
1.48      oster     935:                 */
1.108     thorpej   936:                memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
1.42      oster     937:                raidPtr->raidid = unit;
1.20      oster     938:
1.48      oster     939:                retcode = rf_Configure(raidPtr, k_cfg, NULL);
1.1       oster     940:
1.40      oster     941:                if (retcode == 0) {
1.37      oster     942:
1.186     perry     943:                        /* allow this many simultaneous IO's to
1.40      oster     944:                           this RAID device */
1.42      oster     945:                        raidPtr->openings = RAIDOUTSTANDING;
1.186     perry     946:
1.59      oster     947:                        raidinit(raidPtr);
                    948:                        rf_markalldirty(raidPtr);
1.9       oster     949:                }
1.1       oster     950:                /* free the buffers.  No return code here. */
                    951:                if (k_cfg->layoutSpecificSize) {
1.9       oster     952:                        RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1.1       oster     953:                }
1.9       oster     954:                RF_Free(k_cfg, sizeof(RF_Config_t));
                    955:
                    956:                return (retcode);
                    957:
                    958:                /* shutdown the system */
1.1       oster     959:        case RAIDFRAME_SHUTDOWN:
1.9       oster     960:
                    961:                if ((error = raidlock(rs)) != 0)
                    962:                        return (error);
1.1       oster     963:
                    964:                /*
                    965:                 * If somebody has a partition mounted, we shouldn't
                    966:                 * shutdown.
                    967:                 */
                    968:
                    969:                part = DISKPART(dev);
                    970:                pmask = (1 << part);
1.9       oster     971:                if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
                    972:                    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
                    973:                        (rs->sc_dkdev.dk_copenmask & pmask))) {
                    974:                        raidunlock(rs);
                    975:                        return (EBUSY);
                    976:                }
1.11      oster     977:
1.42      oster     978:                retcode = rf_Shutdown(raidPtr);
1.1       oster     979:
                    980:                /* It's no longer initialized... */
                    981:                rs->sc_flags &= ~RAIDF_INITED;
1.16      oster     982:
1.9       oster     983:                /* Detach the disk. */
1.189     oster     984:                pseudo_disk_detach(&rs->sc_dkdev);
1.1       oster     985:
                    986:                raidunlock(rs);
                    987:
1.9       oster     988:                return (retcode);
1.11      oster     989:        case RAIDFRAME_GET_COMPONENT_LABEL:
1.48      oster     990:                clabel_ptr = (RF_ComponentLabel_t **) data;
1.11      oster     991:                /* need to read the component label for the disk indicated
1.48      oster     992:                   by row,column in clabel */
1.11      oster     993:
1.186     perry     994:                /* For practice, let's get it directly fromdisk, rather
1.11      oster     995:                   than from the in-core copy */
1.48      oster     996:                RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
1.11      oster     997:                           (RF_ComponentLabel_t *));
1.48      oster     998:                if (clabel == NULL)
1.11      oster     999:                        return (ENOMEM);
                   1000:
1.108     thorpej  1001:                memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1.186     perry    1002:
                   1003:                retcode = copyin( *clabel_ptr, clabel,
1.11      oster    1004:                                  sizeof(RF_ComponentLabel_t));
                   1005:
                   1006:                if (retcode) {
1.48      oster    1007:                        RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1.11      oster    1008:                        return(retcode);
                   1009:                }
                   1010:
1.166     oster    1011:                clabel->row = 0; /* Don't allow looking at anything else.*/
                   1012:
1.48      oster    1013:                column = clabel->column;
1.26      oster    1014:
1.166     oster    1015:                if ((column < 0) || (column >= raidPtr->numCol +
1.90      oster    1016:                                     raidPtr->numSpare)) {
1.48      oster    1017:                        RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1.26      oster    1018:                        return(EINVAL);
1.11      oster    1019:                }
                   1020:
1.186     perry    1021:                raidread_component_label(raidPtr->Disks[column].dev,
                   1022:                                raidPtr->raid_cinfo[column].ci_vp,
1.48      oster    1023:                                clabel );
1.11      oster    1024:
1.156     dsl      1025:                retcode = copyout(clabel, *clabel_ptr,
1.11      oster    1026:                                  sizeof(RF_ComponentLabel_t));
1.156     dsl      1027:                RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1.11      oster    1028:                return (retcode);
                   1029:
                   1030:        case RAIDFRAME_SET_COMPONENT_LABEL:
1.48      oster    1031:                clabel = (RF_ComponentLabel_t *) data;
1.11      oster    1032:
                   1033:                /* XXX check the label for valid stuff... */
                   1034:                /* Note that some things *should not* get modified --
1.186     perry    1035:                   the user should be re-initing the labels instead of
1.11      oster    1036:                   trying to patch things.
                   1037:                   */
                   1038:
1.123     oster    1039:                raidid = raidPtr->raidid;
1.174     oster    1040: #if DEBUG
1.123     oster    1041:                printf("raid%d: Got component label:\n", raidid);
                   1042:                printf("raid%d: Version: %d\n", raidid, clabel->version);
                   1043:                printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
                   1044:                printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
                   1045:                printf("raid%d: Column: %d\n", raidid, clabel->column);
                   1046:                printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
                   1047:                printf("raid%d: Clean: %d\n", raidid, clabel->clean);
                   1048:                printf("raid%d: Status: %d\n", raidid, clabel->status);
1.174     oster    1049: #endif
1.166     oster    1050:                clabel->row = 0;
1.48      oster    1051:                column = clabel->column;
1.12      oster    1052:
1.166     oster    1053:                if ((column < 0) || (column >= raidPtr->numCol)) {
1.12      oster    1054:                        return(EINVAL);
1.11      oster    1055:                }
1.12      oster    1056:
                   1057:                /* XXX this isn't allowed to do anything for now :-) */
1.48      oster    1058:
                   1059:                /* XXX and before it is, we need to fill in the rest
                   1060:                   of the fields!?!?!?! */
1.12      oster    1061: #if 0
1.186     perry    1062:                raidwrite_component_label(
                   1063:                             raidPtr->Disks[column].dev,
                   1064:                            raidPtr->raid_cinfo[column].ci_vp,
1.48      oster    1065:                            clabel );
1.12      oster    1066: #endif
                   1067:                return (0);
1.11      oster    1068:
1.186     perry    1069:        case RAIDFRAME_INIT_LABELS:
1.48      oster    1070:                clabel = (RF_ComponentLabel_t *) data;
1.186     perry    1071:                /*
1.11      oster    1072:                   we only want the serial number from
                   1073:                   the above.  We get all the rest of the information
                   1074:                   from the config that was used to create this RAID
1.186     perry    1075:                   set.
1.11      oster    1076:                   */
1.12      oster    1077:
1.48      oster    1078:                raidPtr->serial_number = clabel->serial_number;
1.186     perry    1079:
1.51      oster    1080:                raid_init_component_label(raidPtr, &ci_label);
                   1081:                ci_label.serial_number = clabel->serial_number;
1.166     oster    1082:                ci_label.row = 0; /* we dont' pretend to support more */
1.11      oster    1083:
1.166     oster    1084:                for(column=0;column<raidPtr->numCol;column++) {
                   1085:                        diskPtr = &raidPtr->Disks[column];
                   1086:                        if (!RF_DEAD_DISK(diskPtr->status)) {
                   1087:                                ci_label.partitionSize = diskPtr->partitionSize;
                   1088:                                ci_label.column = column;
1.186     perry    1089:                                raidwrite_component_label(
                   1090:                                                          raidPtr->Disks[column].dev,
                   1091:                                                          raidPtr->raid_cinfo[column].ci_vp,
1.166     oster    1092:                                                          &ci_label );
1.11      oster    1093:                        }
                   1094:                }
                   1095:
                   1096:                return (retcode);
1.48      oster    1097:        case RAIDFRAME_SET_AUTOCONFIG:
1.78      minoura  1098:                d = rf_set_autoconfig(raidPtr, *(int *) data);
1.186     perry    1099:                printf("raid%d: New autoconfig value is: %d\n",
1.123     oster    1100:                       raidPtr->raidid, d);
1.78      minoura  1101:                *(int *) data = d;
1.48      oster    1102:                return (retcode);
                   1103:
                   1104:        case RAIDFRAME_SET_ROOT:
1.78      minoura  1105:                d = rf_set_rootpartition(raidPtr, *(int *) data);
1.186     perry    1106:                printf("raid%d: New rootpartition value is: %d\n",
1.123     oster    1107:                       raidPtr->raidid, d);
1.78      minoura  1108:                *(int *) data = d;
1.48      oster    1109:                return (retcode);
1.9       oster    1110:
1.1       oster    1111:                /* initialize all parity */
                   1112:        case RAIDFRAME_REWRITEPARITY:
                   1113:
1.42      oster    1114:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.17      oster    1115:                        /* Parity for RAID 0 is trivially correct */
1.42      oster    1116:                        raidPtr->parity_good = RF_RAID_CLEAN;
1.17      oster    1117:                        return(0);
                   1118:                }
1.186     perry    1119:
1.42      oster    1120:                if (raidPtr->parity_rewrite_in_progress == 1) {
1.37      oster    1121:                        /* Re-write is already in progress! */
                   1122:                        return(EINVAL);
                   1123:                }
1.27      oster    1124:
1.42      oster    1125:                retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1.37      oster    1126:                                           rf_RewriteParityThread,
1.42      oster    1127:                                           raidPtr,"raid_parity");
1.9       oster    1128:                return (retcode);
                   1129:
1.11      oster    1130:
                   1131:        case RAIDFRAME_ADD_HOT_SPARE:
1.12      oster    1132:                sparePtr = (RF_SingleComponent_t *) data;
                   1133:                memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1.42      oster    1134:                retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1.11      oster    1135:                return(retcode);
                   1136:
                   1137:        case RAIDFRAME_REMOVE_HOT_SPARE:
1.73      oster    1138:                return(retcode);
                   1139:
                   1140:        case RAIDFRAME_DELETE_COMPONENT:
                   1141:                componentPtr = (RF_SingleComponent_t *)data;
1.186     perry    1142:                memcpy( &component, componentPtr,
1.73      oster    1143:                        sizeof(RF_SingleComponent_t));
                   1144:                retcode = rf_delete_component(raidPtr, &component);
                   1145:                return(retcode);
                   1146:
                   1147:        case RAIDFRAME_INCORPORATE_HOT_SPARE:
                   1148:                componentPtr = (RF_SingleComponent_t *)data;
1.186     perry    1149:                memcpy( &component, componentPtr,
1.73      oster    1150:                        sizeof(RF_SingleComponent_t));
                   1151:                retcode = rf_incorporate_hot_spare(raidPtr, &component);
1.11      oster    1152:                return(retcode);
                   1153:
1.12      oster    1154:        case RAIDFRAME_REBUILD_IN_PLACE:
1.24      oster    1155:
1.42      oster    1156:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.24      oster    1157:                        /* Can't do this on a RAID 0!! */
                   1158:                        return(EINVAL);
                   1159:                }
                   1160:
1.42      oster    1161:                if (raidPtr->recon_in_progress == 1) {
1.37      oster    1162:                        /* a reconstruct is already in progress! */
                   1163:                        return(EINVAL);
                   1164:                }
                   1165:
1.12      oster    1166:                componentPtr = (RF_SingleComponent_t *) data;
1.186     perry    1167:                memcpy( &component, componentPtr,
1.12      oster    1168:                        sizeof(RF_SingleComponent_t));
1.166     oster    1169:                component.row = 0; /* we don't support any more */
1.12      oster    1170:                column = component.column;
1.147     oster    1171:
1.166     oster    1172:                if ((column < 0) || (column >= raidPtr->numCol)) {
1.12      oster    1173:                        return(EINVAL);
                   1174:                }
1.37      oster    1175:
1.149     oster    1176:                RF_LOCK_MUTEX(raidPtr->mutex);
1.166     oster    1177:                if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1.186     perry    1178:                    (raidPtr->numFailures > 0)) {
1.149     oster    1179:                        /* XXX 0 above shouldn't be constant!!! */
                   1180:                        /* some component other than this has failed.
                   1181:                           Let's not make things worse than they already
                   1182:                           are... */
                   1183:                        printf("raid%d: Unable to reconstruct to disk at:\n",
                   1184:                               raidPtr->raidid);
1.166     oster    1185:                        printf("raid%d:     Col: %d   Too many failures.\n",
                   1186:                               raidPtr->raidid, column);
1.149     oster    1187:                        RF_UNLOCK_MUTEX(raidPtr->mutex);
                   1188:                        return (EINVAL);
                   1189:                }
1.186     perry    1190:                if (raidPtr->Disks[column].status ==
1.149     oster    1191:                    rf_ds_reconstructing) {
                   1192:                        printf("raid%d: Unable to reconstruct to disk at:\n",
                   1193:                               raidPtr->raidid);
1.166     oster    1194:                        printf("raid%d:    Col: %d   Reconstruction already occuring!\n", raidPtr->raidid, column);
1.186     perry    1195:
1.149     oster    1196:                        RF_UNLOCK_MUTEX(raidPtr->mutex);
                   1197:                        return (EINVAL);
                   1198:                }
1.166     oster    1199:                if (raidPtr->Disks[column].status == rf_ds_spared) {
1.149     oster    1200:                        RF_UNLOCK_MUTEX(raidPtr->mutex);
                   1201:                        return (EINVAL);
                   1202:                }
                   1203:                RF_UNLOCK_MUTEX(raidPtr->mutex);
                   1204:
1.37      oster    1205:                RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1.38      oster    1206:                if (rrcopy == NULL)
                   1207:                        return(ENOMEM);
1.37      oster    1208:
1.42      oster    1209:                rrcopy->raidPtr = (void *) raidPtr;
1.37      oster    1210:                rrcopy->col = column;
                   1211:
1.42      oster    1212:                retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1.37      oster    1213:                                           rf_ReconstructInPlaceThread,
                   1214:                                           rrcopy,"raid_reconip");
1.12      oster    1215:                return(retcode);
                   1216:
1.1       oster    1217:        case RAIDFRAME_GET_INFO:
1.42      oster    1218:                if (!raidPtr->valid)
1.41      oster    1219:                        return (ENODEV);
                   1220:                ucfgp = (RF_DeviceConfig_t **) data;
                   1221:                RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
                   1222:                          (RF_DeviceConfig_t *));
                   1223:                if (d_cfg == NULL)
                   1224:                        return (ENOMEM);
1.108     thorpej  1225:                memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1.166     oster    1226:                d_cfg->rows = 1; /* there is only 1 row now */
1.42      oster    1227:                d_cfg->cols = raidPtr->numCol;
1.166     oster    1228:                d_cfg->ndevs = raidPtr->numCol;
1.41      oster    1229:                if (d_cfg->ndevs >= RF_MAX_DISKS) {
                   1230:                        RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
                   1231:                        return (ENOMEM);
                   1232:                }
1.42      oster    1233:                d_cfg->nspares = raidPtr->numSpare;
1.41      oster    1234:                if (d_cfg->nspares >= RF_MAX_DISKS) {
                   1235:                        RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
                   1236:                        return (ENOMEM);
                   1237:                }
1.42      oster    1238:                d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1.41      oster    1239:                d = 0;
1.166     oster    1240:                for (j = 0; j < d_cfg->cols; j++) {
                   1241:                        d_cfg->devs[d] = raidPtr->Disks[j];
                   1242:                        d++;
1.41      oster    1243:                }
                   1244:                for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1.166     oster    1245:                        d_cfg->spares[i] = raidPtr->Disks[j];
1.41      oster    1246:                }
1.156     dsl      1247:                retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1.41      oster    1248:                RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
                   1249:
                   1250:                return (retcode);
1.9       oster    1251:
1.22      oster    1252:        case RAIDFRAME_CHECK_PARITY:
1.42      oster    1253:                *(int *) data = raidPtr->parity_good;
1.22      oster    1254:                return (0);
1.41      oster    1255:
1.1       oster    1256:        case RAIDFRAME_RESET_ACCTOTALS:
1.108     thorpej  1257:                memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1.41      oster    1258:                return (0);
1.9       oster    1259:
1.1       oster    1260:        case RAIDFRAME_GET_ACCTOTALS:
1.41      oster    1261:                totals = (RF_AccTotals_t *) data;
1.42      oster    1262:                *totals = raidPtr->acc_totals;
1.41      oster    1263:                return (0);
1.9       oster    1264:
1.1       oster    1265:        case RAIDFRAME_KEEP_ACCTOTALS:
1.42      oster    1266:                raidPtr->keep_acc_totals = *(int *)data;
1.41      oster    1267:                return (0);
1.9       oster    1268:
1.1       oster    1269:        case RAIDFRAME_GET_SIZE:
1.42      oster    1270:                *(int *) data = raidPtr->totalSectors;
1.9       oster    1271:                return (0);
1.1       oster    1272:
                   1273:                /* fail a disk & optionally start reconstruction */
                   1274:        case RAIDFRAME_FAIL_DISK:
1.24      oster    1275:
1.42      oster    1276:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.24      oster    1277:                        /* Can't do this on a RAID 0!! */
                   1278:                        return(EINVAL);
                   1279:                }
                   1280:
1.1       oster    1281:                rr = (struct rf_recon_req *) data;
1.166     oster    1282:                rr->row = 0;
                   1283:                if (rr->col < 0 || rr->col >= raidPtr->numCol)
1.9       oster    1284:                        return (EINVAL);
1.149     oster    1285:
                   1286:
                   1287:                RF_LOCK_MUTEX(raidPtr->mutex);
1.185     oster    1288:                if (raidPtr->status == rf_rs_reconstructing) {
                   1289:                        /* you can't fail a disk while we're reconstructing! */
                   1290:                        /* XXX wrong for RAID6 */
                   1291:                        RF_UNLOCK_MUTEX(raidPtr->mutex);
                   1292:                        return (EINVAL);
                   1293:                }
1.186     perry    1294:                if ((raidPtr->Disks[rr->col].status ==
                   1295:                     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1.149     oster    1296:                        /* some other component has failed.  Let's not make
                   1297:                           things worse. XXX wrong for RAID6 */
                   1298:                        RF_UNLOCK_MUTEX(raidPtr->mutex);
                   1299:                        return (EINVAL);
                   1300:                }
1.166     oster    1301:                if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1.149     oster    1302:                        /* Can't fail a spared disk! */
                   1303:                        RF_UNLOCK_MUTEX(raidPtr->mutex);
                   1304:                        return (EINVAL);
                   1305:                }
                   1306:                RF_UNLOCK_MUTEX(raidPtr->mutex);
1.1       oster    1307:
1.9       oster    1308:                /* make a copy of the recon request so that we don't rely on
                   1309:                 * the user's buffer */
1.1       oster    1310:                RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1.38      oster    1311:                if (rrcopy == NULL)
                   1312:                        return(ENOMEM);
1.118     wiz      1313:                memcpy(rrcopy, rr, sizeof(*rr));
1.42      oster    1314:                rrcopy->raidPtr = (void *) raidPtr;
1.1       oster    1315:
1.42      oster    1316:                retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1.37      oster    1317:                                           rf_ReconThread,
                   1318:                                           rrcopy,"raid_recon");
1.9       oster    1319:                return (0);
                   1320:
                   1321:                /* invoke a copyback operation after recon on whatever disk
                   1322:                 * needs it, if any */
                   1323:        case RAIDFRAME_COPYBACK:
1.24      oster    1324:
1.42      oster    1325:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.24      oster    1326:                        /* This makes no sense on a RAID 0!! */
                   1327:                        return(EINVAL);
                   1328:                }
                   1329:
1.42      oster    1330:                if (raidPtr->copyback_in_progress == 1) {
1.37      oster    1331:                        /* Copyback is already in progress! */
                   1332:                        return(EINVAL);
                   1333:                }
1.27      oster    1334:
1.42      oster    1335:                retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1.37      oster    1336:                                           rf_CopybackThread,
1.42      oster    1337:                                           raidPtr,"raid_copyback");
1.37      oster    1338:                return (retcode);
1.9       oster    1339:
1.1       oster    1340:                /* return the percentage completion of reconstruction */
1.37      oster    1341:        case RAIDFRAME_CHECK_RECON_STATUS:
1.42      oster    1342:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.71      oster    1343:                        /* This makes no sense on a RAID 0, so tell the
                   1344:                           user it's done. */
                   1345:                        *(int *) data = 100;
                   1346:                        return(0);
1.24      oster    1347:                }
1.166     oster    1348:                if (raidPtr->status != rf_rs_reconstructing)
1.1       oster    1349:                        *(int *) data = 100;
1.171     oster    1350:                else {
                   1351:                        if (raidPtr->reconControl->numRUsTotal > 0) {
                   1352:                                *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
                   1353:                        } else {
                   1354:                                *(int *) data = 0;
                   1355:                        }
                   1356:                }
1.9       oster    1357:                return (0);
1.83      oster    1358:        case RAIDFRAME_CHECK_RECON_STATUS_EXT:
                   1359:                progressInfoPtr = (RF_ProgressInfo_t **) data;
1.166     oster    1360:                if (raidPtr->status != rf_rs_reconstructing) {
1.83      oster    1361:                        progressInfo.remaining = 0;
                   1362:                        progressInfo.completed = 100;
                   1363:                        progressInfo.total = 100;
                   1364:                } else {
1.186     perry    1365:                        progressInfo.total =
1.166     oster    1366:                                raidPtr->reconControl->numRUsTotal;
1.186     perry    1367:                        progressInfo.completed =
1.166     oster    1368:                                raidPtr->reconControl->numRUsComplete;
1.83      oster    1369:                        progressInfo.remaining = progressInfo.total -
                   1370:                                progressInfo.completed;
                   1371:                }
1.156     dsl      1372:                retcode = copyout(&progressInfo, *progressInfoPtr,
1.83      oster    1373:                                  sizeof(RF_ProgressInfo_t));
                   1374:                return (retcode);
1.9       oster    1375:
1.37      oster    1376:        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1.42      oster    1377:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.80      oster    1378:                        /* This makes no sense on a RAID 0, so tell the
                   1379:                           user it's done. */
                   1380:                        *(int *) data = 100;
                   1381:                        return(0);
1.37      oster    1382:                }
1.42      oster    1383:                if (raidPtr->parity_rewrite_in_progress == 1) {
1.186     perry    1384:                        *(int *) data = 100 *
                   1385:                                raidPtr->parity_rewrite_stripes_done /
1.83      oster    1386:                                raidPtr->Layout.numStripe;
1.37      oster    1387:                } else {
                   1388:                        *(int *) data = 100;
                   1389:                }
                   1390:                return (0);
                   1391:
1.83      oster    1392:        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
                   1393:                progressInfoPtr = (RF_ProgressInfo_t **) data;
                   1394:                if (raidPtr->parity_rewrite_in_progress == 1) {
                   1395:                        progressInfo.total = raidPtr->Layout.numStripe;
1.186     perry    1396:                        progressInfo.completed =
1.83      oster    1397:                                raidPtr->parity_rewrite_stripes_done;
                   1398:                        progressInfo.remaining = progressInfo.total -
                   1399:                                progressInfo.completed;
                   1400:                } else {
                   1401:                        progressInfo.remaining = 0;
                   1402:                        progressInfo.completed = 100;
                   1403:                        progressInfo.total = 100;
                   1404:                }
1.156     dsl      1405:                retcode = copyout(&progressInfo, *progressInfoPtr,
1.83      oster    1406:                                  sizeof(RF_ProgressInfo_t));
                   1407:                return (retcode);
                   1408:
1.37      oster    1409:        case RAIDFRAME_CHECK_COPYBACK_STATUS:
1.42      oster    1410:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.37      oster    1411:                        /* This makes no sense on a RAID 0 */
1.83      oster    1412:                        *(int *) data = 100;
                   1413:                        return(0);
1.37      oster    1414:                }
1.42      oster    1415:                if (raidPtr->copyback_in_progress == 1) {
                   1416:                        *(int *) data = 100 * raidPtr->copyback_stripes_done /
                   1417:                                raidPtr->Layout.numStripe;
1.37      oster    1418:                } else {
                   1419:                        *(int *) data = 100;
                   1420:                }
                   1421:                return (0);
                   1422:
1.83      oster    1423:        case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1.93      oster    1424:                progressInfoPtr = (RF_ProgressInfo_t **) data;
1.83      oster    1425:                if (raidPtr->copyback_in_progress == 1) {
                   1426:                        progressInfo.total = raidPtr->Layout.numStripe;
1.186     perry    1427:                        progressInfo.completed =
1.93      oster    1428:                                raidPtr->copyback_stripes_done;
1.83      oster    1429:                        progressInfo.remaining = progressInfo.total -
                   1430:                                progressInfo.completed;
                   1431:                } else {
                   1432:                        progressInfo.remaining = 0;
                   1433:                        progressInfo.completed = 100;
                   1434:                        progressInfo.total = 100;
                   1435:                }
1.156     dsl      1436:                retcode = copyout(&progressInfo, *progressInfoPtr,
1.83      oster    1437:                                  sizeof(RF_ProgressInfo_t));
                   1438:                return (retcode);
1.37      oster    1439:
1.9       oster    1440:                /* the sparetable daemon calls this to wait for the kernel to
                   1441:                 * need a spare table. this ioctl does not return until a
                   1442:                 * spare table is needed. XXX -- calling mpsleep here in the
                   1443:                 * ioctl code is almost certainly wrong and evil. -- XXX XXX
                   1444:                 * -- I should either compute the spare table in the kernel,
                   1445:                 * or have a different -- XXX XXX -- interface (a different
1.42      oster    1446:                 * character device) for delivering the table     -- XXX */
1.1       oster    1447: #if 0
                   1448:        case RAIDFRAME_SPARET_WAIT:
                   1449:                RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1.9       oster    1450:                while (!rf_sparet_wait_queue)
                   1451:                        mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1.1       oster    1452:                waitreq = rf_sparet_wait_queue;
                   1453:                rf_sparet_wait_queue = rf_sparet_wait_queue->next;
                   1454:                RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1.9       oster    1455:
1.42      oster    1456:                /* structure assignment */
1.186     perry    1457:                *((RF_SparetWait_t *) data) = *waitreq;
1.9       oster    1458:
1.1       oster    1459:                RF_Free(waitreq, sizeof(*waitreq));
1.9       oster    1460:                return (0);
                   1461:
                   1462:                /* wakes up a process waiting on SPARET_WAIT and puts an error
                   1463:                 * code in it that will cause the daemon to exit */
1.1       oster    1464:        case RAIDFRAME_ABORT_SPARET_WAIT:
                   1465:                RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
                   1466:                waitreq->fcol = -1;
                   1467:                RF_LOCK_MUTEX(rf_sparet_wait_mutex);
                   1468:                waitreq->next = rf_sparet_wait_queue;
                   1469:                rf_sparet_wait_queue = waitreq;
                   1470:                RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
                   1471:                wakeup(&rf_sparet_wait_queue);
1.9       oster    1472:                return (0);
1.1       oster    1473:
1.9       oster    1474:                /* used by the spare table daemon to deliver a spare table
                   1475:                 * into the kernel */
1.1       oster    1476:        case RAIDFRAME_SEND_SPARET:
1.9       oster    1477:
1.1       oster    1478:                /* install the spare table */
1.42      oster    1479:                retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1.9       oster    1480:
                   1481:                /* respond to the requestor.  the return status of the spare
                   1482:                 * table installation is passed in the "fcol" field */
1.1       oster    1483:                RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
                   1484:                waitreq->fcol = retcode;
                   1485:                RF_LOCK_MUTEX(rf_sparet_wait_mutex);
                   1486:                waitreq->next = rf_sparet_resp_queue;
                   1487:                rf_sparet_resp_queue = waitreq;
                   1488:                wakeup(&rf_sparet_resp_queue);
                   1489:                RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1.9       oster    1490:
                   1491:                return (retcode);
1.1       oster    1492: #endif
                   1493:
1.9       oster    1494:        default:
1.36      oster    1495:                break; /* fall through to the os-specific code below */
1.1       oster    1496:
                   1497:        }
1.9       oster    1498:
1.42      oster    1499:        if (!raidPtr->valid)
1.9       oster    1500:                return (EINVAL);
                   1501:
1.1       oster    1502:        /*
                   1503:         * Add support for "regular" device ioctls here.
                   1504:         */
1.9       oster    1505:
1.1       oster    1506:        switch (cmd) {
                   1507:        case DIOCGDINFO:
1.9       oster    1508:                *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1.1       oster    1509:                break;
1.102     fvdl     1510: #ifdef __HAVE_OLD_DISKLABEL
                   1511:        case ODIOCGDINFO:
                   1512:                newlabel = *(rs->sc_dkdev.dk_label);
                   1513:                if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1.103     fvdl     1514:                        return ENOTTY;
1.102     fvdl     1515:                memcpy(data, &newlabel, sizeof (struct olddisklabel));
                   1516:                break;
                   1517: #endif
1.1       oster    1518:
                   1519:        case DIOCGPART:
1.9       oster    1520:                ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
                   1521:                ((struct partinfo *) data)->part =
1.1       oster    1522:                    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
                   1523:                break;
                   1524:
                   1525:        case DIOCWDINFO:
                   1526:        case DIOCSDINFO:
1.102     fvdl     1527: #ifdef __HAVE_OLD_DISKLABEL
                   1528:        case ODIOCWDINFO:
                   1529:        case ODIOCSDINFO:
                   1530: #endif
                   1531:        {
                   1532:                struct disklabel *lp;
                   1533: #ifdef __HAVE_OLD_DISKLABEL
                   1534:                if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
                   1535:                        memset(&newlabel, 0, sizeof newlabel);
                   1536:                        memcpy(&newlabel, data, sizeof (struct olddisklabel));
                   1537:                        lp = &newlabel;
                   1538:                } else
                   1539: #endif
                   1540:                lp = (struct disklabel *)data;
                   1541:
1.1       oster    1542:                if ((error = raidlock(rs)) != 0)
                   1543:                        return (error);
                   1544:
                   1545:                rs->sc_flags |= RAIDF_LABELLING;
                   1546:
                   1547:                error = setdisklabel(rs->sc_dkdev.dk_label,
1.102     fvdl     1548:                    lp, 0, rs->sc_dkdev.dk_cpulabel);
1.1       oster    1549:                if (error == 0) {
1.102     fvdl     1550:                        if (cmd == DIOCWDINFO
                   1551: #ifdef __HAVE_OLD_DISKLABEL
                   1552:                            || cmd == ODIOCWDINFO
                   1553: #endif
                   1554:                           )
1.1       oster    1555:                                error = writedisklabel(RAIDLABELDEV(dev),
                   1556:                                    raidstrategy, rs->sc_dkdev.dk_label,
                   1557:                                    rs->sc_dkdev.dk_cpulabel);
                   1558:                }
                   1559:                rs->sc_flags &= ~RAIDF_LABELLING;
                   1560:
                   1561:                raidunlock(rs);
                   1562:
                   1563:                if (error)
                   1564:                        return (error);
                   1565:                break;
1.102     fvdl     1566:        }
1.1       oster    1567:
                   1568:        case DIOCWLABEL:
1.9       oster    1569:                if (*(int *) data != 0)
1.1       oster    1570:                        rs->sc_flags |= RAIDF_WLABEL;
                   1571:                else
                   1572:                        rs->sc_flags &= ~RAIDF_WLABEL;
                   1573:                break;
                   1574:
                   1575:        case DIOCGDEFLABEL:
1.102     fvdl     1576:                raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1.1       oster    1577:                break;
1.102     fvdl     1578:
                   1579: #ifdef __HAVE_OLD_DISKLABEL
                   1580:        case ODIOCGDEFLABEL:
                   1581:                raidgetdefaultlabel(raidPtr, rs, &newlabel);
                   1582:                if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1.103     fvdl     1583:                        return ENOTTY;
1.102     fvdl     1584:                memcpy(data, &newlabel, sizeof (struct olddisklabel));
                   1585:                break;
                   1586: #endif
1.1       oster    1587:
                   1588:        default:
1.39      oster    1589:                retcode = ENOTTY;
1.1       oster    1590:        }
1.9       oster    1591:        return (retcode);
1.1       oster    1592:
                   1593: }
                   1594:
                   1595:
1.9       oster    1596: /* raidinit -- complete the rest of the initialization for the
1.1       oster    1597:    RAIDframe device.  */
                   1598:
                   1599:
1.59      oster    1600: static void
1.169     oster    1601: raidinit(RF_Raid_t *raidPtr)
1.1       oster    1602: {
                   1603:        struct raid_softc *rs;
1.59      oster    1604:        int     unit;
1.1       oster    1605:
1.59      oster    1606:        unit = raidPtr->raidid;
1.1       oster    1607:
                   1608:        rs = &raid_softc[unit];
                   1609:
                   1610:        /* XXX should check return code first... */
                   1611:        rs->sc_flags |= RAIDF_INITED;
                   1612:
1.179     itojun   1613:        /* XXX doesn't check bounds. */
                   1614:        snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1.1       oster    1615:
1.9       oster    1616:        rs->sc_dkdev.dk_name = rs->sc_xname;
1.11      oster    1617:
1.1       oster    1618:        /* disk_attach actually creates space for the CPU disklabel, among
1.9       oster    1619:         * other things, so it's critical to call this *BEFORE* we try putzing
                   1620:         * with disklabels. */
1.11      oster    1621:
1.188     yamt     1622:        pseudo_disk_attach(&rs->sc_dkdev);
1.1       oster    1623:
                   1624:        /* XXX There may be a weird interaction here between this, and
1.9       oster    1625:         * protectedSectors, as used in RAIDframe.  */
1.11      oster    1626:
1.9       oster    1627:        rs->sc_size = raidPtr->totalSectors;
1.1       oster    1628: }
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/*
 * Wake up the daemon and tell it to get us a spare table.
 *
 * The request is pushed on the front of rf_sparet_wait_queue and the
 * daemon is woken.  The caller then sleeps until rf_sparet_resp_queue is
 * non-empty, takes the entry at its head, and returns that entry's fcol
 * after freeing the entry.
 *
 * XXX
 * The entries in the queues should be tagged with the raidPtr so that, in
 * the extremely rare case that two recons happen at once, we know for
 * which device we're requesting a spare table.
 * XXX
 *
 * XXX This code is not currently used. GO
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	RF_SparetWait_t *resp;
	int fcol;

	RF_LOCK_MUTEX(rf_sparet_wait_mutex);

	/* queue the request and poke whoever sleeps on the wait queue */
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/*
	 * NOTE(review): this spot used to say "mpsleep unlocks the mutex",
	 * but the call made here is tsleep(), which is not handed the
	 * mutex.  Confirm whether rf_sparet_wait_mutex may be held across
	 * the sleep before this code is ever enabled.
	 */
	while (!rf_sparet_resp_queue) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}

	/* pop the head of the response queue */
	resp = rf_sparet_resp_queue;
	rf_sparet_resp_queue = resp->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	fcol = resp->fcol;
	/* this is not the same request as the one the caller alloc'd */
	RF_Free(resp, sizeof(*resp));
	return (fcol);
}
#endif
1.39      oster    1664:
1.186     perry    1665: /* a wrapper around rf_DoAccess that extracts appropriate info from the
1.11      oster    1666:  * bp & passes it down.
1.1       oster    1667:  * any calls originating in the kernel must use non-blocking I/O
                   1668:  * do some extra sanity checking to return "appropriate" error values for
                   1669:  * certain conditions (to make some standard utilities work)
1.186     perry    1670:  *
1.34      oster    1671:  * Formerly known as: rf_DoAccessKernel
1.1       oster    1672:  */
1.34      oster    1673: void
1.169     oster    1674: raidstart(RF_Raid_t *raidPtr)
1.1       oster    1675: {
                   1676:        RF_SectorCount_t num_blocks, pb, sum;
                   1677:        RF_RaidAddr_t raid_addr;
                   1678:        struct partition *pp;
1.9       oster    1679:        daddr_t blocknum;
                   1680:        int     unit;
1.1       oster    1681:        struct raid_softc *rs;
1.9       oster    1682:        int     do_async;
1.34      oster    1683:        struct buf *bp;
1.180     oster    1684:        int rc;
1.1       oster    1685:
                   1686:        unit = raidPtr->raidid;
                   1687:        rs = &raid_softc[unit];
1.186     perry    1688:
1.56      oster    1689:        /* quick check to see if anything has died recently */
                   1690:        RF_LOCK_MUTEX(raidPtr->mutex);
                   1691:        if (raidPtr->numNewFailures > 0) {
1.151     oster    1692:                RF_UNLOCK_MUTEX(raidPtr->mutex);
1.186     perry    1693:                rf_update_component_labels(raidPtr,
1.91      oster    1694:                                           RF_NORMAL_COMPONENT_UPDATE);
1.151     oster    1695:                RF_LOCK_MUTEX(raidPtr->mutex);
1.56      oster    1696:                raidPtr->numNewFailures--;
                   1697:        }
                   1698:
1.34      oster    1699:        /* Check to see if we're at the limit... */
                   1700:        while (raidPtr->openings > 0) {
                   1701:                RF_UNLOCK_MUTEX(raidPtr->mutex);
                   1702:
                   1703:                /* get the next item, if any, from the queue */
1.125     hannken  1704:                if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
1.34      oster    1705:                        /* nothing more to do */
                   1706:                        return;
                   1707:                }
                   1708:
                   1709:                /* Ok, for the bp we have here, bp->b_blkno is relative to the
1.186     perry    1710:                 * partition.. Need to make it absolute to the underlying
1.34      oster    1711:                 * device.. */
1.1       oster    1712:
1.34      oster    1713:                blocknum = bp->b_blkno;
                   1714:                if (DISKPART(bp->b_dev) != RAW_PART) {
                   1715:                        pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
                   1716:                        blocknum += pp->p_offset;
                   1717:                }
1.1       oster    1718:
1.186     perry    1719:                db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1.34      oster    1720:                            (int) blocknum));
1.186     perry    1721:
1.34      oster    1722:                db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
                   1723:                db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1.186     perry    1724:
                   1725:                /* *THIS* is where we adjust what block we're going to...
1.34      oster    1726:                 * but DO NOT TOUCH bp->b_blkno!!! */
                   1727:                raid_addr = blocknum;
1.186     perry    1728:
1.34      oster    1729:                num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
                   1730:                pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
                   1731:                sum = raid_addr + num_blocks + pb;
                   1732:                if (1 || rf_debugKernelAccess) {
                   1733:                        db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
                   1734:                                    (int) raid_addr, (int) sum, (int) num_blocks,
                   1735:                                    (int) pb, (int) bp->b_resid));
                   1736:                }
                   1737:                if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
                   1738:                    || (sum < num_blocks) || (sum < pb)) {
                   1739:                        bp->b_error = ENOSPC;
                   1740:                        bp->b_flags |= B_ERROR;
                   1741:                        bp->b_resid = bp->b_bcount;
                   1742:                        biodone(bp);
                   1743:                        RF_LOCK_MUTEX(raidPtr->mutex);
                   1744:                        continue;
                   1745:                }
                   1746:                /*
                   1747:                 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
                   1748:                 */
1.186     perry    1749:
1.34      oster    1750:                if (bp->b_bcount & raidPtr->sectorMask) {
                   1751:                        bp->b_error = EINVAL;
                   1752:                        bp->b_flags |= B_ERROR;
                   1753:                        bp->b_resid = bp->b_bcount;
                   1754:                        biodone(bp);
                   1755:                        RF_LOCK_MUTEX(raidPtr->mutex);
                   1756:                        continue;
1.186     perry    1757:
1.34      oster    1758:                }
                   1759:                db1_printf(("Calling DoAccess..\n"));
1.186     perry    1760:
1.1       oster    1761:
1.34      oster    1762:                RF_LOCK_MUTEX(raidPtr->mutex);
                   1763:                raidPtr->openings--;
                   1764:                RF_UNLOCK_MUTEX(raidPtr->mutex);
1.1       oster    1765:
1.34      oster    1766:                /*
                   1767:                 * Everything is async.
                   1768:                 */
                   1769:                do_async = 1;
1.186     perry    1770:
1.99      oster    1771:                disk_busy(&rs->sc_dkdev);
                   1772:
1.186     perry    1773:                /* XXX we're still at splbio() here... do we *really*
1.34      oster    1774:                   need to be? */
1.20      oster    1775:
1.186     perry    1776:                /* don't ever condition on bp->b_flags & B_WRITE.
1.99      oster    1777:                 * always condition on B_READ instead */
1.186     perry    1778:
1.180     oster    1779:                rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
                   1780:                                 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
                   1781:                                 do_async, raid_addr, num_blocks,
                   1782:                                 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1.151     oster    1783:
1.180     oster    1784:                if (rc) {
                   1785:                        bp->b_error = rc;
1.151     oster    1786:                        bp->b_flags |= B_ERROR;
1.180     oster    1787:                        bp->b_resid = bp->b_bcount;
                   1788:                        biodone(bp);
                   1789:                        /* continue loop */
1.186     perry    1790:                }
1.20      oster    1791:
                   1792:                RF_LOCK_MUTEX(raidPtr->mutex);
                   1793:        }
1.34      oster    1794:        RF_UNLOCK_MUTEX(raidPtr->mutex);
                   1795: }
1.20      oster    1796:
                   1797:
1.7       explorer 1798:
                   1799:
1.1       oster    1800: /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
                   1801:
1.186     perry    1802: int
1.169     oster    1803: rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
1.1       oster    1804: {
1.9       oster    1805:        int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1.1       oster    1806:        struct buf *bp;
1.9       oster    1807:        struct raidbuf *raidbp = NULL;
                   1808:
1.1       oster    1809:        req->queue = queue;
1.9       oster    1810:
1.134     oster    1811: #if DIAGNOSTIC
                   1812:        if (queue->raidPtr->raidid >= numraid) {
1.137     itojun   1813:                printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
                   1814:                    numraid);
1.141     provos   1815:                panic("Invalid Unit number in rf_DispatchKernelIO");
1.1       oster    1816:        }
1.134     oster    1817: #endif
1.1       oster    1818:
                   1819:        bp = req->bp;
1.16      oster    1820: #if 1
1.9       oster    1821:        /* XXX when there is a physical disk failure, someone is passing us a
                   1822:         * buffer that contains old stuff!!  Attempt to deal with this problem
                   1823:         * without taking a performance hit... (not sure where the real bug
                   1824:         * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
1.4       oster    1825:
                   1826:        if (bp->b_flags & B_ERROR) {
                   1827:                bp->b_flags &= ~B_ERROR;
                   1828:        }
1.9       oster    1829:        if (bp->b_error != 0) {
1.4       oster    1830:                bp->b_error = 0;
                   1831:        }
1.16      oster    1832: #endif
1.177     oster    1833:        raidbp = pool_get(&rf_pools.cbuf, PR_NOWAIT);
1.154     pk       1834:        if (raidbp == NULL) {
                   1835:                bp->b_flags |= B_ERROR;
                   1836:                bp->b_error = ENOMEM;
                   1837:                return (ENOMEM);
                   1838:        }
1.155     thorpej  1839:        BUF_INIT(&raidbp->rf_buf);
1.1       oster    1840:
                   1841:        /*
                   1842:         * context for raidiodone
                   1843:         */
                   1844:        raidbp->rf_obp = bp;
                   1845:        raidbp->req = req;
1.32      oster    1846:
1.172     yamt     1847:        BIO_COPYPRIO(&raidbp->rf_buf, bp);
                   1848:
1.1       oster    1849:        switch (req->type) {
1.9       oster    1850:        case RF_IO_TYPE_NOP:    /* used primarily to unlock a locked queue */
1.1       oster    1851:                /* XXX need to do something extra here.. */
1.9       oster    1852:                /* I'm leaving this in, as I've never actually seen it used,
                   1853:                 * and I'd like folks to report it... GO */
1.1       oster    1854:                printf(("WAKEUP CALLED\n"));
                   1855:                queue->numOutstanding++;
                   1856:
                   1857:                /* XXX need to glue the original buffer into this??  */
                   1858:
                   1859:                KernelWakeupFunc(&raidbp->rf_buf);
                   1860:                break;
1.9       oster    1861:
1.1       oster    1862:        case RF_IO_TYPE_READ:
                   1863:        case RF_IO_TYPE_WRITE:
1.175     oster    1864: #if RF_ACC_TRACE > 0
1.1       oster    1865:                if (req->tracerec) {
                   1866:                        RF_ETIMER_START(req->tracerec->timer);
                   1867:                }
1.175     oster    1868: #endif
1.9       oster    1869:                InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
                   1870:                    op | bp->b_flags, queue->rf_cinfo->ci_dev,
                   1871:                    req->sectorOffset, req->numSector,
                   1872:                    req->buf, KernelWakeupFunc, (void *) req,
                   1873:                    queue->raidPtr->logBytesPerSector, req->b_proc);
1.1       oster    1874:
                   1875:                if (rf_debugKernelAccess) {
1.9       oster    1876:                        db1_printf(("dispatch: bp->b_blkno = %ld\n",
                   1877:                                (long) bp->b_blkno));
1.1       oster    1878:                }
                   1879:                queue->numOutstanding++;
                   1880:                queue->last_deq_sector = req->sectorOffset;
1.9       oster    1881:                /* acc wouldn't have been let in if there were any pending
                   1882:                 * reqs at any other priority */
1.1       oster    1883:                queue->curPriority = req->priority;
                   1884:
1.166     oster    1885:                db1_printf(("Going for %c to unit %d col %d\n",
1.186     perry    1886:                            req->type, queue->raidPtr->raidid,
1.166     oster    1887:                            queue->col));
1.1       oster    1888:                db1_printf(("sector %d count %d (%d bytes) %d\n",
1.9       oster    1889:                        (int) req->sectorOffset, (int) req->numSector,
                   1890:                        (int) (req->numSector <<
                   1891:                            queue->raidPtr->logBytesPerSector),
                   1892:                        (int) queue->raidPtr->logBytesPerSector));
1.1       oster    1893:                if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
                   1894:                        raidbp->rf_buf.b_vp->v_numoutput++;
                   1895:                }
1.173     hannken  1896:                VOP_STRATEGY(raidbp->rf_buf.b_vp, &raidbp->rf_buf);
1.1       oster    1897:
                   1898:                break;
1.9       oster    1899:
1.1       oster    1900:        default:
                   1901:                panic("bad req->type in rf_DispatchKernelIO");
                   1902:        }
                   1903:        db1_printf(("Exiting from DispatchKernelIO\n"));
1.134     oster    1904:
1.9       oster    1905:        return (0);
1.1       oster    1906: }
1.9       oster    1907: /* this is the callback function associated with a I/O invoked from
1.1       oster    1908:    kernel code.
                   1909:  */
1.186     perry    1910: static void
1.169     oster    1911: KernelWakeupFunc(struct buf *vbp)
1.9       oster    1912: {
                   1913:        RF_DiskQueueData_t *req = NULL;
                   1914:        RF_DiskQueue_t *queue;
                   1915:        struct raidbuf *raidbp = (struct raidbuf *) vbp;
                   1916:        struct buf *bp;
1.74      augustss 1917:        int s;
1.9       oster    1918:
1.36      oster    1919:        s = splbio();
1.9       oster    1920:        db1_printf(("recovering the request queue:\n"));
                   1921:        req = raidbp->req;
1.1       oster    1922:
1.9       oster    1923:        bp = raidbp->rf_obp;
1.1       oster    1924:
1.9       oster    1925:        queue = (RF_DiskQueue_t *) req->queue;
1.1       oster    1926:
1.9       oster    1927:        if (raidbp->rf_buf.b_flags & B_ERROR) {
                   1928:                bp->b_flags |= B_ERROR;
                   1929:                bp->b_error = raidbp->rf_buf.b_error ?
                   1930:                    raidbp->rf_buf.b_error : EIO;
                   1931:        }
1.1       oster    1932:
1.9       oster    1933:        /* XXX methinks this could be wrong... */
1.1       oster    1934: #if 1
1.9       oster    1935:        bp->b_resid = raidbp->rf_buf.b_resid;
1.1       oster    1936: #endif
1.175     oster    1937: #if RF_ACC_TRACE > 0
1.9       oster    1938:        if (req->tracerec) {
                   1939:                RF_ETIMER_STOP(req->tracerec->timer);
                   1940:                RF_ETIMER_EVAL(req->tracerec->timer);
                   1941:                RF_LOCK_MUTEX(rf_tracing_mutex);
                   1942:                req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
                   1943:                req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
                   1944:                req->tracerec->num_phys_ios++;
                   1945:                RF_UNLOCK_MUTEX(rf_tracing_mutex);
                   1946:        }
1.175     oster    1947: #endif
1.9       oster    1948:        bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1.1       oster    1949:
1.9       oster    1950:        /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
                   1951:         * ballistic, and mark the component as hosed... */
1.36      oster    1952:
1.9       oster    1953:        if (bp->b_flags & B_ERROR) {
                   1954:                /* Mark the disk as dead */
                   1955:                /* but only mark it once... */
1.186     perry    1956:                /* and only if it wouldn't leave this RAID set
1.183     oster    1957:                   completely broken */
                   1958:                if ((queue->raidPtr->Disks[queue->col].status ==
1.186     perry    1959:                    rf_ds_optimal) && (queue->raidPtr->numFailures <
1.183     oster    1960:                                       queue->raidPtr->Layout.map->faultsTolerated)) {
1.9       oster    1961:                        printf("raid%d: IO Error.  Marking %s as failed.\n",
1.136     oster    1962:                               queue->raidPtr->raidid,
1.166     oster    1963:                               queue->raidPtr->Disks[queue->col].devname);
                   1964:                        queue->raidPtr->Disks[queue->col].status =
1.9       oster    1965:                            rf_ds_failed;
1.166     oster    1966:                        queue->raidPtr->status = rf_rs_degraded;
1.9       oster    1967:                        queue->raidPtr->numFailures++;
1.56      oster    1968:                        queue->raidPtr->numNewFailures++;
1.9       oster    1969:                } else {        /* Disk is already dead... */
                   1970:                        /* printf("Disk already marked as dead!\n"); */
                   1971:                }
1.4       oster    1972:
1.9       oster    1973:        }
1.4       oster    1974:
1.177     oster    1975:        pool_put(&rf_pools.cbuf, raidbp);
1.9       oster    1976:
1.143     oster    1977:        /* Fill in the error value */
                   1978:
                   1979:        req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;
                   1980:
                   1981:        simple_lock(&queue->raidPtr->iodone_lock);
                   1982:
                   1983:        /* Drop this one on the "finished" queue... */
                   1984:        TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
                   1985:
                   1986:        /* Let the raidio thread know there is work to be done. */
                   1987:        wakeup(&(queue->raidPtr->iodone));
                   1988:
                   1989:        simple_unlock(&queue->raidPtr->iodone_lock);
1.1       oster    1990:
1.36      oster    1991:        splx(s);
1.1       oster    1992: }
                   1993:
                   1994:
                   1995:
                   1996: /*
                   1997:  * initialize a buf structure for doing an I/O in the kernel.
                   1998:  */
1.186     perry    1999: static void
1.169     oster    2000: InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
1.187     christos 2001:        RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t bf,
1.169     oster    2002:        void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
                   2003:        struct proc *b_proc)
1.9       oster    2004: {
                   2005:        /* bp->b_flags       = B_PHYS | rw_flag; */
                   2006:        bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
                   2007:        bp->b_bcount = numSect << logBytesPerSector;
                   2008:        bp->b_bufsize = bp->b_bcount;
                   2009:        bp->b_error = 0;
                   2010:        bp->b_dev = dev;
1.187     christos 2011:        bp->b_data = bf;
1.9       oster    2012:        bp->b_blkno = startSect;
                   2013:        bp->b_resid = bp->b_bcount;     /* XXX is this right!??!?!! */
1.1       oster    2014:        if (bp->b_bcount == 0) {
1.141     provos   2015:                panic("bp->b_bcount is zero in InitBP!!");
1.1       oster    2016:        }
1.161     fvdl     2017:        bp->b_proc = b_proc;
1.9       oster    2018:        bp->b_iodone = cbFunc;
                   2019:        bp->b_vp = b_vp;
                   2020:
1.1       oster    2021: }
                   2022:
                   2023: static void
1.186     perry    2024: raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
1.169     oster    2025:                    struct disklabel *lp)
1.1       oster    2026: {
1.108     thorpej  2027:        memset(lp, 0, sizeof(*lp));
1.1       oster    2028:
                   2029:        /* fabricate a label... */
                   2030:        lp->d_secperunit = raidPtr->totalSectors;
                   2031:        lp->d_secsize = raidPtr->bytesPerSector;
1.45      oster    2032:        lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1.105     oster    2033:        lp->d_ntracks = 4 * raidPtr->numCol;
1.186     perry    2034:        lp->d_ncylinders = raidPtr->totalSectors /
1.45      oster    2035:                (lp->d_nsectors * lp->d_ntracks);
1.1       oster    2036:        lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
                   2037:
                   2038:        strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1.9       oster    2039:        lp->d_type = DTYPE_RAID;
1.1       oster    2040:        strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
                   2041:        lp->d_rpm = 3600;
                   2042:        lp->d_interleave = 1;
                   2043:        lp->d_flags = 0;
                   2044:
                   2045:        lp->d_partitions[RAW_PART].p_offset = 0;
                   2046:        lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
                   2047:        lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
                   2048:        lp->d_npartitions = RAW_PART + 1;
                   2049:
                   2050:        lp->d_magic = DISKMAGIC;
                   2051:        lp->d_magic2 = DISKMAGIC;
                   2052:        lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
                   2053:
                   2054: }
                   2055: /*
                   2056:  * Read the disklabel from the raid device.  If one is not present, fake one
                   2057:  * up.
                   2058:  */
                   2059: static void
1.169     oster    2060: raidgetdisklabel(dev_t dev)
1.1       oster    2061: {
1.9       oster    2062:        int     unit = raidunit(dev);
1.1       oster    2063:        struct raid_softc *rs = &raid_softc[unit];
1.158     dsl      2064:        const char   *errstring;
1.1       oster    2065:        struct disklabel *lp = rs->sc_dkdev.dk_label;
                   2066:        struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
                   2067:        RF_Raid_t *raidPtr;
                   2068:
                   2069:        db1_printf(("Getting the disklabel...\n"));
                   2070:
1.108     thorpej  2071:        memset(clp, 0, sizeof(*clp));
1.1       oster    2072:
                   2073:        raidPtr = raidPtrs[unit];
                   2074:
                   2075:        raidgetdefaultlabel(raidPtr, rs, lp);
                   2076:
                   2077:        /*
                   2078:         * Call the generic disklabel extraction routine.
                   2079:         */
                   2080:        errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
                   2081:            rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1.9       oster    2082:        if (errstring)
1.1       oster    2083:                raidmakedisklabel(rs);
                   2084:        else {
1.9       oster    2085:                int     i;
1.1       oster    2086:                struct partition *pp;
                   2087:
                   2088:                /*
                   2089:                 * Sanity check whether the found disklabel is valid.
                   2090:                 *
                   2091:                 * This is necessary since total size of the raid device
                   2092:                 * may vary when an interleave is changed even though exactly
                   2093:                 * same componets are used, and old disklabel may used
                   2094:                 * if that is found.
                   2095:                 */
                   2096:                if (lp->d_secperunit != rs->sc_size)
1.123     oster    2097:                        printf("raid%d: WARNING: %s: "
1.1       oster    2098:                            "total sector size in disklabel (%d) != "
1.123     oster    2099:                            "the size of raid (%ld)\n", unit, rs->sc_xname,
1.18      oster    2100:                            lp->d_secperunit, (long) rs->sc_size);
1.1       oster    2101:                for (i = 0; i < lp->d_npartitions; i++) {
                   2102:                        pp = &lp->d_partitions[i];
                   2103:                        if (pp->p_offset + pp->p_size > rs->sc_size)
1.123     oster    2104:                                printf("raid%d: WARNING: %s: end of partition `%c' "
1.186     perry    2105:                                       "exceeds the size of raid (%ld)\n",
1.123     oster    2106:                                       unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
1.1       oster    2107:                }
                   2108:        }
                   2109:
                   2110: }
                   2111: /*
                   2112:  * Take care of things one might want to take care of in the event
                   2113:  * that a disklabel isn't present.
                   2114:  */
                   2115: static void
1.169     oster    2116: raidmakedisklabel(struct raid_softc *rs)
1.1       oster    2117: {
                   2118:        struct disklabel *lp = rs->sc_dkdev.dk_label;
                   2119:        db1_printf(("Making a label..\n"));
                   2120:
                   2121:        /*
                   2122:         * For historical reasons, if there's no disklabel present
                   2123:         * the raw partition must be marked FS_BSDFFS.
                   2124:         */
                   2125:
                   2126:        lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
                   2127:
                   2128:        strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
                   2129:
                   2130:        lp->d_checksum = dkcksum(lp);
                   2131: }
                   2132: /*
                   2133:  * Lookup the provided name in the filesystem.  If the file exists,
                   2134:  * is a valid block device, and isn't being used by anyone else,
                   2135:  * set *vpp to the file's vnode.
1.9       oster    2136:  * You'll find the original of this in ccd.c
1.1       oster    2137:  */
                   2138: int
1.169     oster    2139: raidlookup(char *path, struct proc *p, struct vnode **vpp)
1.1       oster    2140: {
                   2141:        struct nameidata nd;
                   2142:        struct vnode *vp;
                   2143:        struct vattr va;
1.9       oster    2144:        int     error;
1.1       oster    2145:
1.161     fvdl     2146:        NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1.9       oster    2147:        if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1.1       oster    2148:                return (error);
                   2149:        }
                   2150:        vp = nd.ni_vp;
                   2151:        if (vp->v_usecount > 1) {
                   2152:                VOP_UNLOCK(vp, 0);
1.161     fvdl     2153:                (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1.1       oster    2154:                return (EBUSY);
                   2155:        }
1.161     fvdl     2156:        if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1.1       oster    2157:                VOP_UNLOCK(vp, 0);
1.161     fvdl     2158:                (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1.1       oster    2159:                return (error);
                   2160:        }
                   2161:        /* XXX: eventually we should handle VREG, too. */
                   2162:        if (va.va_type != VBLK) {
                   2163:                VOP_UNLOCK(vp, 0);
1.161     fvdl     2164:                (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1.1       oster    2165:                return (ENOTBLK);
                   2166:        }
                   2167:        VOP_UNLOCK(vp, 0);
                   2168:        *vpp = vp;
                   2169:        return (0);
                   2170: }
                   2171: /*
                   2172:  * Wait interruptibly for an exclusive lock.
                   2173:  *
                   2174:  * XXX
                   2175:  * Several drivers do this; it should be abstracted and made MP-safe.
                   2176:  * (Hmm... where have we seen this warning before :->  GO )
                   2177:  */
                   2178: static int
1.169     oster    2179: raidlock(struct raid_softc *rs)
1.1       oster    2180: {
1.9       oster    2181:        int     error;
1.1       oster    2182:
                   2183:        while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
                   2184:                rs->sc_flags |= RAIDF_WANTED;
1.9       oster    2185:                if ((error =
                   2186:                        tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1.1       oster    2187:                        return (error);
                   2188:        }
                   2189:        rs->sc_flags |= RAIDF_LOCKED;
                   2190:        return (0);
                   2191: }
                   2192: /*
                   2193:  * Unlock and wake up any waiters.
                   2194:  */
                   2195: static void
1.169     oster    2196: raidunlock(struct raid_softc *rs)
1.1       oster    2197: {
                   2198:
                   2199:        rs->sc_flags &= ~RAIDF_LOCKED;
                   2200:        if ((rs->sc_flags & RAIDF_WANTED) != 0) {
                   2201:                rs->sc_flags &= ~RAIDF_WANTED;
                   2202:                wakeup(rs);
                   2203:        }
1.11      oster    2204: }
1.186     perry    2205:
1.11      oster    2206:
                   2207: #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
                   2208: #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
                   2209:
1.186     perry    2210: int
1.12      oster    2211: raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
                   2212: {
1.48      oster    2213:        RF_ComponentLabel_t clabel;
                   2214:        raidread_component_label(dev, b_vp, &clabel);
                   2215:        clabel.mod_counter = mod_counter;
                   2216:        clabel.clean = RF_RAID_CLEAN;
                   2217:        raidwrite_component_label(dev, b_vp, &clabel);
1.12      oster    2218:        return(0);
                   2219: }
                   2220:
                   2221:
1.186     perry    2222: int
1.12      oster    2223: raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
1.11      oster    2224: {
1.48      oster    2225:        RF_ComponentLabel_t clabel;
                   2226:        raidread_component_label(dev, b_vp, &clabel);
                   2227:        clabel.mod_counter = mod_counter;
                   2228:        clabel.clean = RF_RAID_DIRTY;
                   2229:        raidwrite_component_label(dev, b_vp, &clabel);
1.11      oster    2230:        return(0);
                   2231: }
                   2232:
                   2233: /* ARGSUSED */
                   2234: int
1.186     perry    2235: raidread_component_label(dev_t dev, struct vnode *b_vp,
1.169     oster    2236:                         RF_ComponentLabel_t *clabel)
1.11      oster    2237: {
                   2238:        struct buf *bp;
1.130     gehenna  2239:        const struct bdevsw *bdev;
1.11      oster    2240:        int error;
1.186     perry    2241:
1.11      oster    2242:        /* XXX should probably ensure that we don't try to do this if
1.186     perry    2243:           someone has changed rf_protected_sectors. */
1.11      oster    2244:
1.98      oster    2245:        if (b_vp == NULL) {
                   2246:                /* For whatever reason, this component is not valid.
                   2247:                   Don't try to read a component label from it. */
                   2248:                return(EINVAL);
                   2249:        }
                   2250:
1.11      oster    2251:        /* get a block of the appropriate size... */
                   2252:        bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
                   2253:        bp->b_dev = dev;
                   2254:
                   2255:        /* get our ducks in a row for the read */
                   2256:        bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
                   2257:        bp->b_bcount = RF_COMPONENT_INFO_SIZE;
1.100     chs      2258:        bp->b_flags |= B_READ;
1.11      oster    2259:        bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
                   2260:
1.130     gehenna  2261:        bdev = bdevsw_lookup(bp->b_dev);
                   2262:        if (bdev == NULL)
                   2263:                return (ENXIO);
                   2264:        (*bdev->d_strategy)(bp);
1.11      oster    2265:
1.186     perry    2266:        error = biowait(bp);
1.11      oster    2267:
                   2268:        if (!error) {
1.79      thorpej  2269:                memcpy(clabel, bp->b_data,
1.11      oster    2270:                       sizeof(RF_ComponentLabel_t));
1.186     perry    2271:         }
1.11      oster    2272:
1.186     perry    2273:        brelse(bp);
1.11      oster    2274:        return(error);
                   2275: }
                   2276: /* ARGSUSED */
1.186     perry    2277: int
                   2278: raidwrite_component_label(dev_t dev, struct vnode *b_vp,
1.169     oster    2279:                          RF_ComponentLabel_t *clabel)
1.11      oster    2280: {
                   2281:        struct buf *bp;
1.130     gehenna  2282:        const struct bdevsw *bdev;
1.11      oster    2283:        int error;
                   2284:
                   2285:        /* get a block of the appropriate size... */
                   2286:        bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
                   2287:        bp->b_dev = dev;
                   2288:
                   2289:        /* get our ducks in a row for the write */
                   2290:        bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
                   2291:        bp->b_bcount = RF_COMPONENT_INFO_SIZE;
1.100     chs      2292:        bp->b_flags |= B_WRITE;
1.11      oster    2293:        bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
                   2294:
1.79      thorpej  2295:        memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
1.11      oster    2296:
1.79      thorpej  2297:        memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
1.11      oster    2298:
1.130     gehenna  2299:        bdev = bdevsw_lookup(bp->b_dev);
                   2300:        if (bdev == NULL)
                   2301:                return (ENXIO);
                   2302:        (*bdev->d_strategy)(bp);
1.186     perry    2303:        error = biowait(bp);
1.11      oster    2304:        brelse(bp);
                   2305:        if (error) {
1.48      oster    2306: #if 1
1.11      oster    2307:                printf("Failed to write RAID component info!\n");
1.48      oster    2308: #endif
1.11      oster    2309:        }
                   2310:
                   2311:        return(error);
1.1       oster    2312: }
1.12      oster    2313:
1.186     perry    2314: void
1.169     oster    2315: rf_markalldirty(RF_Raid_t *raidPtr)
1.12      oster    2316: {
1.48      oster    2317:        RF_ComponentLabel_t clabel;
1.146     oster    2318:        int sparecol;
1.166     oster    2319:        int c;
                   2320:        int j;
                   2321:        int scol = -1;
1.12      oster    2322:
                   2323:        raidPtr->mod_counter++;
1.166     oster    2324:        for (c = 0; c < raidPtr->numCol; c++) {
                   2325:                /* we don't want to touch (at all) a disk that has
                   2326:                   failed */
                   2327:                if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
                   2328:                        raidread_component_label(
                   2329:                                                 raidPtr->Disks[c].dev,
                   2330:                                                 raidPtr->raid_cinfo[c].ci_vp,
                   2331:                                                 &clabel);
                   2332:                        if (clabel.status == rf_ds_spared) {
1.186     perry    2333:                                /* XXX do something special...
                   2334:                                   but whatever you do, don't
1.166     oster    2335:                                   try to access it!! */
                   2336:                        } else {
1.186     perry    2337:                                raidmarkdirty(
1.166     oster    2338:                                              raidPtr->Disks[c].dev,
                   2339:                                              raidPtr->raid_cinfo[c].ci_vp,
1.146     oster    2340:                                              raidPtr->mod_counter);
1.12      oster    2341:                        }
1.166     oster    2342:                }
1.186     perry    2343:        }
1.146     oster    2344:
1.12      oster    2345:        for( c = 0; c < raidPtr->numSpare ; c++) {
                   2346:                sparecol = raidPtr->numCol + c;
1.166     oster    2347:                if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.186     perry    2348:                        /*
                   2349:
                   2350:                           we claim this disk is "optimal" if it's
                   2351:                           rf_ds_used_spare, as that means it should be
                   2352:                           directly substitutable for the disk it replaced.
1.12      oster    2353:                           We note that too...
                   2354:
                   2355:                         */
                   2356:
1.166     oster    2357:                        for(j=0;j<raidPtr->numCol;j++) {
                   2358:                                if (raidPtr->Disks[j].spareCol == sparecol) {
                   2359:                                        scol = j;
                   2360:                                        break;
1.12      oster    2361:                                }
                   2362:                        }
1.186     perry    2363:
                   2364:                        raidread_component_label(
1.166     oster    2365:                                 raidPtr->Disks[sparecol].dev,
                   2366:                                 raidPtr->raid_cinfo[sparecol].ci_vp,
1.146     oster    2367:                                 &clabel);
1.12      oster    2368:                        /* make sure status is noted */
1.146     oster    2369:
                   2370:                        raid_init_component_label(raidPtr, &clabel);
                   2371:
1.166     oster    2372:                        clabel.row = 0;
1.48      oster    2373:                        clabel.column = scol;
1.146     oster    2374:                        /* Note: we *don't* change status from rf_ds_used_spare
                   2375:                           to rf_ds_optimal */
                   2376:                        /* clabel.status = rf_ds_optimal; */
1.186     perry    2377:
1.166     oster    2378:                        raidmarkdirty(raidPtr->Disks[sparecol].dev,
                   2379:                                      raidPtr->raid_cinfo[sparecol].ci_vp,
1.146     oster    2380:                                      raidPtr->mod_counter);
1.12      oster    2381:                }
                   2382:        }
                   2383: }
                   2384:
1.13      oster    2385:
                   2386: void
1.169     oster    2387: rf_update_component_labels(RF_Raid_t *raidPtr, int final)
1.13      oster    2388: {
1.48      oster    2389:        RF_ComponentLabel_t clabel;
1.13      oster    2390:        int sparecol;
1.166     oster    2391:        int c;
                   2392:        int j;
                   2393:        int scol;
1.13      oster    2394:
                   2395:        scol = -1;
                   2396:
1.186     perry    2397:        /* XXX should do extra checks to make sure things really are clean,
1.13      oster    2398:           rather than blindly setting the clean bit... */
                   2399:
                   2400:        raidPtr->mod_counter++;
                   2401:
1.166     oster    2402:        for (c = 0; c < raidPtr->numCol; c++) {
                   2403:                if (raidPtr->Disks[c].status == rf_ds_optimal) {
                   2404:                        raidread_component_label(
                   2405:                                                 raidPtr->Disks[c].dev,
                   2406:                                                 raidPtr->raid_cinfo[c].ci_vp,
                   2407:                                                 &clabel);
1.13      oster    2408:                                /* make sure status is noted */
1.166     oster    2409:                        clabel.status = rf_ds_optimal;
1.57      oster    2410:                                /* bump the counter */
1.166     oster    2411:                        clabel.mod_counter = raidPtr->mod_counter;
1.57      oster    2412:
1.186     perry    2413:                        raidwrite_component_label(
1.166     oster    2414:                                                  raidPtr->Disks[c].dev,
                   2415:                                                  raidPtr->raid_cinfo[c].ci_vp,
                   2416:                                                  &clabel);
                   2417:                        if (final == RF_FINAL_COMPONENT_UPDATE) {
                   2418:                                if (raidPtr->parity_good == RF_RAID_CLEAN) {
1.186     perry    2419:                                        raidmarkclean(
                   2420:                                                      raidPtr->Disks[c].dev,
1.166     oster    2421:                                                      raidPtr->raid_cinfo[c].ci_vp,
                   2422:                                                      raidPtr->mod_counter);
1.91      oster    2423:                                }
1.166     oster    2424:                        }
1.186     perry    2425:                }
1.166     oster    2426:                /* else we don't touch it.. */
1.186     perry    2427:        }
1.63      oster    2428:
                   2429:        for( c = 0; c < raidPtr->numSpare ; c++) {
                   2430:                sparecol = raidPtr->numCol + c;
1.110     oster    2431:                /* Need to ensure that the reconstruct actually completed! */
1.166     oster    2432:                if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.186     perry    2433:                        /*
                   2434:
                   2435:                           we claim this disk is "optimal" if it's
                   2436:                           rf_ds_used_spare, as that means it should be
                   2437:                           directly substitutable for the disk it replaced.
1.63      oster    2438:                           We note that too...
                   2439:
                   2440:                         */
                   2441:
1.166     oster    2442:                        for(j=0;j<raidPtr->numCol;j++) {
                   2443:                                if (raidPtr->Disks[j].spareCol == sparecol) {
                   2444:                                        scol = j;
                   2445:                                        break;
1.63      oster    2446:                                }
                   2447:                        }
1.186     perry    2448:
1.63      oster    2449:                        /* XXX shouldn't *really* need this... */
1.186     perry    2450:                        raidread_component_label(
1.166     oster    2451:                                      raidPtr->Disks[sparecol].dev,
                   2452:                                      raidPtr->raid_cinfo[sparecol].ci_vp,
1.63      oster    2453:                                      &clabel);
                   2454:                        /* make sure status is noted */
                   2455:
                   2456:                        raid_init_component_label(raidPtr, &clabel);
                   2457:
                   2458:                        clabel.mod_counter = raidPtr->mod_counter;
                   2459:                        clabel.column = scol;
                   2460:                        clabel.status = rf_ds_optimal;
                   2461:
                   2462:                        raidwrite_component_label(
1.166     oster    2463:                                      raidPtr->Disks[sparecol].dev,
                   2464:                                      raidPtr->raid_cinfo[sparecol].ci_vp,
1.63      oster    2465:                                      &clabel);
1.91      oster    2466:                        if (final == RF_FINAL_COMPONENT_UPDATE) {
1.13      oster    2467:                                if (raidPtr->parity_good == RF_RAID_CLEAN) {
1.166     oster    2468:                                        raidmarkclean( raidPtr->Disks[sparecol].dev,
                   2469:                                                       raidPtr->raid_cinfo[sparecol].ci_vp,
1.91      oster    2470:                                                       raidPtr->mod_counter);
1.13      oster    2471:                                }
                   2472:                        }
                   2473:                }
                   2474:        }
1.68      oster    2475: }
                   2476:
                   2477: void
1.169     oster    2478: rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
1.69      oster    2479: {
                   2480:        struct proc *p;
                   2481:
                   2482:        p = raidPtr->engine_thread;
                   2483:
                   2484:        if (vp != NULL) {
                   2485:                if (auto_configured == 1) {
1.96      oster    2486:                        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.97      oster    2487:                        VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
1.69      oster    2488:                        vput(vp);
1.186     perry    2489:
                   2490:                } else {
1.161     fvdl     2491:                        (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1.69      oster    2492:                }
1.186     perry    2493:        }
1.69      oster    2494: }
                   2495:
                   2496:
                   2497: void
1.169     oster    2498: rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
1.68      oster    2499: {
1.186     perry    2500:        int r,c;
1.69      oster    2501:        struct vnode *vp;
                   2502:        int acd;
1.68      oster    2503:
                   2504:
                   2505:        /* We take this opportunity to close the vnodes like we should.. */
                   2506:
1.166     oster    2507:        for (c = 0; c < raidPtr->numCol; c++) {
                   2508:                vp = raidPtr->raid_cinfo[c].ci_vp;
                   2509:                acd = raidPtr->Disks[c].auto_configured;
                   2510:                rf_close_component(raidPtr, vp, acd);
                   2511:                raidPtr->raid_cinfo[c].ci_vp = NULL;
                   2512:                raidPtr->Disks[c].auto_configured = 0;
1.68      oster    2513:        }
1.166     oster    2514:
1.68      oster    2515:        for (r = 0; r < raidPtr->numSpare; r++) {
1.166     oster    2516:                vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
                   2517:                acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
1.69      oster    2518:                rf_close_component(raidPtr, vp, acd);
1.166     oster    2519:                raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
                   2520:                raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
1.68      oster    2521:        }
1.37      oster    2522: }
1.63      oster    2523:
1.37      oster    2524:
1.186     perry    2525: void
1.169     oster    2526: rf_ReconThread(struct rf_recon_req *req)
1.37      oster    2527: {
                   2528:        int     s;
                   2529:        RF_Raid_t *raidPtr;
                   2530:
                   2531:        s = splbio();
                   2532:        raidPtr = (RF_Raid_t *) req->raidPtr;
                   2533:        raidPtr->recon_in_progress = 1;
                   2534:
1.166     oster    2535:        rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
1.37      oster    2536:                    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
                   2537:
                   2538:        RF_Free(req, sizeof(*req));
                   2539:
                   2540:        raidPtr->recon_in_progress = 0;
                   2541:        splx(s);
                   2542:
                   2543:        /* That's all... */
                   2544:        kthread_exit(0);        /* does not return */
                   2545: }
                   2546:
                   2547: void
1.169     oster    2548: rf_RewriteParityThread(RF_Raid_t *raidPtr)
1.37      oster    2549: {
                   2550:        int retcode;
                   2551:        int s;
                   2552:
1.184     oster    2553:        raidPtr->parity_rewrite_stripes_done = 0;
1.37      oster    2554:        raidPtr->parity_rewrite_in_progress = 1;
                   2555:        s = splbio();
                   2556:        retcode = rf_RewriteParity(raidPtr);
                   2557:        splx(s);
                   2558:        if (retcode) {
                   2559:                printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
                   2560:        } else {
                   2561:                /* set the clean bit!  If we shutdown correctly,
                   2562:                   the clean bit on each component label will get
                   2563:                   set */
                   2564:                raidPtr->parity_good = RF_RAID_CLEAN;
                   2565:        }
                   2566:        raidPtr->parity_rewrite_in_progress = 0;
1.85      oster    2567:
                   2568:        /* Anyone waiting for us to stop?  If so, inform them... */
                   2569:        if (raidPtr->waitShutdown) {
                   2570:                wakeup(&raidPtr->parity_rewrite_in_progress);
                   2571:        }
1.37      oster    2572:
                   2573:        /* That's all... */
                   2574:        kthread_exit(0);        /* does not return */
                   2575: }
                   2576:
                   2577:
                   2578: void
1.169     oster    2579: rf_CopybackThread(RF_Raid_t *raidPtr)
1.37      oster    2580: {
                   2581:        int s;
                   2582:
                   2583:        raidPtr->copyback_in_progress = 1;
                   2584:        s = splbio();
                   2585:        rf_CopybackReconstructedData(raidPtr);
                   2586:        splx(s);
                   2587:        raidPtr->copyback_in_progress = 0;
                   2588:
                   2589:        /* That's all... */
                   2590:        kthread_exit(0);        /* does not return */
                   2591: }
                   2592:
                   2593:
                   2594: void
1.169     oster    2595: rf_ReconstructInPlaceThread(struct rf_recon_req *req)
1.37      oster    2596: {
                   2597:        int s;
                   2598:        RF_Raid_t *raidPtr;
1.186     perry    2599:
1.37      oster    2600:        s = splbio();
                   2601:        raidPtr = req->raidPtr;
                   2602:        raidPtr->recon_in_progress = 1;
1.166     oster    2603:        rf_ReconstructInPlace(raidPtr, req->col);
1.37      oster    2604:        RF_Free(req, sizeof(*req));
                   2605:        raidPtr->recon_in_progress = 0;
                   2606:        splx(s);
                   2607:
                   2608:        /* That's all... */
                   2609:        kthread_exit(0);        /* does not return */
1.48      oster    2610: }
                   2611:
                   2612: RF_AutoConfig_t *
                   2613: rf_find_raid_components()
                   2614: {
                   2615:        struct vnode *vp;
                   2616:        struct disklabel label;
                   2617:        struct device *dv;
                   2618:        dev_t dev;
1.130     gehenna  2619:        int bmajor;
1.48      oster    2620:        int error;
                   2621:        int i;
                   2622:        int good_one;
                   2623:        RF_ComponentLabel_t *clabel;
                   2624:        RF_AutoConfig_t *ac_list;
                   2625:        RF_AutoConfig_t *ac;
                   2626:
                   2627:
                   2628:        /* initialize the AutoConfig list */
                   2629:        ac_list = NULL;
                   2630:
                   2631:        /* we begin by trolling through *all* the devices on the system */
                   2632:
                   2633:        for (dv = alldevs.tqh_first; dv != NULL;
                   2634:             dv = dv->dv_list.tqe_next) {
                   2635:
                   2636:                /* we are only interested in disks... */
                   2637:                if (dv->dv_class != DV_DISK)
                   2638:                        continue;
                   2639:
                   2640:                /* we don't care about floppies... */
1.140     thorpej  2641:                if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
1.119     leo      2642:                        continue;
                   2643:                }
1.129     oster    2644:
                   2645:                /* we don't care about CD's... */
1.140     thorpej  2646:                if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
1.129     oster    2647:                        continue;
                   2648:                }
                   2649:
1.120     leo      2650:                /* hdfd is the Atari/Hades floppy driver */
1.140     thorpej  2651:                if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
1.121     leo      2652:                        continue;
                   2653:                }
                   2654:                /* fdisa is the Atari/Milan floppy driver */
1.140     thorpej  2655:                if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
1.48      oster    2656:                        continue;
                   2657:                }
1.186     perry    2658:
1.48      oster    2659:                /* need to find the device_name_to_block_device_major stuff */
1.130     gehenna  2660:                bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
1.48      oster    2661:
                   2662:                /* get a vnode for the raw partition of this disk */
                   2663:
1.130     gehenna  2664:                dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
1.48      oster    2665:                if (bdevvp(dev, &vp))
                   2666:                        panic("RAID can't alloc vnode");
                   2667:
                   2668:                error = VOP_OPEN(vp, FREAD, NOCRED, 0);
                   2669:
                   2670:                if (error) {
1.186     perry    2671:                        /* "Who cares."  Continue looking
1.48      oster    2672:                           for something that exists*/
                   2673:                        vput(vp);
                   2674:                        continue;
                   2675:                }
                   2676:
                   2677:                /* Ok, the disk exists.  Go get the disklabel. */
1.156     dsl      2678:                error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
1.48      oster    2679:                if (error) {
                   2680:                        /*
                   2681:                         * XXX can't happen - open() would
                   2682:                         * have errored out (or faked up one)
                   2683:                         */
1.181     thorpej  2684:                        if (error != ENOTTY)
                   2685:                                printf("RAIDframe: can't get label for dev "
                   2686:                                    "%s (%d)\n", dv->dv_xname, error);
1.48      oster    2687:                }
                   2688:
                   2689:                /* don't need this any more.  We'll allocate it again
                   2690:                   a little later if we really do... */
1.96      oster    2691:                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.97      oster    2692:                VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
1.48      oster    2693:                vput(vp);
                   2694:
1.181     thorpej  2695:                if (error)
                   2696:                        continue;
                   2697:
1.48      oster    2698:                for (i=0; i < label.d_npartitions; i++) {
                   2699:                        /* We only support partitions marked as RAID */
                   2700:                        if (label.d_partitions[i].p_fstype != FS_RAID)
                   2701:                                continue;
                   2702:
1.130     gehenna  2703:                        dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
1.48      oster    2704:                        if (bdevvp(dev, &vp))
                   2705:                                panic("RAID can't alloc vnode");
                   2706:
                   2707:                        error = VOP_OPEN(vp, FREAD, NOCRED, 0);
                   2708:                        if (error) {
                   2709:                                /* Whatever... */
                   2710:                                vput(vp);
                   2711:                                continue;
                   2712:                        }
                   2713:
                   2714:                        good_one = 0;
                   2715:
1.186     perry    2716:                        clabel = (RF_ComponentLabel_t *)
                   2717:                                malloc(sizeof(RF_ComponentLabel_t),
1.48      oster    2718:                                       M_RAIDFRAME, M_NOWAIT);
                   2719:                        if (clabel == NULL) {
                   2720:                                /* XXX CLEANUP HERE */
                   2721:                                printf("RAID auto config: out of memory!\n");
                   2722:                                return(NULL); /* XXX probably should panic? */
                   2723:                        }
                   2724:
                   2725:                        if (!raidread_component_label(dev, vp, clabel)) {
                   2726:                                /* Got the label.  Does it look reasonable? */
1.49      oster    2727:                                if (rf_reasonable_label(clabel) &&
1.186     perry    2728:                                    (clabel->partitionSize <=
1.48      oster    2729:                                     label.d_partitions[i].p_size)) {
                   2730: #if DEBUG
1.186     perry    2731:                                        printf("Component on: %s%c: %d\n",
1.48      oster    2732:                                               dv->dv_xname, 'a'+i,
                   2733:                                               label.d_partitions[i].p_size);
1.67      oster    2734:                                        rf_print_component_label(clabel);
1.48      oster    2735: #endif
1.186     perry    2736:                                        /* if it's reasonable, add it,
1.48      oster    2737:                                           else ignore it. */
                   2738:                                        ac = (RF_AutoConfig_t *)
                   2739:                                                malloc(sizeof(RF_AutoConfig_t),
                   2740:                                                       M_RAIDFRAME,
                   2741:                                                       M_NOWAIT);
                   2742:                                        if (ac == NULL) {
                   2743:                                                /* XXX should panic?? */
                   2744:                                                return(NULL);
                   2745:                                        }
1.186     perry    2746:
1.179     itojun   2747:                                        snprintf(ac->devname,
                   2748:                                            sizeof(ac->devname), "%s%c",
                   2749:                                            dv->dv_xname, 'a'+i);
1.48      oster    2750:                                        ac->dev = dev;
                   2751:                                        ac->vp = vp;
                   2752:                                        ac->clabel = clabel;
                   2753:                                        ac->next = ac_list;
                   2754:                                        ac_list = ac;
                   2755:                                        good_one = 1;
1.186     perry    2756:                                }
1.48      oster    2757:                        }
                   2758:                        if (!good_one) {
                   2759:                                /* cleanup */
                   2760:                                free(clabel, M_RAIDFRAME);
1.96      oster    2761:                                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.97      oster    2762:                                VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
1.48      oster    2763:                                vput(vp);
                   2764:                        }
                   2765:                }
                   2766:        }
1.106     oster    2767:        return(ac_list);
1.48      oster    2768: }
1.186     perry    2769:
1.48      oster    2770: static int
1.169     oster    2771: rf_reasonable_label(RF_ComponentLabel_t *clabel)
1.48      oster    2772: {
1.186     perry    2773:
                                /*
                                 * Sanity-check a component label.  Returns 1 only if
                                 * every test below passes, 0 otherwise:
                                 *  - version is one of the two known label versions,
                                 *  - clean is either RF_RAID_CLEAN or RF_RAID_DIRTY,
                                 *  - row/column are non-negative and strictly below
                                 *    num_rows/num_columns (which must be positive),
                                 *  - blockSize and numBlocks are positive.
                                 * The label itself is not modified.
                                 */
1.48      oster    2774:        if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
                   2775:             (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
                   2776:            ((clabel->clean == RF_RAID_CLEAN) ||
                   2777:             (clabel->clean == RF_RAID_DIRTY)) &&
1.186     perry    2778:            clabel->row >=0 &&
                   2779:            clabel->column >= 0 &&
1.48      oster    2780:            clabel->num_rows > 0 &&
                   2781:            clabel->num_columns > 0 &&
1.186     perry    2782:            clabel->row < clabel->num_rows &&
1.48      oster    2783:            clabel->column < clabel->num_columns &&
                   2784:            clabel->blockSize > 0 &&
                   2785:            clabel->numBlocks > 0) {
                   2786:                /* label looks reasonable enough... */
                   2787:                return(1);
                   2788:        }
                   2789:        return(0);
                   2790: }
                   2791:
                   2792:
1.138     oster    2793: #if DEBUG
1.48      oster    2794: void
1.169     oster    2795: rf_print_component_label(RF_ComponentLabel_t *clabel)
1.48      oster    2796: {
                                /*
                                 * Debug aid (only compiled when DEBUG is set): dump the
                                 * fields of a component label with printf.  "Clean",
                                 * "Autoconfig" and "Contains root partition" are shown
                                 * as Yes for any non-zero value; parityConfig is printed
                                 * as a character.
                                 */
                   2797:        printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
1.186     perry    2798:               clabel->row, clabel->column,
1.48      oster    2799:               clabel->num_rows, clabel->num_columns);
                   2800:        printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
                   2801:               clabel->version, clabel->serial_number,
                   2802:               clabel->mod_counter);
                   2803:        printf("   Clean: %s Status: %d\n",
                   2804:               clabel->clean ? "Yes" : "No", clabel->status );
                   2805:        printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
                   2806:               clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
                   2807:        printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
1.186     perry    2808:               (char) clabel->parityConfig, clabel->blockSize,
1.48      oster    2809:               clabel->numBlocks);
                   2810:        printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
1.186     perry    2811:        printf("   Contains root partition: %s\n",
1.75      oster    2812:               clabel->root_partition ? "Yes" : "No" );
1.48      oster    2813:        printf("   Last configured as: raid%d\n", clabel->last_unit );
                                /* config_order output is currently compiled out */
1.51      oster    2814: #if 0
                   2815:           printf("   Config order: %d\n", clabel->config_order);
                   2816: #endif
1.186     perry    2817:
1.48      oster    2818: }
1.133     oster    2819: #endif
1.48      oster    2820:
                   2821: RF_ConfigSet_t *
1.169     oster    2822: rf_create_auto_sets(RF_AutoConfig_t *ac_list)
1.48      oster    2823: {
                                /*
                                 * Partition the list of discovered components (ac_list)
                                 * into configuration sets.  Each component is unlinked
                                 * from ac_list and pushed onto the front of the first set
                                 * for which rf_does_it_fit() returns non-zero; if no set
                                 * matches, a new set is allocated and prepended to the
                                 * result.  New sets start with rootable == 0.
                                 *
                                 * Returns the head of the set list, or NULL when ac_list
                                 * is empty.  Allocation failure panics.  The ac->next
                                 * links of the input list are rewritten, so the caller
                                 * must not walk ac_list afterwards.
                                 */
                   2824:        RF_AutoConfig_t *ac;
                   2825:        RF_ConfigSet_t *config_sets;
                   2826:        RF_ConfigSet_t *cset;
                   2827:        RF_AutoConfig_t *ac_next;
                   2828:
                   2829:
                   2830:        config_sets = NULL;
                   2831:
                   2832:        /* Go through the AutoConfig list, and figure out which components
                   2833:           belong to what sets.  */
                   2834:        ac = ac_list;
                   2835:        while(ac!=NULL) {
                   2836:                /* we're going to putz with ac->next, so save it here
                   2837:                   for use at the end of the loop */
                   2838:                ac_next = ac->next;
                   2839:
                   2840:                if (config_sets == NULL) {
                   2841:                        /* will need at least this one... */
                   2842:                        config_sets = (RF_ConfigSet_t *)
1.186     perry    2843:                                malloc(sizeof(RF_ConfigSet_t),
1.48      oster    2844:                                       M_RAIDFRAME, M_NOWAIT);
                   2845:                        if (config_sets == NULL) {
1.141     provos   2846:                                panic("rf_create_auto_sets: No memory!");
1.48      oster    2847:                        }
                   2848:                        /* this one is easy :) */
                   2849:                        config_sets->ac = ac;
                   2850:                        config_sets->next = NULL;
1.51      oster    2851:                        config_sets->rootable = 0;
1.48      oster    2852:                        ac->next = NULL;
                   2853:                } else {
                   2854:                        /* which set does this component fit into? */
                   2855:                        cset = config_sets;
                   2856:                        while(cset!=NULL) {
1.49      oster    2857:                                if (rf_does_it_fit(cset, ac)) {
1.86      oster    2858:                                        /* looks like it matches... */
                   2859:                                        ac->next = cset->ac;
                   2860:                                        cset->ac = ac;
1.48      oster    2861:                                        break;
                   2862:                                }
                   2863:                                cset = cset->next;
                   2864:                        }
                   2865:                        if (cset==NULL) {
                   2866:                                /* didn't find a match above... new set..*/
                   2867:                                cset = (RF_ConfigSet_t *)
1.186     perry    2868:                                        malloc(sizeof(RF_ConfigSet_t),
1.48      oster    2869:                                               M_RAIDFRAME, M_NOWAIT);
                   2870:                                if (cset == NULL) {
1.141     provos   2871:                                        panic("rf_create_auto_sets: No memory!");
1.48      oster    2872:                                }
                   2873:                                cset->ac = ac;
                   2874:                                ac->next = NULL;
                   2875:                                cset->next = config_sets;
1.51      oster    2876:                                cset->rootable = 0;
1.48      oster    2877:                                config_sets = cset;
                   2878:                        }
                   2879:                }
                   2880:                ac = ac_next;
                   2881:        }
                   2882:
                   2883:
                   2884:        return(config_sets);
                   2885: }
                   2886:
                   2887: static int
1.169     oster    2888: rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
1.48      oster    2889: {
                                /*
                                 * Decide whether component 'ac' belongs to set 'cset'.
                                 * Returns 1 if every label field compared below is equal
                                 * between ac's label and the label of the component at the
                                 * head of the set, 0 otherwise.  Neither argument is
                                 * modified.
                                 */
                   2890:        RF_ComponentLabel_t *clabel1, *clabel2;
                   2891:
                   2892:        /* If this one matches the *first* one in the set, that's good
                   2893:           enough, since the other members of the set would have been
                   2894:           through here too... */
1.60      oster    2895:        /* note that we are not checking partitionSize here..
                   2896:
                   2897:           Note that we are also not checking the mod_counters here.
1.186     perry    2898:           If everything else matches except the mod_counter, that's
1.60      oster    2899:           good enough for this test.  We will deal with the mod_counters
1.186     perry    2900:           a little later in the autoconfiguration process.
1.60      oster    2901:
                   2902:            (clabel1->mod_counter == clabel2->mod_counter) &&
1.81      oster    2903:
                   2904:           The reason we don't check for this is that failed disks
                   2905:           will have lower modification counts.  If those disks are
                   2906:           not added to the set they used to belong to, then they will
                   2907:           form their own set, which may result in 2 different sets,
                   2908:           for example, competing to be configured at raid0, and
                   2909:           perhaps competing to be the root filesystem set.  If the
                   2910:           wrong ones get configured, or both attempt to become /,
                   2911:           weird behaviour and or serious lossage will occur.  Thus we
                   2912:           need to bring them into the fold here, and kick them out at
                   2913:           a later point.
1.60      oster    2914:
                   2915:        */
1.48      oster    2916:
                   2917:        clabel1 = cset->ac->clabel;
                   2918:        clabel2 = ac->clabel;
                   2919:        if ((clabel1->version == clabel2->version) &&
                   2920:            (clabel1->serial_number == clabel2->serial_number) &&
                   2921:            (clabel1->num_rows == clabel2->num_rows) &&
                   2922:            (clabel1->num_columns == clabel2->num_columns) &&
                   2923:            (clabel1->sectPerSU == clabel2->sectPerSU) &&
                   2924:            (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
                   2925:            (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
                   2926:            (clabel1->parityConfig == clabel2->parityConfig) &&
                   2927:            (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
                   2928:            (clabel1->blockSize == clabel2->blockSize) &&
                   2929:            (clabel1->numBlocks == clabel2->numBlocks) &&
                   2930:            (clabel1->autoconfigure == clabel2->autoconfigure) &&
                   2931:            (clabel1->root_partition == clabel2->root_partition) &&
                   2932:            (clabel1->last_unit == clabel2->last_unit) &&
                   2933:            (clabel1->config_order == clabel2->config_order)) {
                   2934:                /* if it gets here, it almost *has* to be a match */
                   2935:        } else {
1.186     perry    2936:                /* it's not consistent with somebody in the set..
1.48      oster    2937:                   punt */
                   2938:                return(0);
                   2939:        }
                   2940:        /* all was fine.. it must fit... */
                   2941:        return(1);
                   2942: }
                   2943:
                   2944: int
1.169     oster    2945: rf_have_enough_components(RF_ConfigSet_t *cset)
1.48      oster    2946: {
                                /*
                                 * Decide whether configuration set 'cset' has enough
                                 * up-to-date components to be configured.  Returns 1 if
                                 * so, 0 if too many components are missing.
                                 *
                                 * A component counts as present for column c only if its
                                 * label names column c AND carries the highest
                                 * mod_counter seen in the set; components with a lower
                                 * mod_counter are treated as missing.
                                 *
                                 * Failure rules, as implemented below:
                                 *  - parityConfig '1': fail as soon as both members of an
                                 *    (even, odd) column pair are missing;
                                 *  - parityConfig '0': fail if any column is missing;
                                 *  - parityConfig '4' or '5': fail if more than one
                                 *    column is missing.
                                 * Any other parityConfig value is never rejected here.
                                 */
1.51      oster    2947:        RF_AutoConfig_t *ac;
                   2948:        RF_AutoConfig_t *auto_config;
                   2949:        RF_ComponentLabel_t *clabel;
1.166     oster    2950:        int c;
1.51      oster    2951:        int num_cols;
                   2952:        int num_missing;
1.86      oster    2953:        int mod_counter;
1.87      oster    2954:        int mod_counter_found;
1.88      oster    2955:        int even_pair_failed;
                   2956:        char parity_type;
1.186     perry    2957:
1.51      oster    2958:
1.48      oster    2959:        /* check to see that we have enough 'live' components
                   2960:           of this set.  If so, we can configure it if necessary */
                   2961:
1.51      oster    2962:        num_cols = cset->ac->clabel->num_columns;
1.88      oster    2963:        parity_type = cset->ac->clabel->parityConfig;
1.51      oster    2964:
                   2965:        /* XXX Check for duplicate components!?!?!? */
                   2966:
1.86      oster    2967:        /* Determine what the mod_counter is supposed to be for this set. */
                                /* (i.e. the maximum mod_counter over all components) */
                   2968:
1.87      oster    2969:        mod_counter_found = 0;
1.101     oster    2970:        mod_counter = 0;
1.86      oster    2971:        ac = cset->ac;
                   2972:        while(ac!=NULL) {
1.87      oster    2973:                if (mod_counter_found==0) {
1.86      oster    2974:                        mod_counter = ac->clabel->mod_counter;
1.87      oster    2975:                        mod_counter_found = 1;
                   2976:                } else {
                   2977:                        if (ac->clabel->mod_counter > mod_counter) {
                   2978:                                mod_counter = ac->clabel->mod_counter;
                   2979:                        }
1.86      oster    2980:                }
                   2981:                ac = ac->next;
                   2982:        }
                   2983:
1.51      oster    2984:        num_missing = 0;
                   2985:        auto_config = cset->ac;
                   2986:
1.166     oster    2987:        even_pair_failed = 0;
                   2988:        for(c=0; c<num_cols; c++) {
                                        /* look for an up-to-date component for column c */
                   2989:                ac = auto_config;
                   2990:                while(ac!=NULL) {
1.186     perry    2991:                        if ((ac->clabel->column == c) &&
1.166     oster    2992:                            (ac->clabel->mod_counter == mod_counter)) {
                   2993:                                /* it's this one... */
1.51      oster    2994: #if DEBUG
1.166     oster    2995:                                printf("Found: %s at %d\n",
                   2996:                                       ac->devname,c);
1.51      oster    2997: #endif
1.166     oster    2998:                                break;
1.51      oster    2999:                        }
1.166     oster    3000:                        ac=ac->next;
                   3001:                }
                   3002:                if (ac==NULL) {
1.51      oster    3003:                                /* Didn't find one here! */
1.88      oster    3004:                                /* special case for RAID 1, especially
                   3005:                                   where there are more than 2
                   3006:                                   components (where RAIDframe treats
                   3007:                                   things a little differently :( ) */
1.166     oster    3008:                        if (parity_type == '1') {
                   3009:                                if (c%2 == 0) { /* even component */
                   3010:                                        even_pair_failed = 1;
                   3011:                                } else { /* odd component.  If
                   3012:                                            we're failed, and
                   3013:                                            so is the even
                   3014:                                            component, it's
                   3015:                                            "Good Night, Charlie" */
                   3016:                                        if (even_pair_failed == 1) {
                   3017:                                                return(0);
1.88      oster    3018:                                        }
                   3019:                                }
1.166     oster    3020:                        } else {
                   3021:                                /* normal accounting */
                   3022:                                num_missing++;
1.88      oster    3023:                        }
1.166     oster    3024:                }
                   3025:                if ((parity_type == '1') && (c%2 == 1)) {
1.88      oster    3026:                                /* Just did the odd component of a pair, and we
1.186     perry    3027:                                   didn't bail.. reset the even_pair_failed flag,
1.88      oster    3028:                                   and go on to the next pair.... */
1.166     oster    3029:                        even_pair_failed = 0;
1.51      oster    3030:                }
                   3031:        }
                   3032:
                   3033:        clabel = cset->ac->clabel;
                   3034:
                   3035:        if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
                   3036:            ((clabel->parityConfig == '4') && (num_missing > 1)) ||
                   3037:            ((clabel->parityConfig == '5') && (num_missing > 1))) {
                   3038:                /* XXX this needs to be made *much* more general */
                   3039:                /* Too many failures */
                   3040:                return(0);
                   3041:        }
                   3042:        /* otherwise, all is well, and we've got enough to take a kick
                   3043:           at autoconfiguring this set */
                   3044:        return(1);
1.48      oster    3045: }
                   3046:
                   3047: void
1.169     oster    3048: rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
                   3049:                        RF_Raid_t *raidPtr)
1.48      oster    3050: {
                                /*
                                 * Build an RF_Config_t from an autoconfigured set.
                                 * Geometry and layout parameters are taken from the label
                                 * of the first component in the 'ac' list; the device
                                 * name of every component in the list is then stored in
                                 * config->devnames[0][column].  All debugVars entries are
                                 * set to the empty string.
                                 *
                                 * Side effect: the first component's label has num_rows
                                 * overwritten with 1.  raidPtr is not referenced here.
                                 */
                   3051:        RF_ComponentLabel_t *clabel;
1.77      oster    3052:        int i;
1.48      oster    3053:
                   3054:        clabel = ac->clabel;
                   3055:
                   3056:        /* 1. Fill in the common stuff */
1.166     oster    3057:        config->numRow = clabel->num_rows = 1;
1.48      oster    3058:        config->numCol = clabel->num_columns;
                   3059:        config->numSpare = 0; /* XXX should this be set here? */
                   3060:        config->sectPerSU = clabel->sectPerSU;
                   3061:        config->SUsPerPU = clabel->SUsPerPU;
                   3062:        config->SUsPerRU = clabel->SUsPerRU;
                   3063:        config->parityConfig = clabel->parityConfig;
                   3064:        /* XXX... */
                   3065:        strcpy(config->diskQueueType,"fifo");
                   3066:        config->maxOutstandingDiskReqs = clabel->maxOutstanding;
                   3067:        config->layoutSpecificSize = 0; /* XXX ?? */
                   3068:
                   3069:        while(ac!=NULL) {
                   3070:                /* row/col values will be in range due to the checks
                   3071:                   in reasonable_label() */
                                        /* NOTE(review): strcpy is unbounded; this relies on
                                           a devnames slot being at least as large as
                                           ac->devname -- sizes not visible here, confirm */
1.166     oster    3072:                strcpy(config->devnames[0][ac->clabel->column],
1.48      oster    3073:                       ac->devname);
                   3074:                ac = ac->next;
                   3075:        }
                   3076:
1.77      oster    3077:        for(i=0;i<RF_MAXDBGV;i++) {
1.163     fvdl     3078:                config->debugVars[i][0] = 0;
1.77      oster    3079:        }
1.48      oster    3080: }
                   3081:
                   3082: int
1.169     oster    3083: rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
1.48      oster    3084: {
                                /*
                                 * Set the autoconfigure flag of a RAID set to new_value,
                                 * both in raidPtr and in the on-disk component labels.
                                 * Labels are rewritten (read, modify, write) for every
                                 * column whose disk status is rf_ds_optimal and for every
                                 * spare whose status is rf_ds_used_spare; other disks are
                                 * left alone.  Always returns new_value.
                                 *
                                 * NOTE(review): the return values of
                                 * raidread_component_label() and
                                 * raidwrite_component_label() are ignored.  If the read
                                 * does not fill in clabel, the label written back comes
                                 * from an uninitialized (or stale, from the previous
                                 * iteration) stack buffer -- confirm whether the read can
                                 * fail here.
                                 */
                   3085:        RF_ComponentLabel_t clabel;
                   3086:        struct vnode *vp;
                   3087:        dev_t dev;
1.166     oster    3088:        int column;
1.148     oster    3089:        int sparecol;
1.48      oster    3090:
1.54      oster    3091:        raidPtr->autoconfigure = new_value;
1.166     oster    3092:
                   3093:        for(column=0; column<raidPtr->numCol; column++) {
                   3094:                if (raidPtr->Disks[column].status == rf_ds_optimal) {
                   3095:                        dev = raidPtr->Disks[column].dev;
                   3096:                        vp = raidPtr->raid_cinfo[column].ci_vp;
                   3097:                        raidread_component_label(dev, vp, &clabel);
                   3098:                        clabel.autoconfigure = new_value;
                   3099:                        raidwrite_component_label(dev, vp, &clabel);
1.48      oster    3100:                }
                   3101:        }
                                /* spares are stored after the numCol regular columns */
1.148     oster    3102:        for(column = 0; column < raidPtr->numSpare ; column++) {
                   3103:                sparecol = raidPtr->numCol + column;
1.166     oster    3104:                if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
                   3105:                        dev = raidPtr->Disks[sparecol].dev;
                   3106:                        vp = raidPtr->raid_cinfo[sparecol].ci_vp;
1.148     oster    3107:                        raidread_component_label(dev, vp, &clabel);
                   3108:                        clabel.autoconfigure = new_value;
                   3109:                        raidwrite_component_label(dev, vp, &clabel);
                   3110:                }
                   3111:        }
1.48      oster    3112:        return(new_value);
                   3113: }
                   3114:
                   3115: int
1.169     oster    3116: rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
1.48      oster    3117: {
                                /*
                                 * Set the root_partition flag of a RAID set to new_value,
                                 * both in raidPtr and in the on-disk component labels.
                                 * Labels are rewritten (read, modify, write) for every
                                 * column whose disk status is rf_ds_optimal and for every
                                 * spare whose status is rf_ds_used_spare; other disks are
                                 * left alone.  Always returns new_value.
                                 *
                                 * NOTE(review): the return values of
                                 * raidread_component_label() and
                                 * raidwrite_component_label() are ignored.  If the read
                                 * does not fill in clabel, the label written back comes
                                 * from an uninitialized (or stale, from the previous
                                 * iteration) stack buffer -- confirm whether the read can
                                 * fail here.
                                 */
                   3118:        RF_ComponentLabel_t clabel;
                   3119:        struct vnode *vp;
                   3120:        dev_t dev;
1.166     oster    3121:        int column;
1.148     oster    3122:        int sparecol;
1.48      oster    3123:
1.54      oster    3124:        raidPtr->root_partition = new_value;
1.166     oster    3125:        for(column=0; column<raidPtr->numCol; column++) {
                   3126:                if (raidPtr->Disks[column].status == rf_ds_optimal) {
                   3127:                        dev = raidPtr->Disks[column].dev;
                   3128:                        vp = raidPtr->raid_cinfo[column].ci_vp;
                   3129:                        raidread_component_label(dev, vp, &clabel);
                   3130:                        clabel.root_partition = new_value;
                   3131:                        raidwrite_component_label(dev, vp, &clabel);
1.148     oster    3132:                }
                   3133:        }
                                /* spares are stored after the numCol regular columns */
                   3134:        for(column = 0; column < raidPtr->numSpare ; column++) {
                   3135:                sparecol = raidPtr->numCol + column;
1.166     oster    3136:                if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
                   3137:                        dev = raidPtr->Disks[sparecol].dev;
                   3138:                        vp = raidPtr->raid_cinfo[sparecol].ci_vp;
1.148     oster    3139:                        raidread_component_label(dev, vp, &clabel);
                   3140:                        clabel.root_partition = new_value;
                   3141:                        raidwrite_component_label(dev, vp, &clabel);
1.48      oster    3142:                }
                   3143:        }
                   3144:        return(new_value);
                   3145: }
                   3146:
                   3147: void
1.169     oster    3148: rf_release_all_vps(RF_ConfigSet_t *cset)
1.48      oster    3149: {
                                /*
                                 * Close and release the vnode held by every component in
                                 * the set.  Each non-NULL ac->vp is locked, closed with
                                 * FREAD, released with vput(), and then set to NULL, so
                                 * calling this again on the same set does nothing.  The
                                 * component structures themselves are not freed.
                                 */
                   3150:        RF_AutoConfig_t *ac;
1.186     perry    3151:
1.48      oster    3152:        ac = cset->ac;
                   3153:        while(ac!=NULL) {
                   3154:                /* Close the vp, and give it back */
                   3155:                if (ac->vp) {
1.96      oster    3156:                        vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
1.48      oster    3157:                        VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
                   3158:                        vput(ac->vp);
1.86      oster    3159:                        ac->vp = NULL;
1.48      oster    3160:                }
                   3161:                ac = ac->next;
                   3162:        }
                   3163: }
                   3164:
                   3165:
                   3166: void
1.169     oster    3167: rf_cleanup_config_set(RF_ConfigSet_t *cset)
1.48      oster    3168: {
                   3169:        RF_AutoConfig_t *ac;
                   3170:        RF_AutoConfig_t *next_ac;
1.186     perry    3171:
1.48      oster    3172:        ac = cset->ac;
                   3173:        while(ac!=NULL) {
                   3174:                next_ac = ac->next;
                   3175:                /* nuke the label */
                   3176:                free(ac->clabel, M_RAIDFRAME);
                   3177:                /* cleanup the config structure */
                   3178:                free(ac, M_RAIDFRAME);
                   3179:                /* "next.." */
                   3180:                ac = next_ac;
                   3181:        }
                   3182:        /* and, finally, nuke the config set */
                   3183:        free(cset, M_RAIDFRAME);
                   3184: }
                   3185:
                   3186:
                   3187: void
1.169     oster    3188: raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
1.48      oster    3189: {
                   3190:        /* current version number */
1.186     perry    3191:        clabel->version = RF_COMPONENT_LABEL_VERSION;
1.57      oster    3192:        clabel->serial_number = raidPtr->serial_number;
1.48      oster    3193:        clabel->mod_counter = raidPtr->mod_counter;
1.166     oster    3194:        clabel->num_rows = 1;
1.48      oster    3195:        clabel->num_columns = raidPtr->numCol;
                   3196:        clabel->clean = RF_RAID_DIRTY; /* not clean */
                   3197:        clabel->status = rf_ds_optimal; /* "It's good!" */
1.186     perry    3198:
1.48      oster    3199:        clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
                   3200:        clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
                   3201:        clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
1.54      oster    3202:
                   3203:        clabel->blockSize = raidPtr->bytesPerSector;
                   3204:        clabel->numBlocks = raidPtr->sectorsPerDisk;
                   3205:
1.48      oster    3206:        /* XXX not portable */
                   3207:        clabel->parityConfig = raidPtr->Layout.map->parityConfig;
1.54      oster    3208:        clabel->maxOutstanding = raidPtr->maxOutstanding;
                   3209:        clabel->autoconfigure = raidPtr->autoconfigure;
                   3210:        clabel->root_partition = raidPtr->root_partition;
1.48      oster    3211:        clabel->last_unit = raidPtr->raidid;
1.54      oster    3212:        clabel->config_order = raidPtr->config_order;
1.51      oster    3213: }
                   3214:
                   3215: int
1.169     oster    3216: rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
1.51      oster    3217: {
                   3218:        RF_Raid_t *raidPtr;
                   3219:        RF_Config_t *config;
                   3220:        int raidID;
                   3221:        int retcode;
                   3222:
1.127     oster    3223: #if DEBUG
1.72      oster    3224:        printf("RAID autoconfigure\n");
1.127     oster    3225: #endif
1.51      oster    3226:
                   3227:        retcode = 0;
                   3228:        *unit = -1;
                   3229:
                   3230:        /* 1. Create a config structure */
                   3231:
                   3232:        config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
                   3233:                                       M_RAIDFRAME,
                   3234:                                       M_NOWAIT);
                   3235:        if (config==NULL) {
                   3236:                printf("Out of mem!?!?\n");
                   3237:                                /* XXX do something more intelligent here. */
                   3238:                return(1);
                   3239:        }
1.77      oster    3240:
                   3241:        memset(config, 0, sizeof(RF_Config_t));
1.51      oster    3242:
1.186     perry    3243:        /*
                   3244:           2. Figure out what RAID ID this one is supposed to live at
1.51      oster    3245:           See if we can get the same RAID dev that it was configured
1.186     perry    3246:           on last time..
1.51      oster    3247:        */
                   3248:
                   3249:        raidID = cset->ac->clabel->last_unit;
1.52      oster    3250:        if ((raidID < 0) || (raidID >= numraid)) {
1.51      oster    3251:                /* let's not wander off into lala land. */
                   3252:                raidID = numraid - 1;
                   3253:        }
                   3254:        if (raidPtrs[raidID]->valid != 0) {
                   3255:
1.186     perry    3256:                /*
                   3257:                   Nope... Go looking for an alternative...
1.51      oster    3258:                   Start high so we don't immediately use raid0 if that's
1.186     perry    3259:                   not taken.
1.51      oster    3260:                */
                   3261:
1.115     oster    3262:                for(raidID = numraid - 1; raidID >= 0; raidID--) {
1.51      oster    3263:                        if (raidPtrs[raidID]->valid == 0) {
                   3264:                                /* can use this one! */
                   3265:                                break;
                   3266:                        }
                   3267:                }
                   3268:        }
                   3269:
                   3270:        if (raidID < 0) {
                   3271:                /* punt... */
                   3272:                printf("Unable to auto configure this set!\n");
                   3273:                printf("(Out of RAID devs!)\n");
                   3274:                return(1);
                   3275:        }
1.127     oster    3276:
                   3277: #if DEBUG
1.72      oster    3278:        printf("Configuring raid%d:\n",raidID);
1.127     oster    3279: #endif
                   3280:
1.51      oster    3281:        raidPtr = raidPtrs[raidID];
                   3282:
                   3283:        /* XXX all this stuff should be done SOMEWHERE ELSE! */
                   3284:        raidPtr->raidid = raidID;
                   3285:        raidPtr->openings = RAIDOUTSTANDING;
                   3286:
                   3287:        /* 3. Build the configuration structure */
                   3288:        rf_create_configuration(cset->ac, config, raidPtr);
                   3289:
                   3290:        /* 4. Do the configuration */
                   3291:        retcode = rf_Configure(raidPtr, config, cset->ac);
1.186     perry    3292:
1.51      oster    3293:        if (retcode == 0) {
1.61      oster    3294:
1.59      oster    3295:                raidinit(raidPtrs[raidID]);
                   3296:
                   3297:                rf_markalldirty(raidPtrs[raidID]);
1.54      oster    3298:                raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
1.51      oster    3299:                if (cset->ac->clabel->root_partition==1) {
                   3300:                        /* everything configured just fine.  Make a note
                   3301:                           that this set is eligible to be root. */
                   3302:                        cset->rootable = 1;
1.54      oster    3303:                        /* XXX do this here? */
1.186     perry    3304:                        raidPtrs[raidID]->root_partition = 1;
1.51      oster    3305:                }
                   3306:        }
                   3307:
                   3308:        /* 5. Cleanup */
                   3309:        free(config, M_RAIDFRAME);
1.186     perry    3310:
1.51      oster    3311:        *unit = raidID;
                   3312:        return(retcode);
1.99      oster    3313: }
                   3314:
                   3315: void
1.169     oster    3316: rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
1.99      oster    3317: {
                   3318:        struct buf *bp;
                   3319:
                   3320:        bp = (struct buf *)desc->bp;
1.186     perry    3321:        disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
1.145     mrg      3322:            (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
1.13      oster    3323: }
1.177     oster    3324:
                   3325: void
1.187     christos 3326: rf_pool_init(struct pool *p, size_t size, const char *w_chan,
                   3327:             size_t xmin, size_t xmax)
1.177     oster    3328: {
1.186     perry    3329:        pool_init(p, size, 0, 0, 0, w_chan, NULL);
1.187     christos 3330:        pool_sethiwat(p, xmax);
                   3331:        pool_prime(p, xmin);
                   3332:        pool_setlowat(p, xmin);
1.177     oster    3333: }
1.190   ! oster    3334:
        !          3335: /*
        !          3336:  * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
        !          3337:  * if there is IO pending and if that IO could possibly be done for a
        !          3338:  * given RAID set.  Returns 0 if IO is waiting and can be done, 1
        !          3339:  * otherwise.
        !          3340:  *
        !          3341:  */
        !          3342:
        !          3343: int
        !          3344: rf_buf_queue_check(int raidid)
        !          3345: {
        !          3346:        if ((BUFQ_PEEK(&(raid_softc[raidid].buf_queue)) != NULL) &&
        !          3347:            raidPtrs[raidid]->openings > 0) {
        !          3348:                /* there is work to do */
        !          3349:                return 0;
        !          3350:        }
        !          3351:        /* default is nothing to do */
        !          3352:        return 1;
        !          3353: }

CVSweb <webmaster@jp.NetBSD.org>