[BACK]Return to rf_netbsdkintf.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / dev / raidframe

Annotation of src/sys/dev/raidframe/rf_netbsdkintf.c, Revision 1.153

1.153   ! thorpej     1: /*     $NetBSD: rf_netbsdkintf.c,v 1.152 2003/01/19 16:52:55 thorpej Exp $     */
1.1       oster       2: /*-
                      3:  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
                      4:  * All rights reserved.
                      5:  *
                      6:  * This code is derived from software contributed to The NetBSD Foundation
                      7:  * by Greg Oster; Jason R. Thorpe.
                      8:  *
                      9:  * Redistribution and use in source and binary forms, with or without
                     10:  * modification, are permitted provided that the following conditions
                     11:  * are met:
                     12:  * 1. Redistributions of source code must retain the above copyright
                     13:  *    notice, this list of conditions and the following disclaimer.
                     14:  * 2. Redistributions in binary form must reproduce the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer in the
                     16:  *    documentation and/or other materials provided with the distribution.
                     17:  * 3. All advertising materials mentioning features or use of this software
                     18:  *    must display the following acknowledgement:
                     19:  *        This product includes software developed by the NetBSD
                     20:  *        Foundation, Inc. and its contributors.
                     21:  * 4. Neither the name of The NetBSD Foundation nor the names of its
                     22:  *    contributors may be used to endorse or promote products derived
                     23:  *    from this software without specific prior written permission.
                     24:  *
                     25:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     26:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     27:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     28:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     29:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     30:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     31:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     32:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     33:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     34:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     35:  * POSSIBILITY OF SUCH DAMAGE.
                     36:  */
                     37:
                     38: /*
                     39:  * Copyright (c) 1988 University of Utah.
                     40:  * Copyright (c) 1990, 1993
                     41:  *      The Regents of the University of California.  All rights reserved.
                     42:  *
                     43:  * This code is derived from software contributed to Berkeley by
                     44:  * the Systems Programming Group of the University of Utah Computer
                     45:  * Science Department.
                     46:  *
                     47:  * Redistribution and use in source and binary forms, with or without
                     48:  * modification, are permitted provided that the following conditions
                     49:  * are met:
                     50:  * 1. Redistributions of source code must retain the above copyright
                     51:  *    notice, this list of conditions and the following disclaimer.
                     52:  * 2. Redistributions in binary form must reproduce the above copyright
                     53:  *    notice, this list of conditions and the following disclaimer in the
                     54:  *    documentation and/or other materials provided with the distribution.
                     55:  * 3. All advertising materials mentioning features or use of this software
                     56:  *    must display the following acknowledgement:
                     57:  *      This product includes software developed by the University of
                     58:  *      California, Berkeley and its contributors.
                     59:  * 4. Neither the name of the University nor the names of its contributors
                     60:  *    may be used to endorse or promote products derived from this software
                     61:  *    without specific prior written permission.
                     62:  *
                     63:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     64:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     65:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     66:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     67:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     68:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     69:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     70:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     71:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     72:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     73:  * SUCH DAMAGE.
                     74:  *
                     75:  * from: Utah $Hdr: cd.c 1.6 90/11/28$
                     76:  *
                     77:  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
                     78:  */
                     79:
                     80: /*
                     81:  * Copyright (c) 1995 Carnegie-Mellon University.
                     82:  * All rights reserved.
                     83:  *
                     84:  * Authors: Mark Holland, Jim Zelenka
                     85:  *
                     86:  * Permission to use, copy, modify and distribute this software and
                     87:  * its documentation is hereby granted, provided that both the copyright
                     88:  * notice and this permission notice appear in all copies of the
                     89:  * software, derivative works or modified versions, and any portions
                     90:  * thereof, and that both notices appear in supporting documentation.
                     91:  *
                     92:  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
                     93:  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
                     94:  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
                     95:  *
                     96:  * Carnegie Mellon requests users of this software to return to
                     97:  *
                     98:  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
                     99:  *  School of Computer Science
                    100:  *  Carnegie Mellon University
                    101:  *  Pittsburgh PA 15213-3890
                    102:  *
                    103:  * any improvements or extensions that they make and grant Carnegie the
                    104:  * rights to redistribute these changes.
                    105:  */
                    106:
                    107: /***********************************************************
                    108:  *
                    109:  * rf_kintf.c -- the kernel interface routines for RAIDframe
                    110:  *
                    111:  ***********************************************************/
1.112     lukem     112:
                    113: #include <sys/cdefs.h>
1.153   ! thorpej   114: __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.152 2003/01/19 16:52:55 thorpej Exp $");
1.1       oster     115:
1.113     lukem     116: #include <sys/param.h>
1.1       oster     117: #include <sys/errno.h>
                    118: #include <sys/pool.h>
1.152     thorpej   119: #include <sys/proc.h>
1.1       oster     120: #include <sys/queue.h>
                    121: #include <sys/disk.h>
                    122: #include <sys/device.h>
                    123: #include <sys/stat.h>
                    124: #include <sys/ioctl.h>
                    125: #include <sys/fcntl.h>
                    126: #include <sys/systm.h>
                    127: #include <sys/namei.h>
                    128: #include <sys/vnode.h>
                    129: #include <sys/disklabel.h>
                    130: #include <sys/conf.h>
                    131: #include <sys/lock.h>
                    132: #include <sys/buf.h>
                    133: #include <sys/user.h>
1.65      oster     134: #include <sys/reboot.h>
1.8       oster     135:
1.110     oster     136: #include <dev/raidframe/raidframevar.h>
                    137: #include <dev/raidframe/raidframeio.h>
1.8       oster     138: #include "raid.h"
1.62      oster     139: #include "opt_raid_autoconfig.h"
1.1       oster     140: #include "rf_raid.h"
1.44      oster     141: #include "rf_copyback.h"
1.1       oster     142: #include "rf_dag.h"
                    143: #include "rf_dagflags.h"
1.99      oster     144: #include "rf_desc.h"
1.1       oster     145: #include "rf_diskqueue.h"
                    146: #include "rf_etimer.h"
                    147: #include "rf_general.h"
                    148: #include "rf_kintf.h"
                    149: #include "rf_options.h"
                    150: #include "rf_driver.h"
                    151: #include "rf_parityscan.h"
                    152: #include "rf_threadstuff.h"
                    153:
#ifdef DEBUG
/* Debug verbosity for this file; db1_printf() prints only when it is > 0. */
int     rf_kdebug_level = 0;
/*
 * db1_printf((fmt, ...)) -- conditional debug printf.
 *
 * The single macro argument is a complete, parenthesized printf()
 * argument list.  Both variants expand to one do { } while (0)
 * statement, so the macro can be used as the body of an if/else without
 * capturing the "else" (debug build) or producing a stray empty
 * statement in front of it (non-debug build).
 */
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else                          /* DEBUG */
#define db1_printf(a) do { } while (0)
#endif                         /* DEBUG */
1.1       oster     160:
1.9       oster     161: static RF_Raid_t **raidPtrs;   /* global raid device descriptors */
1.1       oster     162:
1.11      oster     163: RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
1.1       oster     164:
1.10      oster     165: static RF_SparetWait_t *rf_sparet_wait_queue;  /* requests to install a
                    166:                                                 * spare table */
                    167: static RF_SparetWait_t *rf_sparet_resp_queue;  /* responses from
                    168:                                                 * installation process */
1.153   ! thorpej   169:
        !           170: MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
1.10      oster     171:
1.1       oster     172: /* prototypes */
1.10      oster     173: static void KernelWakeupFunc(struct buf * bp);
                    174: static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
                    175:                   dev_t dev, RF_SectorNum_t startSect,
                    176:                   RF_SectorCount_t numSect, caddr_t buf,
                    177:                   void (*cbFunc) (struct buf *), void *cbArg,
                    178:                   int logBytesPerSector, struct proc * b_proc);
1.104     oster     179: static void raidinit(RF_Raid_t *);
1.1       oster     180:
1.104     oster     181: void raidattach(int);
1.130     gehenna   182:
                    183: dev_type_open(raidopen);
                    184: dev_type_close(raidclose);
                    185: dev_type_read(raidread);
                    186: dev_type_write(raidwrite);
                    187: dev_type_ioctl(raidioctl);
                    188: dev_type_strategy(raidstrategy);
                    189: dev_type_dump(raiddump);
                    190: dev_type_size(raidsize);
                    191:
                    192: const struct bdevsw raid_bdevsw = {
                    193:        raidopen, raidclose, raidstrategy, raidioctl,
                    194:        raiddump, raidsize, D_DISK
                    195: };
                    196:
                    197: const struct cdevsw raid_cdevsw = {
                    198:        raidopen, raidclose, raidread, raidwrite, raidioctl,
1.144     jdolecek  199:        nostop, notty, nopoll, nommap, nokqfilter, D_DISK
1.130     gehenna   200: };
1.1       oster     201:
                    202: /*
                    203:  * Pilfered from ccd.c
                    204:  */
                    205:
1.10      oster     206: struct raidbuf {
                    207:        struct buf rf_buf;      /* new I/O buf.  MUST BE FIRST!!! */
                    208:        struct buf *rf_obp;     /* ptr. to original I/O buf */
1.11      oster     209:        RF_DiskQueueData_t *req;/* the request that this was part of.. */
1.10      oster     210: };
1.1       oster     211:
1.116     thorpej   212: /* component buffer pool */
                    213: struct pool raidframe_cbufpool;
1.1       oster     214:
1.9       oster     215: /* XXX Not sure if the following should be replacing the raidPtrs above,
1.53      oster     216:    or if it should be used in conjunction with that...
1.59      oster     217: */
1.1       oster     218:
1.10      oster     219: struct raid_softc {
                    220:        int     sc_flags;       /* flags */
                    221:        int     sc_cflags;      /* configuration flags */
1.11      oster     222:        size_t  sc_size;        /* size of the raid device */
1.10      oster     223:        char    sc_xname[20];   /* XXX external name */
                    224:        struct disk sc_dkdev;   /* generic disk device info */
1.125     hannken   225:        struct bufq_state buf_queue;    /* used for the device queue */
1.10      oster     226: };
1.1       oster     227: /* sc_flags */
                    228: #define RAIDF_INITED   0x01    /* unit has been initialized */
                    229: #define RAIDF_WLABEL   0x02    /* label area is writable */
                    230: #define RAIDF_LABELLING        0x04    /* unit is currently being labelled */
                    231: #define RAIDF_WANTED   0x40    /* someone is waiting to obtain a lock */
                    232: #define RAIDF_LOCKED   0x80    /* unit is locked */
                    233:
                    234: #define        raidunit(x)     DISKUNIT(x)
1.48      oster     235: int numraid = 0;
1.1       oster     236:
1.20      oster     237: /*
                    238:  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
                    239:  * Be aware that large numbers can allow the driver to consume a lot of
1.28      oster     240:  * kernel memory, especially on writes, and in degraded mode reads.
                    241:  *
                    242:  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
                    243:  * a single 64K write will typically require 64K for the old data,
                    244:  * 64K for the old parity, and 64K for the new parity, for a total
                    245:  * of 192K (if the parity buffer is not re-used immediately).
1.110     oster     246:  * Even if it is used immediately, that's still 128K, which when multiplied
1.28      oster     247:  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
                    248:  *
                    249:  * Now in degraded mode, for example, a 64K read on the above setup may
                    250:  * require data reconstruction, which will require *all* of the 4 remaining
                    251:  * disks to participate -- 4 * 32K/disk == 128K again.
1.20      oster     252:  */
                    253:
                    254: #ifndef RAIDOUTSTANDING
1.28      oster     255: #define RAIDOUTSTANDING   6
1.20      oster     256: #endif
                    257:
1.1       oster     258: #define RAIDLABELDEV(dev)      \
                    259:        (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
                    260:
                    261: /* declared here, and made public, for the benefit of KVM stuff.. */
1.10      oster     262: struct raid_softc *raid_softc;
1.9       oster     263:
1.104     oster     264: static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
                    265:                                     struct disklabel *);
                    266: static void raidgetdisklabel(dev_t);
                    267: static void raidmakedisklabel(struct raid_softc *);
1.1       oster     268:
1.104     oster     269: static int raidlock(struct raid_softc *);
                    270: static void raidunlock(struct raid_softc *);
1.1       oster     271:
1.104     oster     272: static void rf_markalldirty(RF_Raid_t *);
1.48      oster     273:
                    274: struct device *raidrootdev;
1.1       oster     275:
1.104     oster     276: void rf_ReconThread(struct rf_recon_req *);
1.37      oster     277: /* XXX what I want is: */
1.104     oster     278: /*void rf_ReconThread(RF_Raid_t *raidPtr);  */
                    279: void rf_RewriteParityThread(RF_Raid_t *raidPtr);
                    280: void rf_CopybackThread(RF_Raid_t *raidPtr);
                    281: void rf_ReconstructInPlaceThread(struct rf_recon_req *);
1.142     thorpej   282: int rf_autoconfig(struct device *self);
                    283: void rf_buildroothack(RF_ConfigSet_t *);
1.104     oster     284:
                    285: RF_AutoConfig_t *rf_find_raid_components(void);
                    286: RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
                    287: static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
                    288: static int rf_reasonable_label(RF_ComponentLabel_t *);
                    289: void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
                    290: int rf_set_autoconfig(RF_Raid_t *, int);
                    291: int rf_set_rootpartition(RF_Raid_t *, int);
                    292: void rf_release_all_vps(RF_ConfigSet_t *);
                    293: void rf_cleanup_config_set(RF_ConfigSet_t *);
                    294: int rf_have_enough_components(RF_ConfigSet_t *);
                    295: int rf_auto_config_set(RF_ConfigSet_t *, int *);
1.48      oster     296:
                    297: static int raidautoconfig = 0; /* Debugging, mostly.  Set to 0 to not
1.62      oster     298:                                  allow autoconfig to take place.
                    299:                                  Note that this is overridden by having
                    300:                                  RAID_AUTOCONFIG as an option in the
                    301:                                  kernel config file.  */
1.37      oster     302:
1.10      oster     303: void
                    304: raidattach(num)
1.9       oster     305:        int     num;
1.1       oster     306: {
1.14      oster     307:        int raidID;
                    308:        int i, rc;
1.1       oster     309:
                    310: #ifdef DEBUG
1.9       oster     311:        printf("raidattach: Asked for %d units\n", num);
1.1       oster     312: #endif
                    313:
                    314:        if (num <= 0) {
                    315: #ifdef DIAGNOSTIC
                    316:                panic("raidattach: count <= 0");
                    317: #endif
                    318:                return;
                    319:        }
1.9       oster     320:        /* This is where all the initialization stuff gets done. */
1.1       oster     321:
1.50      oster     322:        numraid = num;
                    323:
1.1       oster     324:        /* Make some space for requested number of units... */
                    325:
                    326:        RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
                    327:        if (raidPtrs == NULL) {
1.141     provos    328:                panic("raidPtrs is NULL!!");
1.1       oster     329:        }
1.116     thorpej   330:
                    331:        /* Initialize the component buffer pool. */
                    332:        pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
1.117     thorpej   333:            0, 0, "raidpl", NULL);
1.116     thorpej   334:
1.14      oster     335:        rc = rf_mutex_init(&rf_sparet_wait_mutex);
                    336:        if (rc) {
                    337:                RF_PANIC();
                    338:        }
                    339:
                    340:        rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
                    341:
1.58      oster     342:        for (i = 0; i < num; i++)
1.14      oster     343:                raidPtrs[i] = NULL;
                    344:        rc = rf_BootRaidframe();
                    345:        if (rc == 0)
                    346:                printf("Kernelized RAIDframe activated\n");
                    347:        else
1.141     provos    348:                panic("Serious error booting RAID!!");
1.14      oster     349:
1.9       oster     350:        /* put together some datastructures like the CCD device does.. This
                    351:         * lets us lock the device and what-not when it gets opened. */
1.1       oster     352:
                    353:        raid_softc = (struct raid_softc *)
1.48      oster     354:                malloc(num * sizeof(struct raid_softc),
                    355:                       M_RAIDFRAME, M_NOWAIT);
1.1       oster     356:        if (raid_softc == NULL) {
                    357:                printf("WARNING: no memory for RAIDframe driver\n");
                    358:                return;
                    359:        }
1.50      oster     360:
1.108     thorpej   361:        memset(raid_softc, 0, num * sizeof(struct raid_softc));
1.34      oster     362:
1.48      oster     363:        raidrootdev = (struct device *)malloc(num * sizeof(struct device),
                    364:                                              M_RAIDFRAME, M_NOWAIT);
                    365:        if (raidrootdev == NULL) {
1.141     provos    366:                panic("No memory for RAIDframe driver!!?!?!");
1.48      oster     367:        }
                    368:
1.9       oster     369:        for (raidID = 0; raidID < num; raidID++) {
1.126     hannken   370:                bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
1.48      oster     371:
                    372:                raidrootdev[raidID].dv_class  = DV_DISK;
                    373:                raidrootdev[raidID].dv_cfdata = NULL;
                    374:                raidrootdev[raidID].dv_unit   = raidID;
                    375:                raidrootdev[raidID].dv_parent = NULL;
                    376:                raidrootdev[raidID].dv_flags  = 0;
                    377:                sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
                    378:
1.9       oster     379:                RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
1.11      oster     380:                          (RF_Raid_t *));
1.9       oster     381:                if (raidPtrs[raidID] == NULL) {
1.39      oster     382:                        printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
                    383:                        numraid = raidID;
                    384:                        return;
1.1       oster     385:                }
                    386:        }
1.48      oster     387:
1.114     lukem     388: #ifdef RAID_AUTOCONFIG
1.62      oster     389:        raidautoconfig = 1;
                    390: #endif
                    391:
1.142     thorpej   392:        /*
                    393:         * Register a finalizer which will be used to auto-config RAID
                    394:         * sets once all real hardware devices have been found.
                    395:         */
                    396:        if (config_finalize_register(NULL, rf_autoconfig) != 0)
                    397:                printf("WARNING: unable to register RAIDframe finalizer\n");
                    398: }
                    399:
                    400: int
                    401: rf_autoconfig(struct device *self)
                    402: {
                    403:        RF_AutoConfig_t *ac_list;
                    404:        RF_ConfigSet_t *config_sets;
                    405:
                    406:        if (raidautoconfig == 0)
                    407:                return (0);
                    408:
                    409:        /* XXX This code can only be run once. */
                    410:        raidautoconfig = 0;
                    411:
1.48      oster     412:        /* 1. locate all RAID components on the system */
1.142     thorpej   413: #ifdef DEBUG
                    414:        printf("Searching for RAID components...\n");
1.48      oster     415: #endif
                    416:        ac_list = rf_find_raid_components();
                    417:
1.142     thorpej   418:        /* 2. Sort them into their respective sets. */
1.48      oster     419:        config_sets = rf_create_auto_sets(ac_list);
                    420:
1.142     thorpej   421:        /*
                    422:         * 3. Evaluate each set andconfigure the valid ones.
                    423:         * This gets done in rf_buildroothack().
                    424:         */
                    425:        rf_buildroothack(config_sets);
1.48      oster     426:
1.142     thorpej   427:        return (1);
1.48      oster     428: }
                    429:
                    430: void
1.142     thorpej   431: rf_buildroothack(RF_ConfigSet_t *config_sets)
1.48      oster     432: {
                    433:        RF_ConfigSet_t *cset;
                    434:        RF_ConfigSet_t *next_cset;
1.51      oster     435:        int retcode;
1.48      oster     436:        int raidID;
1.51      oster     437:        int rootID;
                    438:        int num_root;
1.48      oster     439:
1.101     oster     440:        rootID = 0;
1.51      oster     441:        num_root = 0;
1.48      oster     442:        cset = config_sets;
                    443:        while(cset != NULL ) {
                    444:                next_cset = cset->next;
1.51      oster     445:                if (rf_have_enough_components(cset) &&
                    446:                    cset->ac->clabel->autoconfigure==1) {
                    447:                        retcode = rf_auto_config_set(cset,&raidID);
                    448:                        if (!retcode) {
                    449:                                if (cset->rootable) {
                    450:                                        rootID = raidID;
                    451:                                        num_root++;
                    452:                                }
                    453:                        } else {
                    454:                                /* The autoconfig didn't work :( */
                    455: #if DEBUG
                    456:                                printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
                    457: #endif
                    458:                                rf_release_all_vps(cset);
1.48      oster     459:                        }
                    460:                } else {
                    461:                        /* we're not autoconfiguring this set...
                    462:                           release the associated resources */
1.49      oster     463:                        rf_release_all_vps(cset);
1.48      oster     464:                }
                    465:                /* cleanup */
1.49      oster     466:                rf_cleanup_config_set(cset);
1.48      oster     467:                cset = next_cset;
                    468:        }
1.122     oster     469:
                    470:        /* we found something bootable... */
                    471:
                    472:        if (num_root == 1) {
                    473:                booted_device = &raidrootdev[rootID];
                    474:        } else if (num_root > 1) {
                    475:                /* we can't guess.. require the user to answer... */
                    476:                boothowto |= RB_ASKNAME;
1.51      oster     477:        }
1.1       oster     478: }
                    479:
                    480:
                    481: int
                    482: raidsize(dev)
1.9       oster     483:        dev_t   dev;
1.1       oster     484: {
                    485:        struct raid_softc *rs;
                    486:        struct disklabel *lp;
1.9       oster     487:        int     part, unit, omask, size;
1.1       oster     488:
                    489:        unit = raidunit(dev);
                    490:        if (unit >= numraid)
                    491:                return (-1);
                    492:        rs = &raid_softc[unit];
                    493:
                    494:        if ((rs->sc_flags & RAIDF_INITED) == 0)
                    495:                return (-1);
                    496:
                    497:        part = DISKPART(dev);
                    498:        omask = rs->sc_dkdev.dk_openmask & (1 << part);
                    499:        lp = rs->sc_dkdev.dk_label;
                    500:
                    501:        if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
                    502:                return (-1);
                    503:
                    504:        if (lp->d_partitions[part].p_fstype != FS_SWAP)
                    505:                size = -1;
                    506:        else
                    507:                size = lp->d_partitions[part].p_size *
                    508:                    (lp->d_secsize / DEV_BSIZE);
                    509:
                    510:        if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
                    511:                return (-1);
                    512:
                    513:        return (size);
                    514:
                    515: }
                    516:
                    517: int
                    518: raiddump(dev, blkno, va, size)
1.9       oster     519:        dev_t   dev;
1.1       oster     520:        daddr_t blkno;
                    521:        caddr_t va;
1.9       oster     522:        size_t  size;
1.1       oster     523: {
                    524:        /* Not implemented. */
                    525:        return ENXIO;
                    526: }
                    527: /* ARGSUSED */
                    528: int
                    529: raidopen(dev, flags, fmt, p)
1.9       oster     530:        dev_t   dev;
                    531:        int     flags, fmt;
1.1       oster     532:        struct proc *p;
                    533: {
1.9       oster     534:        int     unit = raidunit(dev);
1.1       oster     535:        struct raid_softc *rs;
                    536:        struct disklabel *lp;
1.9       oster     537:        int     part, pmask;
                    538:        int     error = 0;
                    539:
1.1       oster     540:        if (unit >= numraid)
                    541:                return (ENXIO);
                    542:        rs = &raid_softc[unit];
                    543:
                    544:        if ((error = raidlock(rs)) != 0)
1.9       oster     545:                return (error);
1.1       oster     546:        lp = rs->sc_dkdev.dk_label;
                    547:
                    548:        part = DISKPART(dev);
                    549:        pmask = (1 << part);
                    550:
                    551:        if ((rs->sc_flags & RAIDF_INITED) &&
                    552:            (rs->sc_dkdev.dk_openmask == 0))
1.9       oster     553:                raidgetdisklabel(dev);
1.1       oster     554:
                    555:        /* make sure that this partition exists */
                    556:
                    557:        if (part != RAW_PART) {
                    558:                if (((rs->sc_flags & RAIDF_INITED) == 0) ||
                    559:                    ((part >= lp->d_npartitions) ||
1.9       oster     560:                        (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
1.1       oster     561:                        error = ENXIO;
                    562:                        raidunlock(rs);
1.9       oster     563:                        return (error);
1.1       oster     564:                }
                    565:        }
                    566:        /* Prevent this unit from being unconfigured while open. */
                    567:        switch (fmt) {
                    568:        case S_IFCHR:
                    569:                rs->sc_dkdev.dk_copenmask |= pmask;
                    570:                break;
                    571:
                    572:        case S_IFBLK:
                    573:                rs->sc_dkdev.dk_bopenmask |= pmask;
                    574:                break;
                    575:        }
1.13      oster     576:
                    577:        if ((rs->sc_dkdev.dk_openmask == 0) &&
                    578:            ((rs->sc_flags & RAIDF_INITED) != 0)) {
                    579:                /* First one... mark things as dirty... Note that we *MUST*
                    580:                 have done a configure before this.  I DO NOT WANT TO BE
                    581:                 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
                    582:                 THAT THEY BELONG TOGETHER!!!!! */
                    583:                /* XXX should check to see if we're only open for reading
                    584:                   here... If so, we needn't do this, but then need some
                    585:                   other way of keeping track of what's happened.. */
                    586:
                    587:                rf_markalldirty( raidPtrs[unit] );
                    588:        }
                    589:
                    590:
1.1       oster     591:        rs->sc_dkdev.dk_openmask =
                    592:            rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
                    593:
                    594:        raidunlock(rs);
                    595:
1.9       oster     596:        return (error);
1.1       oster     597:
                    598:
                    599: }
                    600: /* ARGSUSED */
                    601: int
                    602: raidclose(dev, flags, fmt, p)
1.9       oster     603:        dev_t   dev;
                    604:        int     flags, fmt;
1.1       oster     605:        struct proc *p;
                    606: {
1.9       oster     607:        int     unit = raidunit(dev);
1.1       oster     608:        struct raid_softc *rs;
1.9       oster     609:        int     error = 0;
                    610:        int     part;
1.1       oster     611:
                    612:        if (unit >= numraid)
                    613:                return (ENXIO);
                    614:        rs = &raid_softc[unit];
                    615:
                    616:        if ((error = raidlock(rs)) != 0)
                    617:                return (error);
                    618:
                    619:        part = DISKPART(dev);
                    620:
                    621:        /* ...that much closer to allowing unconfiguration... */
                    622:        switch (fmt) {
                    623:        case S_IFCHR:
                    624:                rs->sc_dkdev.dk_copenmask &= ~(1 << part);
                    625:                break;
                    626:
                    627:        case S_IFBLK:
                    628:                rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
                    629:                break;
                    630:        }
                    631:        rs->sc_dkdev.dk_openmask =
                    632:            rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
1.13      oster     633:
                    634:        if ((rs->sc_dkdev.dk_openmask == 0) &&
                    635:            ((rs->sc_flags & RAIDF_INITED) != 0)) {
                    636:                /* Last one... device is not unconfigured yet.
                    637:                   Device shutdown has taken care of setting the
                    638:                   clean bits if RAIDF_INITED is not set
                    639:                   mark things as clean... */
1.147     oster     640:
1.91      oster     641:                rf_update_component_labels(raidPtrs[unit],
                    642:                                                 RF_FINAL_COMPONENT_UPDATE);
1.107     oster     643:                if (doing_shutdown) {
                    644:                        /* last one, and we're going down, so
                    645:                           lights out for this RAID set too. */
                    646:                        error = rf_Shutdown(raidPtrs[unit]);
                    647:
                    648:                        /* It's no longer initialized... */
                    649:                        rs->sc_flags &= ~RAIDF_INITED;
                    650:
                    651:                        /* Detach the disk. */
                    652:                        disk_detach(&rs->sc_dkdev);
                    653:                }
1.13      oster     654:        }
1.1       oster     655:
                    656:        raidunlock(rs);
                    657:        return (0);
                    658:
                    659: }
                    660:
                    661: void
                    662: raidstrategy(bp)
1.74      augustss  663:        struct buf *bp;
1.1       oster     664: {
1.74      augustss  665:        int s;
1.1       oster     666:
                    667:        unsigned int raidID = raidunit(bp->b_dev);
                    668:        RF_Raid_t *raidPtr;
                    669:        struct raid_softc *rs = &raid_softc[raidID];
                    670:        struct disklabel *lp;
1.9       oster     671:        int     wlabel;
1.1       oster     672:
1.30      oster     673:        if ((rs->sc_flags & RAIDF_INITED) ==0) {
                    674:                bp->b_error = ENXIO;
1.100     chs       675:                bp->b_flags |= B_ERROR;
1.30      oster     676:                bp->b_resid = bp->b_bcount;
                    677:                biodone(bp);
1.1       oster     678:                return;
1.30      oster     679:        }
1.1       oster     680:        if (raidID >= numraid || !raidPtrs[raidID]) {
                    681:                bp->b_error = ENODEV;
                    682:                bp->b_flags |= B_ERROR;
                    683:                bp->b_resid = bp->b_bcount;
                    684:                biodone(bp);
                    685:                return;
                    686:        }
                    687:        raidPtr = raidPtrs[raidID];
                    688:        if (!raidPtr->valid) {
                    689:                bp->b_error = ENODEV;
                    690:                bp->b_flags |= B_ERROR;
                    691:                bp->b_resid = bp->b_bcount;
                    692:                biodone(bp);
                    693:                return;
                    694:        }
                    695:        if (bp->b_bcount == 0) {
                    696:                db1_printf(("b_bcount is zero..\n"));
                    697:                biodone(bp);
                    698:                return;
                    699:        }
                    700:        lp = rs->sc_dkdev.dk_label;
                    701:
                    702:        /*
                    703:         * Do bounds checking and adjust transfer.  If there's an
                    704:         * error, the bounds check will flag that for us.
                    705:         */
                    706:
1.9       oster     707:        wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
1.1       oster     708:        if (DISKPART(bp->b_dev) != RAW_PART)
                    709:                if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
                    710:                        db1_printf(("Bounds check failed!!:%d %d\n",
1.9       oster     711:                                (int) bp->b_blkno, (int) wlabel));
1.1       oster     712:                        biodone(bp);
                    713:                        return;
                    714:                }
1.34      oster     715:        s = splbio();
1.1       oster     716:
                    717:        bp->b_resid = 0;
1.34      oster     718:
                    719:        /* stuff it onto our queue */
1.125     hannken   720:        BUFQ_PUT(&rs->buf_queue, bp);
1.34      oster     721:
                    722:        raidstart(raidPtrs[raidID]);
                    723:
1.1       oster     724:        splx(s);
                    725: }
                    726: /* ARGSUSED */
                    727: int
                    728: raidread(dev, uio, flags)
1.9       oster     729:        dev_t   dev;
1.1       oster     730:        struct uio *uio;
1.9       oster     731:        int     flags;
1.1       oster     732: {
1.9       oster     733:        int     unit = raidunit(dev);
1.1       oster     734:        struct raid_softc *rs;
1.9       oster     735:        int     part;
1.1       oster     736:
                    737:        if (unit >= numraid)
                    738:                return (ENXIO);
                    739:        rs = &raid_softc[unit];
                    740:
                    741:        if ((rs->sc_flags & RAIDF_INITED) == 0)
                    742:                return (ENXIO);
                    743:        part = DISKPART(dev);
                    744:
                    745:        return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
                    746:
                    747: }
                    748: /* ARGSUSED */
                    749: int
                    750: raidwrite(dev, uio, flags)
1.9       oster     751:        dev_t   dev;
1.1       oster     752:        struct uio *uio;
1.9       oster     753:        int     flags;
1.1       oster     754: {
1.9       oster     755:        int     unit = raidunit(dev);
1.1       oster     756:        struct raid_softc *rs;
                    757:
                    758:        if (unit >= numraid)
                    759:                return (ENXIO);
                    760:        rs = &raid_softc[unit];
                    761:
                    762:        if ((rs->sc_flags & RAIDF_INITED) == 0)
                    763:                return (ENXIO);
1.147     oster     764:
1.1       oster     765:        return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
                    766:
                    767: }
                    768:
                    769: int
                    770: raidioctl(dev, cmd, data, flag, p)
1.9       oster     771:        dev_t   dev;
                    772:        u_long  cmd;
1.1       oster     773:        caddr_t data;
1.9       oster     774:        int     flag;
1.1       oster     775:        struct proc *p;
                    776: {
1.9       oster     777:        int     unit = raidunit(dev);
                    778:        int     error = 0;
                    779:        int     part, pmask;
1.1       oster     780:        struct raid_softc *rs;
                    781:        RF_Config_t *k_cfg, *u_cfg;
1.42      oster     782:        RF_Raid_t *raidPtr;
1.48      oster     783:        RF_RaidDisk_t *diskPtr;
1.41      oster     784:        RF_AccTotals_t *totals;
                    785:        RF_DeviceConfig_t *d_cfg, **ucfgp;
1.1       oster     786:        u_char *specific_buf;
1.11      oster     787:        int retcode = 0;
                    788:        int row;
                    789:        int column;
1.123     oster     790:        int raidid;
1.1       oster     791:        struct rf_recon_req *rrcopy, *rr;
1.48      oster     792:        RF_ComponentLabel_t *clabel;
1.11      oster     793:        RF_ComponentLabel_t ci_label;
1.48      oster     794:        RF_ComponentLabel_t **clabel_ptr;
1.12      oster     795:        RF_SingleComponent_t *sparePtr,*componentPtr;
                    796:        RF_SingleComponent_t hot_spare;
                    797:        RF_SingleComponent_t component;
1.83      oster     798:        RF_ProgressInfo_t progressInfo, **progressInfoPtr;
1.41      oster     799:        int i, j, d;
1.102     fvdl      800: #ifdef __HAVE_OLD_DISKLABEL
                    801:        struct disklabel newlabel;
                    802: #endif
1.1       oster     803:
                    804:        if (unit >= numraid)
                    805:                return (ENXIO);
                    806:        rs = &raid_softc[unit];
1.42      oster     807:        raidPtr = raidPtrs[unit];
1.1       oster     808:
1.9       oster     809:        db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
                    810:                (int) DISKPART(dev), (int) unit, (int) cmd));
1.1       oster     811:
                    812:        /* Must be open for writes for these commands... */
                    813:        switch (cmd) {
                    814:        case DIOCSDINFO:
                    815:        case DIOCWDINFO:
1.102     fvdl      816: #ifdef __HAVE_OLD_DISKLABEL
                    817:        case ODIOCWDINFO:
                    818:        case ODIOCSDINFO:
                    819: #endif
1.1       oster     820:        case DIOCWLABEL:
                    821:                if ((flag & FWRITE) == 0)
                    822:                        return (EBADF);
                    823:        }
                    824:
                    825:        /* Must be initialized for these... */
                    826:        switch (cmd) {
                    827:        case DIOCGDINFO:
                    828:        case DIOCSDINFO:
                    829:        case DIOCWDINFO:
1.102     fvdl      830: #ifdef __HAVE_OLD_DISKLABEL
                    831:        case ODIOCGDINFO:
                    832:        case ODIOCWDINFO:
                    833:        case ODIOCSDINFO:
                    834:        case ODIOCGDEFLABEL:
                    835: #endif
1.1       oster     836:        case DIOCGPART:
                    837:        case DIOCWLABEL:
                    838:        case DIOCGDEFLABEL:
                    839:        case RAIDFRAME_SHUTDOWN:
                    840:        case RAIDFRAME_REWRITEPARITY:
                    841:        case RAIDFRAME_GET_INFO:
                    842:        case RAIDFRAME_RESET_ACCTOTALS:
                    843:        case RAIDFRAME_GET_ACCTOTALS:
                    844:        case RAIDFRAME_KEEP_ACCTOTALS:
                    845:        case RAIDFRAME_GET_SIZE:
                    846:        case RAIDFRAME_FAIL_DISK:
                    847:        case RAIDFRAME_COPYBACK:
1.37      oster     848:        case RAIDFRAME_CHECK_RECON_STATUS:
1.83      oster     849:        case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1.11      oster     850:        case RAIDFRAME_GET_COMPONENT_LABEL:
                    851:        case RAIDFRAME_SET_COMPONENT_LABEL:
                    852:        case RAIDFRAME_ADD_HOT_SPARE:
                    853:        case RAIDFRAME_REMOVE_HOT_SPARE:
                    854:        case RAIDFRAME_INIT_LABELS:
1.12      oster     855:        case RAIDFRAME_REBUILD_IN_PLACE:
1.23      oster     856:        case RAIDFRAME_CHECK_PARITY:
1.37      oster     857:        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1.83      oster     858:        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1.37      oster     859:        case RAIDFRAME_CHECK_COPYBACK_STATUS:
1.83      oster     860:        case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1.48      oster     861:        case RAIDFRAME_SET_AUTOCONFIG:
                    862:        case RAIDFRAME_SET_ROOT:
1.73      oster     863:        case RAIDFRAME_DELETE_COMPONENT:
                    864:        case RAIDFRAME_INCORPORATE_HOT_SPARE:
1.1       oster     865:                if ((rs->sc_flags & RAIDF_INITED) == 0)
                    866:                        return (ENXIO);
                    867:        }
1.9       oster     868:
1.1       oster     869:        switch (cmd) {
                    870:
                    871:                /* configure the system */
                    872:        case RAIDFRAME_CONFIGURE:
1.48      oster     873:
                    874:                if (raidPtr->valid) {
                    875:                        /* There is a valid RAID set running on this unit! */
                    876:                        printf("raid%d: Device already configured!\n",unit);
1.66      oster     877:                        return(EINVAL);
1.48      oster     878:                }
                    879:
1.1       oster     880:                /* copy-in the configuration information */
                    881:                /* data points to a pointer to the configuration structure */
1.43      oster     882:
1.9       oster     883:                u_cfg = *((RF_Config_t **) data);
                    884:                RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1.1       oster     885:                if (k_cfg == NULL) {
1.9       oster     886:                        return (ENOMEM);
1.1       oster     887:                }
1.9       oster     888:                retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
                    889:                    sizeof(RF_Config_t));
1.1       oster     890:                if (retcode) {
1.33      oster     891:                        RF_Free(k_cfg, sizeof(RF_Config_t));
1.46      oster     892:                        db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1.9       oster     893:                                retcode));
                    894:                        return (retcode);
1.1       oster     895:                }
1.9       oster     896:                /* allocate a buffer for the layout-specific data, and copy it
                    897:                 * in */
1.1       oster     898:                if (k_cfg->layoutSpecificSize) {
1.9       oster     899:                        if (k_cfg->layoutSpecificSize > 10000) {
1.1       oster     900:                                /* sanity check */
1.33      oster     901:                                RF_Free(k_cfg, sizeof(RF_Config_t));
1.9       oster     902:                                return (EINVAL);
1.1       oster     903:                        }
1.9       oster     904:                        RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
                    905:                            (u_char *));
1.1       oster     906:                        if (specific_buf == NULL) {
1.9       oster     907:                                RF_Free(k_cfg, sizeof(RF_Config_t));
                    908:                                return (ENOMEM);
1.1       oster     909:                        }
1.9       oster     910:                        retcode = copyin(k_cfg->layoutSpecific,
                    911:                            (caddr_t) specific_buf,
                    912:                            k_cfg->layoutSpecificSize);
1.1       oster     913:                        if (retcode) {
1.33      oster     914:                                RF_Free(k_cfg, sizeof(RF_Config_t));
1.42      oster     915:                                RF_Free(specific_buf,
                    916:                                        k_cfg->layoutSpecificSize);
1.46      oster     917:                                db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1.9       oster     918:                                        retcode));
                    919:                                return (retcode);
1.1       oster     920:                        }
1.9       oster     921:                } else
                    922:                        specific_buf = NULL;
1.1       oster     923:                k_cfg->layoutSpecific = specific_buf;
1.9       oster     924:
                    925:                /* should do some kind of sanity check on the configuration.
                    926:                 * Store the sum of all the bytes in the last byte? */
1.1       oster     927:
                    928:                /* configure the system */
                    929:
1.48      oster     930:                /*
                    931:                 * Clear the entire RAID descriptor, just to make sure
                    932:                 *  there is no stale data left in the case of a
                    933:                 *  reconfiguration
                    934:                 */
1.108     thorpej   935:                memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
1.42      oster     936:                raidPtr->raidid = unit;
1.20      oster     937:
1.48      oster     938:                retcode = rf_Configure(raidPtr, k_cfg, NULL);
1.1       oster     939:
1.40      oster     940:                if (retcode == 0) {
1.37      oster     941:
1.40      oster     942:                        /* allow this many simultaneous IO's to
                    943:                           this RAID device */
1.42      oster     944:                        raidPtr->openings = RAIDOUTSTANDING;
1.48      oster     945:
1.59      oster     946:                        raidinit(raidPtr);
                    947:                        rf_markalldirty(raidPtr);
1.9       oster     948:                }
1.1       oster     949:                /* free the buffers.  No return code here. */
                    950:                if (k_cfg->layoutSpecificSize) {
1.9       oster     951:                        RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1.1       oster     952:                }
1.9       oster     953:                RF_Free(k_cfg, sizeof(RF_Config_t));
                    954:
                    955:                return (retcode);
                    956:
                    957:                /* shutdown the system */
1.1       oster     958:        case RAIDFRAME_SHUTDOWN:
1.9       oster     959:
                    960:                if ((error = raidlock(rs)) != 0)
                    961:                        return (error);
1.1       oster     962:
                    963:                /*
                    964:                 * If somebody has a partition mounted, we shouldn't
                    965:                 * shutdown.
                    966:                 */
                    967:
                    968:                part = DISKPART(dev);
                    969:                pmask = (1 << part);
1.9       oster     970:                if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
                    971:                    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
                    972:                        (rs->sc_dkdev.dk_copenmask & pmask))) {
                    973:                        raidunlock(rs);
                    974:                        return (EBUSY);
                    975:                }
1.11      oster     976:
1.42      oster     977:                retcode = rf_Shutdown(raidPtr);
1.1       oster     978:
                    979:                /* It's no longer initialized... */
                    980:                rs->sc_flags &= ~RAIDF_INITED;
1.16      oster     981:
1.9       oster     982:                /* Detach the disk. */
                    983:                disk_detach(&rs->sc_dkdev);
1.1       oster     984:
                    985:                raidunlock(rs);
                    986:
1.9       oster     987:                return (retcode);
1.11      oster     988:        case RAIDFRAME_GET_COMPONENT_LABEL:
1.48      oster     989:                clabel_ptr = (RF_ComponentLabel_t **) data;
1.11      oster     990:                /* need to read the component label for the disk indicated
1.48      oster     991:                   by row,column in clabel */
1.11      oster     992:
                    993:                /* For practice, let's get it directly fromdisk, rather
                    994:                   than from the in-core copy */
1.48      oster     995:                RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
1.11      oster     996:                           (RF_ComponentLabel_t *));
1.48      oster     997:                if (clabel == NULL)
1.11      oster     998:                        return (ENOMEM);
                    999:
1.108     thorpej  1000:                memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1.11      oster    1001:
1.48      oster    1002:                retcode = copyin( *clabel_ptr, clabel,
1.11      oster    1003:                                  sizeof(RF_ComponentLabel_t));
                   1004:
                   1005:                if (retcode) {
1.48      oster    1006:                        RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1.11      oster    1007:                        return(retcode);
                   1008:                }
                   1009:
1.48      oster    1010:                row = clabel->row;
                   1011:                column = clabel->column;
1.26      oster    1012:
1.42      oster    1013:                if ((row < 0) || (row >= raidPtr->numRow) ||
1.90      oster    1014:                    (column < 0) || (column >= raidPtr->numCol +
                   1015:                                     raidPtr->numSpare)) {
1.48      oster    1016:                        RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1.26      oster    1017:                        return(EINVAL);
1.11      oster    1018:                }
                   1019:
1.48      oster    1020:                raidread_component_label(raidPtr->Disks[row][column].dev,
                   1021:                                raidPtr->raid_cinfo[row][column].ci_vp,
                   1022:                                clabel );
1.11      oster    1023:
1.48      oster    1024:                retcode = copyout((caddr_t) clabel,
                   1025:                                  (caddr_t) *clabel_ptr,
1.11      oster    1026:                                  sizeof(RF_ComponentLabel_t));
1.48      oster    1027:                RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1.11      oster    1028:                return (retcode);
                   1029:
                   1030:        case RAIDFRAME_SET_COMPONENT_LABEL:
1.48      oster    1031:                clabel = (RF_ComponentLabel_t *) data;
1.11      oster    1032:
                   1033:                /* XXX check the label for valid stuff... */
                   1034:                /* Note that some things *should not* get modified --
                   1035:                   the user should be re-initing the labels instead of
                   1036:                   trying to patch things.
                   1037:                   */
                   1038:
1.123     oster    1039:                raidid = raidPtr->raidid;
                   1040:                printf("raid%d: Got component label:\n", raidid);
                   1041:                printf("raid%d: Version: %d\n", raidid, clabel->version);
                   1042:                printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
                   1043:                printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
                   1044:                printf("raid%d: Row: %d\n", raidid, clabel->row);
                   1045:                printf("raid%d: Column: %d\n", raidid, clabel->column);
                   1046:                printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows);
                   1047:                printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
                   1048:                printf("raid%d: Clean: %d\n", raidid, clabel->clean);
                   1049:                printf("raid%d: Status: %d\n", raidid, clabel->status);
1.11      oster    1050:
1.48      oster    1051:                row = clabel->row;
                   1052:                column = clabel->column;
1.12      oster    1053:
1.42      oster    1054:                if ((row < 0) || (row >= raidPtr->numRow) ||
                   1055:                    (column < 0) || (column >= raidPtr->numCol)) {
1.12      oster    1056:                        return(EINVAL);
1.11      oster    1057:                }
1.12      oster    1058:
                   1059:                /* XXX this isn't allowed to do anything for now :-) */
1.48      oster    1060:
                   1061:                /* XXX and before it is, we need to fill in the rest
                   1062:                   of the fields!?!?!?! */
1.12      oster    1063: #if 0
1.11      oster    1064:                raidwrite_component_label(
1.42      oster    1065:                             raidPtr->Disks[row][column].dev,
                   1066:                            raidPtr->raid_cinfo[row][column].ci_vp,
1.48      oster    1067:                            clabel );
1.12      oster    1068: #endif
                   1069:                return (0);
1.11      oster    1070:
                   1071:        case RAIDFRAME_INIT_LABELS:
1.48      oster    1072:                clabel = (RF_ComponentLabel_t *) data;
1.11      oster    1073:                /*
                   1074:                   we only want the serial number from
                   1075:                   the above.  We get all the rest of the information
                   1076:                   from the config that was used to create this RAID
                   1077:                   set.
                   1078:                   */
1.12      oster    1079:
1.48      oster    1080:                raidPtr->serial_number = clabel->serial_number;
1.51      oster    1081:
                   1082:                raid_init_component_label(raidPtr, &ci_label);
                   1083:                ci_label.serial_number = clabel->serial_number;
1.11      oster    1084:
1.42      oster    1085:                for(row=0;row<raidPtr->numRow;row++) {
1.11      oster    1086:                        ci_label.row = row;
1.42      oster    1087:                        for(column=0;column<raidPtr->numCol;column++) {
1.48      oster    1088:                                diskPtr = &raidPtr->Disks[row][column];
1.98      oster    1089:                                if (!RF_DEAD_DISK(diskPtr->status)) {
1.94      oster    1090:                                        ci_label.partitionSize = diskPtr->partitionSize;
                   1091:                                        ci_label.column = column;
                   1092:                                        raidwrite_component_label(
                   1093:                                          raidPtr->Disks[row][column].dev,
                   1094:                                          raidPtr->raid_cinfo[row][column].ci_vp,
                   1095:                                          &ci_label );
                   1096:                                }
1.11      oster    1097:                        }
                   1098:                }
                   1099:
                   1100:                return (retcode);
1.48      oster    1101:        case RAIDFRAME_SET_AUTOCONFIG:
1.78      minoura  1102:                d = rf_set_autoconfig(raidPtr, *(int *) data);
1.123     oster    1103:                printf("raid%d: New autoconfig value is: %d\n",
                   1104:                       raidPtr->raidid, d);
1.78      minoura  1105:                *(int *) data = d;
1.48      oster    1106:                return (retcode);
                   1107:
                   1108:        case RAIDFRAME_SET_ROOT:
1.78      minoura  1109:                d = rf_set_rootpartition(raidPtr, *(int *) data);
1.123     oster    1110:                printf("raid%d: New rootpartition value is: %d\n",
                   1111:                       raidPtr->raidid, d);
1.78      minoura  1112:                *(int *) data = d;
1.48      oster    1113:                return (retcode);
1.9       oster    1114:
1.1       oster    1115:                /* initialize all parity */
                   1116:        case RAIDFRAME_REWRITEPARITY:
                   1117:
1.42      oster    1118:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.17      oster    1119:                        /* Parity for RAID 0 is trivially correct */
1.42      oster    1120:                        raidPtr->parity_good = RF_RAID_CLEAN;
1.17      oster    1121:                        return(0);
                   1122:                }
1.37      oster    1123:
1.42      oster    1124:                if (raidPtr->parity_rewrite_in_progress == 1) {
1.37      oster    1125:                        /* Re-write is already in progress! */
                   1126:                        return(EINVAL);
                   1127:                }
1.27      oster    1128:
1.42      oster    1129:                retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1.37      oster    1130:                                           rf_RewriteParityThread,
1.42      oster    1131:                                           raidPtr,"raid_parity");
1.9       oster    1132:                return (retcode);
                   1133:
1.11      oster    1134:
                   1135:        case RAIDFRAME_ADD_HOT_SPARE:
1.12      oster    1136:                sparePtr = (RF_SingleComponent_t *) data;
                   1137:                memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1.42      oster    1138:                retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1.11      oster    1139:                return(retcode);
                   1140:
                   1141:        case RAIDFRAME_REMOVE_HOT_SPARE:
1.73      oster    1142:                return(retcode);
                   1143:
                   1144:        case RAIDFRAME_DELETE_COMPONENT:
                   1145:                componentPtr = (RF_SingleComponent_t *)data;
                   1146:                memcpy( &component, componentPtr,
                   1147:                        sizeof(RF_SingleComponent_t));
                   1148:                retcode = rf_delete_component(raidPtr, &component);
                   1149:                return(retcode);
                   1150:
                   1151:        case RAIDFRAME_INCORPORATE_HOT_SPARE:
                   1152:                componentPtr = (RF_SingleComponent_t *)data;
                   1153:                memcpy( &component, componentPtr,
                   1154:                        sizeof(RF_SingleComponent_t));
                   1155:                retcode = rf_incorporate_hot_spare(raidPtr, &component);
1.11      oster    1156:                return(retcode);
                   1157:
1.12      oster    1158:        case RAIDFRAME_REBUILD_IN_PLACE:
1.24      oster    1159:
1.42      oster    1160:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.24      oster    1161:                        /* Can't do this on a RAID 0!! */
                   1162:                        return(EINVAL);
                   1163:                }
                   1164:
1.42      oster    1165:                if (raidPtr->recon_in_progress == 1) {
1.37      oster    1166:                        /* a reconstruct is already in progress! */
                   1167:                        return(EINVAL);
                   1168:                }
                   1169:
1.12      oster    1170:                componentPtr = (RF_SingleComponent_t *) data;
                   1171:                memcpy( &component, componentPtr,
                   1172:                        sizeof(RF_SingleComponent_t));
                   1173:                row = component.row;
                   1174:                column = component.column;
1.147     oster    1175:
1.42      oster    1176:                if ((row < 0) || (row >= raidPtr->numRow) ||
                   1177:                    (column < 0) || (column >= raidPtr->numCol)) {
1.12      oster    1178:                        return(EINVAL);
                   1179:                }
1.37      oster    1180:
1.149     oster    1181:                RF_LOCK_MUTEX(raidPtr->mutex);
                   1182:                if ((raidPtr->Disks[row][column].status == rf_ds_optimal) &&
                   1183:                    (raidPtr->numFailures > 0)) {
                   1184:                        /* XXX 0 above shouldn't be constant!!! */
                   1185:                        /* some component other than this has failed.
                   1186:                           Let's not make things worse than they already
                   1187:                           are... */
                   1188:                        printf("raid%d: Unable to reconstruct to disk at:\n",
                   1189:                               raidPtr->raidid);
                   1190:                        printf("raid%d:     Row: %d Col: %d   Too many failures.\n",
                   1191:                               raidPtr->raidid, row, column);
                   1192:                        RF_UNLOCK_MUTEX(raidPtr->mutex);
                   1193:                        return (EINVAL);
                   1194:                }
                   1195:                if (raidPtr->Disks[row][column].status ==
                   1196:                    rf_ds_reconstructing) {
                   1197:                        printf("raid%d: Unable to reconstruct to disk at:\n",
                   1198:                               raidPtr->raidid);
                   1199:                        printf("raid%d:    Row: %d Col: %d   Reconstruction already occuring!\n", raidPtr->raidid, row, column);
                   1200:
                   1201:                        RF_UNLOCK_MUTEX(raidPtr->mutex);
                   1202:                        return (EINVAL);
                   1203:                }
                   1204:                if (raidPtr->Disks[row][column].status == rf_ds_spared) {
                   1205:                        RF_UNLOCK_MUTEX(raidPtr->mutex);
                   1206:                        return (EINVAL);
                   1207:                }
                   1208:                RF_UNLOCK_MUTEX(raidPtr->mutex);
                   1209:
1.37      oster    1210:                RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1.38      oster    1211:                if (rrcopy == NULL)
                   1212:                        return(ENOMEM);
1.37      oster    1213:
1.42      oster    1214:                rrcopy->raidPtr = (void *) raidPtr;
1.37      oster    1215:                rrcopy->row = row;
                   1216:                rrcopy->col = column;
                   1217:
1.42      oster    1218:                retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1.37      oster    1219:                                           rf_ReconstructInPlaceThread,
                   1220:                                           rrcopy,"raid_reconip");
1.12      oster    1221:                return(retcode);
                   1222:
1.1       oster    1223:        case RAIDFRAME_GET_INFO:
1.42      oster    1224:                if (!raidPtr->valid)
1.41      oster    1225:                        return (ENODEV);
                   1226:                ucfgp = (RF_DeviceConfig_t **) data;
                   1227:                RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
                   1228:                          (RF_DeviceConfig_t *));
                   1229:                if (d_cfg == NULL)
                   1230:                        return (ENOMEM);
1.108     thorpej  1231:                memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1.42      oster    1232:                d_cfg->rows = raidPtr->numRow;
                   1233:                d_cfg->cols = raidPtr->numCol;
                   1234:                d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1.41      oster    1235:                if (d_cfg->ndevs >= RF_MAX_DISKS) {
                   1236:                        RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
                   1237:                        return (ENOMEM);
                   1238:                }
1.42      oster    1239:                d_cfg->nspares = raidPtr->numSpare;
1.41      oster    1240:                if (d_cfg->nspares >= RF_MAX_DISKS) {
                   1241:                        RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
                   1242:                        return (ENOMEM);
                   1243:                }
1.42      oster    1244:                d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1.41      oster    1245:                d = 0;
                   1246:                for (i = 0; i < d_cfg->rows; i++) {
                   1247:                        for (j = 0; j < d_cfg->cols; j++) {
1.42      oster    1248:                                d_cfg->devs[d] = raidPtr->Disks[i][j];
1.41      oster    1249:                                d++;
1.1       oster    1250:                        }
1.41      oster    1251:                }
                   1252:                for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1.42      oster    1253:                        d_cfg->spares[i] = raidPtr->Disks[0][j];
1.41      oster    1254:                }
                   1255:                retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
                   1256:                                  sizeof(RF_DeviceConfig_t));
                   1257:                RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
                   1258:
                   1259:                return (retcode);
1.9       oster    1260:
1.22      oster    1261:        case RAIDFRAME_CHECK_PARITY:
1.42      oster    1262:                *(int *) data = raidPtr->parity_good;
1.22      oster    1263:                return (0);
1.41      oster    1264:
1.1       oster    1265:        case RAIDFRAME_RESET_ACCTOTALS:
1.108     thorpej  1266:                memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1.41      oster    1267:                return (0);
1.9       oster    1268:
1.1       oster    1269:        case RAIDFRAME_GET_ACCTOTALS:
1.41      oster    1270:                totals = (RF_AccTotals_t *) data;
1.42      oster    1271:                *totals = raidPtr->acc_totals;
1.41      oster    1272:                return (0);
1.9       oster    1273:
1.1       oster    1274:        case RAIDFRAME_KEEP_ACCTOTALS:
1.42      oster    1275:                raidPtr->keep_acc_totals = *(int *)data;
1.41      oster    1276:                return (0);
1.9       oster    1277:
1.1       oster    1278:        case RAIDFRAME_GET_SIZE:
1.42      oster    1279:                *(int *) data = raidPtr->totalSectors;
1.9       oster    1280:                return (0);
1.1       oster    1281:
                   1282:                /* fail a disk & optionally start reconstruction */
                   1283:        case RAIDFRAME_FAIL_DISK:
1.24      oster    1284:
1.42      oster    1285:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.24      oster    1286:                        /* Can't do this on a RAID 0!! */
                   1287:                        return(EINVAL);
                   1288:                }
                   1289:
1.1       oster    1290:                rr = (struct rf_recon_req *) data;
1.9       oster    1291:
1.42      oster    1292:                if (rr->row < 0 || rr->row >= raidPtr->numRow
                   1293:                    || rr->col < 0 || rr->col >= raidPtr->numCol)
1.9       oster    1294:                        return (EINVAL);
1.149     oster    1295:
                   1296:
                   1297:                RF_LOCK_MUTEX(raidPtr->mutex);
                   1298:                if ((raidPtr->Disks[rr->row][rr->col].status ==
                   1299:                     rf_ds_optimal) && (raidPtr->numFailures > 0)) {
                   1300:                        /* some other component has failed.  Let's not make
                   1301:                           things worse. XXX wrong for RAID6 */
                   1302:                        RF_UNLOCK_MUTEX(raidPtr->mutex);
                   1303:                        return (EINVAL);
                   1304:                }
                   1305:                if (raidPtr->Disks[rr->row][rr->col].status == rf_ds_spared) {
                   1306:                        /* Can't fail a spared disk! */
                   1307:                        RF_UNLOCK_MUTEX(raidPtr->mutex);
                   1308:                        return (EINVAL);
                   1309:                }
                   1310:                RF_UNLOCK_MUTEX(raidPtr->mutex);
1.1       oster    1311:
1.9       oster    1312:                /* make a copy of the recon request so that we don't rely on
                   1313:                 * the user's buffer */
1.1       oster    1314:                RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1.38      oster    1315:                if (rrcopy == NULL)
                   1316:                        return(ENOMEM);
1.118     wiz      1317:                memcpy(rrcopy, rr, sizeof(*rr));
1.42      oster    1318:                rrcopy->raidPtr = (void *) raidPtr;
1.1       oster    1319:
1.42      oster    1320:                retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1.37      oster    1321:                                           rf_ReconThread,
                   1322:                                           rrcopy,"raid_recon");
1.9       oster    1323:                return (0);
                   1324:
                   1325:                /* invoke a copyback operation after recon on whatever disk
                   1326:                 * needs it, if any */
                   1327:        case RAIDFRAME_COPYBACK:
1.24      oster    1328:
1.42      oster    1329:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.24      oster    1330:                        /* This makes no sense on a RAID 0!! */
                   1331:                        return(EINVAL);
                   1332:                }
                   1333:
1.42      oster    1334:                if (raidPtr->copyback_in_progress == 1) {
1.37      oster    1335:                        /* Copyback is already in progress! */
                   1336:                        return(EINVAL);
                   1337:                }
1.27      oster    1338:
1.42      oster    1339:                retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1.37      oster    1340:                                           rf_CopybackThread,
1.42      oster    1341:                                           raidPtr,"raid_copyback");
1.37      oster    1342:                return (retcode);
1.9       oster    1343:
1.1       oster    1344:                /* return the percentage completion of reconstruction */
1.37      oster    1345:        case RAIDFRAME_CHECK_RECON_STATUS:
1.42      oster    1346:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.71      oster    1347:                        /* This makes no sense on a RAID 0, so tell the
                   1348:                           user it's done. */
                   1349:                        *(int *) data = 100;
                   1350:                        return(0);
1.24      oster    1351:                }
1.37      oster    1352:                row = 0; /* XXX we only consider a single row... */
1.42      oster    1353:                if (raidPtr->status[row] != rf_rs_reconstructing)
1.1       oster    1354:                        *(int *) data = 100;
1.9       oster    1355:                else
1.42      oster    1356:                        *(int *) data = raidPtr->reconControl[row]->percentComplete;
1.9       oster    1357:                return (0);
1.83      oster    1358:        case RAIDFRAME_CHECK_RECON_STATUS_EXT:
                   1359:                progressInfoPtr = (RF_ProgressInfo_t **) data;
                   1360:                row = 0; /* XXX we only consider a single row... */
                   1361:                if (raidPtr->status[row] != rf_rs_reconstructing) {
                   1362:                        progressInfo.remaining = 0;
                   1363:                        progressInfo.completed = 100;
                   1364:                        progressInfo.total = 100;
                   1365:                } else {
                   1366:                        progressInfo.total =
                   1367:                                raidPtr->reconControl[row]->numRUsTotal;
                   1368:                        progressInfo.completed =
                   1369:                                raidPtr->reconControl[row]->numRUsComplete;
                   1370:                        progressInfo.remaining = progressInfo.total -
                   1371:                                progressInfo.completed;
                   1372:                }
                   1373:                retcode = copyout((caddr_t) &progressInfo,
                   1374:                                  (caddr_t) *progressInfoPtr,
                   1375:                                  sizeof(RF_ProgressInfo_t));
                   1376:                return (retcode);
1.9       oster    1377:
1.37      oster    1378:        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1.42      oster    1379:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.80      oster    1380:                        /* This makes no sense on a RAID 0, so tell the
                   1381:                           user it's done. */
                   1382:                        *(int *) data = 100;
                   1383:                        return(0);
1.37      oster    1384:                }
1.42      oster    1385:                if (raidPtr->parity_rewrite_in_progress == 1) {
1.83      oster    1386:                        *(int *) data = 100 *
                   1387:                                raidPtr->parity_rewrite_stripes_done /
                   1388:                                raidPtr->Layout.numStripe;
1.37      oster    1389:                } else {
                   1390:                        *(int *) data = 100;
                   1391:                }
                   1392:                return (0);
                   1393:
1.83      oster    1394:        case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
                   1395:                progressInfoPtr = (RF_ProgressInfo_t **) data;
                   1396:                if (raidPtr->parity_rewrite_in_progress == 1) {
                   1397:                        progressInfo.total = raidPtr->Layout.numStripe;
                   1398:                        progressInfo.completed =
                   1399:                                raidPtr->parity_rewrite_stripes_done;
                   1400:                        progressInfo.remaining = progressInfo.total -
                   1401:                                progressInfo.completed;
                   1402:                } else {
                   1403:                        progressInfo.remaining = 0;
                   1404:                        progressInfo.completed = 100;
                   1405:                        progressInfo.total = 100;
                   1406:                }
                   1407:                retcode = copyout((caddr_t) &progressInfo,
                   1408:                                  (caddr_t) *progressInfoPtr,
                   1409:                                  sizeof(RF_ProgressInfo_t));
                   1410:                return (retcode);
                   1411:
1.37      oster    1412:        case RAIDFRAME_CHECK_COPYBACK_STATUS:
1.42      oster    1413:                if (raidPtr->Layout.map->faultsTolerated == 0) {
1.37      oster    1414:                        /* This makes no sense on a RAID 0 */
1.83      oster    1415:                        *(int *) data = 100;
                   1416:                        return(0);
1.37      oster    1417:                }
1.42      oster    1418:                if (raidPtr->copyback_in_progress == 1) {
                   1419:                        *(int *) data = 100 * raidPtr->copyback_stripes_done /
                   1420:                                raidPtr->Layout.numStripe;
1.37      oster    1421:                } else {
                   1422:                        *(int *) data = 100;
                   1423:                }
                   1424:                return (0);
                   1425:
1.83      oster    1426:        case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1.93      oster    1427:                progressInfoPtr = (RF_ProgressInfo_t **) data;
1.83      oster    1428:                if (raidPtr->copyback_in_progress == 1) {
                   1429:                        progressInfo.total = raidPtr->Layout.numStripe;
                   1430:                        progressInfo.completed =
1.93      oster    1431:                                raidPtr->copyback_stripes_done;
1.83      oster    1432:                        progressInfo.remaining = progressInfo.total -
                   1433:                                progressInfo.completed;
                   1434:                } else {
                   1435:                        progressInfo.remaining = 0;
                   1436:                        progressInfo.completed = 100;
                   1437:                        progressInfo.total = 100;
                   1438:                }
                   1439:                retcode = copyout((caddr_t) &progressInfo,
                   1440:                                  (caddr_t) *progressInfoPtr,
                   1441:                                  sizeof(RF_ProgressInfo_t));
                   1442:                return (retcode);
1.37      oster    1443:
1.9       oster    1444:                /* the sparetable daemon calls this to wait for the kernel to
                   1445:                 * need a spare table. this ioctl does not return until a
                   1446:                 * spare table is needed. XXX -- calling mpsleep here in the
                   1447:                 * ioctl code is almost certainly wrong and evil. -- XXX XXX
                   1448:                 * -- I should either compute the spare table in the kernel,
                   1449:                 * or have a different -- XXX XXX -- interface (a different
1.42      oster    1450:                 * character device) for delivering the table     -- XXX */
1.1       oster    1451: #if 0
                   1452:        case RAIDFRAME_SPARET_WAIT:
                   1453:                RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1.9       oster    1454:                while (!rf_sparet_wait_queue)
                   1455:                        mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1.1       oster    1456:                waitreq = rf_sparet_wait_queue;
                   1457:                rf_sparet_wait_queue = rf_sparet_wait_queue->next;
                   1458:                RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1.9       oster    1459:
1.42      oster    1460:                /* structure assignment */
                   1461:                *((RF_SparetWait_t *) data) = *waitreq;
1.9       oster    1462:
1.1       oster    1463:                RF_Free(waitreq, sizeof(*waitreq));
1.9       oster    1464:                return (0);
                   1465:
                   1466:                /* wakes up a process waiting on SPARET_WAIT and puts an error
                   1467:                 * code in it that will cause the daemon to exit */
1.1       oster    1468:        case RAIDFRAME_ABORT_SPARET_WAIT:
                   1469:                RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
                   1470:                waitreq->fcol = -1;
                   1471:                RF_LOCK_MUTEX(rf_sparet_wait_mutex);
                   1472:                waitreq->next = rf_sparet_wait_queue;
                   1473:                rf_sparet_wait_queue = waitreq;
                   1474:                RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
                   1475:                wakeup(&rf_sparet_wait_queue);
1.9       oster    1476:                return (0);
1.1       oster    1477:
1.9       oster    1478:                /* used by the spare table daemon to deliver a spare table
                   1479:                 * into the kernel */
1.1       oster    1480:        case RAIDFRAME_SEND_SPARET:
1.9       oster    1481:
1.1       oster    1482:                /* install the spare table */
1.42      oster    1483:                retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1.9       oster    1484:
                   1485:                /* respond to the requestor.  the return status of the spare
                   1486:                 * table installation is passed in the "fcol" field */
1.1       oster    1487:                RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
                   1488:                waitreq->fcol = retcode;
                   1489:                RF_LOCK_MUTEX(rf_sparet_wait_mutex);
                   1490:                waitreq->next = rf_sparet_resp_queue;
                   1491:                rf_sparet_resp_queue = waitreq;
                   1492:                wakeup(&rf_sparet_resp_queue);
                   1493:                RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1.9       oster    1494:
                   1495:                return (retcode);
1.1       oster    1496: #endif
                   1497:
1.9       oster    1498:        default:
1.36      oster    1499:                break; /* fall through to the os-specific code below */
1.1       oster    1500:
                   1501:        }
1.9       oster    1502:
1.42      oster    1503:        if (!raidPtr->valid)
1.9       oster    1504:                return (EINVAL);
                   1505:
1.1       oster    1506:        /*
                   1507:         * Add support for "regular" device ioctls here.
                   1508:         */
1.9       oster    1509:
1.1       oster    1510:        switch (cmd) {
                   1511:        case DIOCGDINFO:
1.9       oster    1512:                *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1.1       oster    1513:                break;
1.102     fvdl     1514: #ifdef __HAVE_OLD_DISKLABEL
                   1515:        case ODIOCGDINFO:
                   1516:                newlabel = *(rs->sc_dkdev.dk_label);
                   1517:                if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1.103     fvdl     1518:                        return ENOTTY;
1.102     fvdl     1519:                memcpy(data, &newlabel, sizeof (struct olddisklabel));
                   1520:                break;
                   1521: #endif
1.1       oster    1522:
                   1523:        case DIOCGPART:
1.9       oster    1524:                ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
                   1525:                ((struct partinfo *) data)->part =
1.1       oster    1526:                    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
                   1527:                break;
                   1528:
                   1529:        case DIOCWDINFO:
                   1530:        case DIOCSDINFO:
1.102     fvdl     1531: #ifdef __HAVE_OLD_DISKLABEL
                   1532:        case ODIOCWDINFO:
                   1533:        case ODIOCSDINFO:
                   1534: #endif
                   1535:        {
                   1536:                struct disklabel *lp;
                   1537: #ifdef __HAVE_OLD_DISKLABEL
                   1538:                if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
                   1539:                        memset(&newlabel, 0, sizeof newlabel);
                   1540:                        memcpy(&newlabel, data, sizeof (struct olddisklabel));
                   1541:                        lp = &newlabel;
                   1542:                } else
                   1543: #endif
                   1544:                lp = (struct disklabel *)data;
                   1545:
1.1       oster    1546:                if ((error = raidlock(rs)) != 0)
                   1547:                        return (error);
                   1548:
                   1549:                rs->sc_flags |= RAIDF_LABELLING;
                   1550:
                   1551:                error = setdisklabel(rs->sc_dkdev.dk_label,
1.102     fvdl     1552:                    lp, 0, rs->sc_dkdev.dk_cpulabel);
1.1       oster    1553:                if (error == 0) {
1.102     fvdl     1554:                        if (cmd == DIOCWDINFO
                   1555: #ifdef __HAVE_OLD_DISKLABEL
                   1556:                            || cmd == ODIOCWDINFO
                   1557: #endif
                   1558:                           )
1.1       oster    1559:                                error = writedisklabel(RAIDLABELDEV(dev),
                   1560:                                    raidstrategy, rs->sc_dkdev.dk_label,
                   1561:                                    rs->sc_dkdev.dk_cpulabel);
                   1562:                }
                   1563:                rs->sc_flags &= ~RAIDF_LABELLING;
                   1564:
                   1565:                raidunlock(rs);
                   1566:
                   1567:                if (error)
                   1568:                        return (error);
                   1569:                break;
1.102     fvdl     1570:        }
1.1       oster    1571:
                   1572:        case DIOCWLABEL:
1.9       oster    1573:                if (*(int *) data != 0)
1.1       oster    1574:                        rs->sc_flags |= RAIDF_WLABEL;
                   1575:                else
                   1576:                        rs->sc_flags &= ~RAIDF_WLABEL;
                   1577:                break;
                   1578:
                   1579:        case DIOCGDEFLABEL:
1.102     fvdl     1580:                raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1.1       oster    1581:                break;
1.102     fvdl     1582:
                   1583: #ifdef __HAVE_OLD_DISKLABEL
                   1584:        case ODIOCGDEFLABEL:
                   1585:                raidgetdefaultlabel(raidPtr, rs, &newlabel);
                   1586:                if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1.103     fvdl     1587:                        return ENOTTY;
1.102     fvdl     1588:                memcpy(data, &newlabel, sizeof (struct olddisklabel));
                   1589:                break;
                   1590: #endif
1.1       oster    1591:
                   1592:        default:
1.39      oster    1593:                retcode = ENOTTY;
1.1       oster    1594:        }
1.9       oster    1595:        return (retcode);
1.1       oster    1596:
                   1597: }
                   1598:
                   1599:
1.9       oster    1600: /* raidinit -- complete the rest of the initialization for the
1.1       oster    1601:    RAIDframe device.  */
                   1602:
                   1603:
1.59      oster    1604: static void
                   1605: raidinit(raidPtr)
1.1       oster    1606:        RF_Raid_t *raidPtr;
                   1607: {
                   1608:        struct raid_softc *rs;
1.59      oster    1609:        int     unit;
1.1       oster    1610:
1.59      oster    1611:        unit = raidPtr->raidid;
1.1       oster    1612:
                   1613:        rs = &raid_softc[unit];
                   1614:
                   1615:        /* XXX should check return code first... */
                   1616:        rs->sc_flags |= RAIDF_INITED;
                   1617:
1.9       oster    1618:        sprintf(rs->sc_xname, "raid%d", unit);  /* XXX doesn't check bounds. */
1.1       oster    1619:
1.9       oster    1620:        rs->sc_dkdev.dk_name = rs->sc_xname;
1.11      oster    1621:
1.1       oster    1622:        /* disk_attach actually creates space for the CPU disklabel, among
1.9       oster    1623:         * other things, so it's critical to call this *BEFORE* we try putzing
                   1624:         * with disklabels. */
1.11      oster    1625:
1.1       oster    1626:        disk_attach(&rs->sc_dkdev);
                   1627:
                   1628:        /* XXX There may be a weird interaction here between this, and
1.9       oster    1629:         * protectedSectors, as used in RAIDframe.  */
1.11      oster    1630:
1.9       oster    1631:        rs->sc_size = raidPtr->totalSectors;
1.11      oster    1632:
1.1       oster    1633: }
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/*
 * Hand a spare-table request to the daemon and wait for an answer.
 *
 * The request is pushed onto the head of rf_sparet_wait_queue and the
 * daemon is woken; we then sleep until something appears on
 * rf_sparet_resp_queue, take the first response off it, and return that
 * response's `fcol' value after freeing it.
 *
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 */
int
rf_GetSpareTableFromDaemon(req)
	RF_SparetWait_t *req;
{
	RF_SparetWait_t *resp;
	int     fcol;

	RF_LOCK_MUTEX(rf_sparet_wait_mutex);

	/* link the request in at the head of the wait queue */
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/*
	 * NOTE(review): the historical comment here claimed "mpsleep unlocks
	 * the mutex", but the mutex is not handed to tsleep() -- confirm
	 * whether it is really released while we sleep.
	 */
	while (rf_sparet_resp_queue == NULL) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}

	/* unlink the first response */
	resp = rf_sparet_resp_queue;
	rf_sparet_resp_queue = resp->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	fcol = resp->fcol;
	/* this is not the same structure as the one the caller alloc'd */
	RF_Free(resp, sizeof(*resp));
	return (fcol);
}
#endif
1.39      oster    1670:
1.11      oster    1671: /* a wrapper around rf_DoAccess that extracts appropriate info from the
                   1672:  * bp & passes it down.
1.1       oster    1673:  * any calls originating in the kernel must use non-blocking I/O
                   1674:  * do some extra sanity checking to return "appropriate" error values for
                   1675:  * certain conditions (to make some standard utilities work)
1.34      oster    1676:  *
                   1677:  * Formerly known as: rf_DoAccessKernel
1.1       oster    1678:  */
1.34      oster    1679: void
                   1680: raidstart(raidPtr)
1.9       oster    1681:        RF_Raid_t *raidPtr;
1.1       oster    1682: {
                   1683:        RF_SectorCount_t num_blocks, pb, sum;
                   1684:        RF_RaidAddr_t raid_addr;
                   1685:        struct partition *pp;
1.9       oster    1686:        daddr_t blocknum;
                   1687:        int     unit;
1.1       oster    1688:        struct raid_softc *rs;
1.9       oster    1689:        int     do_async;
1.34      oster    1690:        struct buf *bp;
1.1       oster    1691:
                   1692:        unit = raidPtr->raidid;
                   1693:        rs = &raid_softc[unit];
1.34      oster    1694:
1.56      oster    1695:        /* quick check to see if anything has died recently */
                   1696:        RF_LOCK_MUTEX(raidPtr->mutex);
                   1697:        if (raidPtr->numNewFailures > 0) {
1.151     oster    1698:                RF_UNLOCK_MUTEX(raidPtr->mutex);
1.91      oster    1699:                rf_update_component_labels(raidPtr,
                   1700:                                           RF_NORMAL_COMPONENT_UPDATE);
1.151     oster    1701:                RF_LOCK_MUTEX(raidPtr->mutex);
1.56      oster    1702:                raidPtr->numNewFailures--;
                   1703:        }
                   1704:
1.34      oster    1705:        /* Check to see if we're at the limit... */
                   1706:        while (raidPtr->openings > 0) {
                   1707:                RF_UNLOCK_MUTEX(raidPtr->mutex);
                   1708:
                   1709:                /* get the next item, if any, from the queue */
1.125     hannken  1710:                if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
1.34      oster    1711:                        /* nothing more to do */
                   1712:                        return;
                   1713:                }
                   1714:
                   1715:                /* Ok, for the bp we have here, bp->b_blkno is relative to the
                   1716:                 * partition.. Need to make it absolute to the underlying
                   1717:                 * device.. */
1.1       oster    1718:
1.34      oster    1719:                blocknum = bp->b_blkno;
                   1720:                if (DISKPART(bp->b_dev) != RAW_PART) {
                   1721:                        pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
                   1722:                        blocknum += pp->p_offset;
                   1723:                }
1.1       oster    1724:
1.34      oster    1725:                db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
                   1726:                            (int) blocknum));
                   1727:
                   1728:                db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
                   1729:                db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
                   1730:
                   1731:                /* *THIS* is where we adjust what block we're going to...
                   1732:                 * but DO NOT TOUCH bp->b_blkno!!! */
                   1733:                raid_addr = blocknum;
                   1734:
                   1735:                num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
                   1736:                pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
                   1737:                sum = raid_addr + num_blocks + pb;
                   1738:                if (1 || rf_debugKernelAccess) {
                   1739:                        db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
                   1740:                                    (int) raid_addr, (int) sum, (int) num_blocks,
                   1741:                                    (int) pb, (int) bp->b_resid));
                   1742:                }
                   1743:                if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
                   1744:                    || (sum < num_blocks) || (sum < pb)) {
                   1745:                        bp->b_error = ENOSPC;
                   1746:                        bp->b_flags |= B_ERROR;
                   1747:                        bp->b_resid = bp->b_bcount;
                   1748:                        biodone(bp);
                   1749:                        RF_LOCK_MUTEX(raidPtr->mutex);
                   1750:                        continue;
                   1751:                }
                   1752:                /*
                   1753:                 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
                   1754:                 */
                   1755:
                   1756:                if (bp->b_bcount & raidPtr->sectorMask) {
                   1757:                        bp->b_error = EINVAL;
                   1758:                        bp->b_flags |= B_ERROR;
                   1759:                        bp->b_resid = bp->b_bcount;
                   1760:                        biodone(bp);
                   1761:                        RF_LOCK_MUTEX(raidPtr->mutex);
                   1762:                        continue;
                   1763:
                   1764:                }
                   1765:                db1_printf(("Calling DoAccess..\n"));
                   1766:
1.1       oster    1767:
1.34      oster    1768:                RF_LOCK_MUTEX(raidPtr->mutex);
                   1769:                raidPtr->openings--;
                   1770:                RF_UNLOCK_MUTEX(raidPtr->mutex);
1.1       oster    1771:
1.34      oster    1772:                /*
                   1773:                 * Everything is async.
                   1774:                 */
                   1775:                do_async = 1;
                   1776:
1.99      oster    1777:                disk_busy(&rs->sc_dkdev);
                   1778:
1.34      oster    1779:                /* XXX we're still at splbio() here... do we *really*
                   1780:                   need to be? */
1.20      oster    1781:
1.99      oster    1782:                /* don't ever condition on bp->b_flags & B_WRITE.
                   1783:                 * always condition on B_READ instead */
1.37      oster    1784:
1.151     oster    1785:                bp->b_error = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1.34      oster    1786:                                      RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
                   1787:                                      do_async, raid_addr, num_blocks,
1.109     oster    1788:                                      bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1.151     oster    1789:
                   1790:                if (bp->b_error) {
                   1791:                        bp->b_flags |= B_ERROR;
                   1792:                }
1.20      oster    1793:
                   1794:                RF_LOCK_MUTEX(raidPtr->mutex);
                   1795:        }
1.34      oster    1796:        RF_UNLOCK_MUTEX(raidPtr->mutex);
                   1797: }
1.20      oster    1798:
                   1799:
1.7       explorer 1800:
                   1801:
1.1       oster    1802: /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
                   1803:
1.9       oster    1804: int
                   1805: rf_DispatchKernelIO(queue, req)
                   1806:        RF_DiskQueue_t *queue;
                   1807:        RF_DiskQueueData_t *req;
1.1       oster    1808: {
1.9       oster    1809:        int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1.1       oster    1810:        struct buf *bp;
1.9       oster    1811:        struct raidbuf *raidbp = NULL;
                   1812:
1.1       oster    1813:        req->queue = queue;
1.9       oster    1814:
1.134     oster    1815: #if DIAGNOSTIC
                   1816:        if (queue->raidPtr->raidid >= numraid) {
1.137     itojun   1817:                printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
                   1818:                    numraid);
1.141     provos   1819:                panic("Invalid Unit number in rf_DispatchKernelIO");
1.1       oster    1820:        }
1.134     oster    1821: #endif
1.1       oster    1822:
                   1823:        bp = req->bp;
1.16      oster    1824: #if 1
1.9       oster    1825:        /* XXX when there is a physical disk failure, someone is passing us a
                   1826:         * buffer that contains old stuff!!  Attempt to deal with this problem
                   1827:         * without taking a performance hit... (not sure where the real bug
                   1828:         * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
1.4       oster    1829:
                   1830:        if (bp->b_flags & B_ERROR) {
                   1831:                bp->b_flags &= ~B_ERROR;
                   1832:        }
1.9       oster    1833:        if (bp->b_error != 0) {
1.4       oster    1834:                bp->b_error = 0;
                   1835:        }
1.16      oster    1836: #endif
1.136     oster    1837:        raidbp = pool_get(&raidframe_cbufpool, PR_NOWAIT);
1.1       oster    1838:
                   1839:        /*
                   1840:         * context for raidiodone
                   1841:         */
                   1842:        raidbp->rf_obp = bp;
                   1843:        raidbp->req = req;
                   1844:
1.32      oster    1845:        LIST_INIT(&raidbp->rf_buf.b_dep);
                   1846:
1.1       oster    1847:        switch (req->type) {
1.9       oster    1848:        case RF_IO_TYPE_NOP:    /* used primarily to unlock a locked queue */
1.1       oster    1849:                /* XXX need to do something extra here.. */
1.9       oster    1850:                /* I'm leaving this in, as I've never actually seen it used,
                   1851:                 * and I'd like folks to report it... GO */
1.1       oster    1852:                printf(("WAKEUP CALLED\n"));
                   1853:                queue->numOutstanding++;
                   1854:
                   1855:                /* XXX need to glue the original buffer into this??  */
                   1856:
                   1857:                KernelWakeupFunc(&raidbp->rf_buf);
                   1858:                break;
1.9       oster    1859:
1.1       oster    1860:        case RF_IO_TYPE_READ:
                   1861:        case RF_IO_TYPE_WRITE:
1.9       oster    1862:
1.1       oster    1863:                if (req->tracerec) {
                   1864:                        RF_ETIMER_START(req->tracerec->timer);
                   1865:                }
1.9       oster    1866:                InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
                   1867:                    op | bp->b_flags, queue->rf_cinfo->ci_dev,
                   1868:                    req->sectorOffset, req->numSector,
                   1869:                    req->buf, KernelWakeupFunc, (void *) req,
                   1870:                    queue->raidPtr->logBytesPerSector, req->b_proc);
1.1       oster    1871:
                   1872:                if (rf_debugKernelAccess) {
1.9       oster    1873:                        db1_printf(("dispatch: bp->b_blkno = %ld\n",
                   1874:                                (long) bp->b_blkno));
1.1       oster    1875:                }
                   1876:                queue->numOutstanding++;
                   1877:                queue->last_deq_sector = req->sectorOffset;
1.9       oster    1878:                /* acc wouldn't have been let in if there were any pending
                   1879:                 * reqs at any other priority */
1.1       oster    1880:                queue->curPriority = req->priority;
                   1881:
                   1882:                db1_printf(("Going for %c to unit %d row %d col %d\n",
1.134     oster    1883:                            req->type, queue->raidPtr->raidid,
                   1884:                            queue->row, queue->col));
1.1       oster    1885:                db1_printf(("sector %d count %d (%d bytes) %d\n",
1.9       oster    1886:                        (int) req->sectorOffset, (int) req->numSector,
                   1887:                        (int) (req->numSector <<
                   1888:                            queue->raidPtr->logBytesPerSector),
                   1889:                        (int) queue->raidPtr->logBytesPerSector));
1.1       oster    1890:                if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
                   1891:                        raidbp->rf_buf.b_vp->v_numoutput++;
                   1892:                }
1.9       oster    1893:                VOP_STRATEGY(&raidbp->rf_buf);
1.1       oster    1894:
                   1895:                break;
1.9       oster    1896:
1.1       oster    1897:        default:
                   1898:                panic("bad req->type in rf_DispatchKernelIO");
                   1899:        }
                   1900:        db1_printf(("Exiting from DispatchKernelIO\n"));
1.134     oster    1901:
1.9       oster    1902:        return (0);
1.1       oster    1903: }
1.9       oster    1904: /* this is the callback function associated with a I/O invoked from
1.1       oster    1905:    kernel code.
                   1906:  */
1.9       oster    1907: static void
                   1908: KernelWakeupFunc(vbp)
                   1909:        struct buf *vbp;
                   1910: {
                   1911:        RF_DiskQueueData_t *req = NULL;
                   1912:        RF_DiskQueue_t *queue;
                   1913:        struct raidbuf *raidbp = (struct raidbuf *) vbp;
                   1914:        struct buf *bp;
1.74      augustss 1915:        int s;
1.9       oster    1916:
1.36      oster    1917:        s = splbio();
1.9       oster    1918:        db1_printf(("recovering the request queue:\n"));
                   1919:        req = raidbp->req;
1.1       oster    1920:
1.9       oster    1921:        bp = raidbp->rf_obp;
1.1       oster    1922:
1.9       oster    1923:        queue = (RF_DiskQueue_t *) req->queue;
1.1       oster    1924:
1.9       oster    1925:        if (raidbp->rf_buf.b_flags & B_ERROR) {
                   1926:                bp->b_flags |= B_ERROR;
                   1927:                bp->b_error = raidbp->rf_buf.b_error ?
                   1928:                    raidbp->rf_buf.b_error : EIO;
                   1929:        }
1.1       oster    1930:
1.9       oster    1931:        /* XXX methinks this could be wrong... */
1.1       oster    1932: #if 1
1.9       oster    1933:        bp->b_resid = raidbp->rf_buf.b_resid;
1.1       oster    1934: #endif
                   1935:
1.9       oster    1936:        if (req->tracerec) {
                   1937:                RF_ETIMER_STOP(req->tracerec->timer);
                   1938:                RF_ETIMER_EVAL(req->tracerec->timer);
                   1939:                RF_LOCK_MUTEX(rf_tracing_mutex);
                   1940:                req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
                   1941:                req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
                   1942:                req->tracerec->num_phys_ios++;
                   1943:                RF_UNLOCK_MUTEX(rf_tracing_mutex);
                   1944:        }
                   1945:        bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1.1       oster    1946:
1.9       oster    1947:        /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
                   1948:         * ballistic, and mark the component as hosed... */
1.36      oster    1949:
1.9       oster    1950:        if (bp->b_flags & B_ERROR) {
                   1951:                /* Mark the disk as dead */
                   1952:                /* but only mark it once... */
                   1953:                if (queue->raidPtr->Disks[queue->row][queue->col].status ==
                   1954:                    rf_ds_optimal) {
                   1955:                        printf("raid%d: IO Error.  Marking %s as failed.\n",
1.136     oster    1956:                               queue->raidPtr->raidid,
                   1957:                               queue->raidPtr->Disks[queue->row][queue->col].devname);
1.9       oster    1958:                        queue->raidPtr->Disks[queue->row][queue->col].status =
                   1959:                            rf_ds_failed;
                   1960:                        queue->raidPtr->status[queue->row] = rf_rs_degraded;
                   1961:                        queue->raidPtr->numFailures++;
1.56      oster    1962:                        queue->raidPtr->numNewFailures++;
1.9       oster    1963:                } else {        /* Disk is already dead... */
                   1964:                        /* printf("Disk already marked as dead!\n"); */
                   1965:                }
1.4       oster    1966:
1.9       oster    1967:        }
1.4       oster    1968:
1.136     oster    1969:        pool_put(&raidframe_cbufpool, raidbp);
1.9       oster    1970:
1.143     oster    1971:        /* Fill in the error value */
                   1972:
                   1973:        req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;
                   1974:
                   1975:        simple_lock(&queue->raidPtr->iodone_lock);
                   1976:
                   1977:        /* Drop this one on the "finished" queue... */
                   1978:        TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
                   1979:
                   1980:        /* Let the raidio thread know there is work to be done. */
                   1981:        wakeup(&(queue->raidPtr->iodone));
                   1982:
                   1983:        simple_unlock(&queue->raidPtr->iodone_lock);
1.1       oster    1984:
1.36      oster    1985:        splx(s);
1.1       oster    1986: }
                   1987:
                   1988:
                   1989:
                   1990: /*
                   1991:  * initialize a buf structure for doing an I/O in the kernel.
                   1992:  */
1.9       oster    1993: static void
1.70      oster    1994: InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
                   1995:        logBytesPerSector, b_proc)
                   1996:        struct buf *bp;
                   1997:        struct vnode *b_vp;
                   1998:        unsigned rw_flag;
                   1999:        dev_t dev;
                   2000:        RF_SectorNum_t startSect;
                   2001:        RF_SectorCount_t numSect;
                   2002:        caddr_t buf;
                   2003:        void (*cbFunc) (struct buf *);
                   2004:        void *cbArg;
                   2005:        int logBytesPerSector;
                   2006:        struct proc *b_proc;
1.9       oster    2007: {
                   2008:        /* bp->b_flags       = B_PHYS | rw_flag; */
                   2009:        bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
                   2010:        bp->b_bcount = numSect << logBytesPerSector;
                   2011:        bp->b_bufsize = bp->b_bcount;
                   2012:        bp->b_error = 0;
                   2013:        bp->b_dev = dev;
1.79      thorpej  2014:        bp->b_data = buf;
1.9       oster    2015:        bp->b_blkno = startSect;
                   2016:        bp->b_resid = bp->b_bcount;     /* XXX is this right!??!?!! */
1.1       oster    2017:        if (bp->b_bcount == 0) {
1.141     provos   2018:                panic("bp->b_bcount is zero in InitBP!!");
1.1       oster    2019:        }
1.9       oster    2020:        bp->b_proc = b_proc;
                   2021:        bp->b_iodone = cbFunc;
                   2022:        bp->b_vp = b_vp;
                   2023:
1.1       oster    2024: }
                   2025:
                   2026: static void
                   2027: raidgetdefaultlabel(raidPtr, rs, lp)
                   2028:        RF_Raid_t *raidPtr;
                   2029:        struct raid_softc *rs;
                   2030:        struct disklabel *lp;
                   2031: {
1.108     thorpej  2032:        memset(lp, 0, sizeof(*lp));
1.1       oster    2033:
                   2034:        /* fabricate a label... */
                   2035:        lp->d_secperunit = raidPtr->totalSectors;
                   2036:        lp->d_secsize = raidPtr->bytesPerSector;
1.45      oster    2037:        lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1.105     oster    2038:        lp->d_ntracks = 4 * raidPtr->numCol;
1.45      oster    2039:        lp->d_ncylinders = raidPtr->totalSectors /
                   2040:                (lp->d_nsectors * lp->d_ntracks);
1.1       oster    2041:        lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
                   2042:
                   2043:        strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1.9       oster    2044:        lp->d_type = DTYPE_RAID;
1.1       oster    2045:        strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
                   2046:        lp->d_rpm = 3600;
                   2047:        lp->d_interleave = 1;
                   2048:        lp->d_flags = 0;
                   2049:
                   2050:        lp->d_partitions[RAW_PART].p_offset = 0;
                   2051:        lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
                   2052:        lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
                   2053:        lp->d_npartitions = RAW_PART + 1;
                   2054:
                   2055:        lp->d_magic = DISKMAGIC;
                   2056:        lp->d_magic2 = DISKMAGIC;
                   2057:        lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
                   2058:
                   2059: }
                   2060: /*
                   2061:  * Read the disklabel from the raid device.  If one is not present, fake one
                   2062:  * up.
                   2063:  */
                   2064: static void
                   2065: raidgetdisklabel(dev)
1.9       oster    2066:        dev_t   dev;
1.1       oster    2067: {
1.9       oster    2068:        int     unit = raidunit(dev);
1.1       oster    2069:        struct raid_softc *rs = &raid_softc[unit];
1.9       oster    2070:        char   *errstring;
1.1       oster    2071:        struct disklabel *lp = rs->sc_dkdev.dk_label;
                   2072:        struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
                   2073:        RF_Raid_t *raidPtr;
                   2074:
                   2075:        db1_printf(("Getting the disklabel...\n"));
                   2076:
1.108     thorpej  2077:        memset(clp, 0, sizeof(*clp));
1.1       oster    2078:
                   2079:        raidPtr = raidPtrs[unit];
                   2080:
                   2081:        raidgetdefaultlabel(raidPtr, rs, lp);
                   2082:
                   2083:        /*
                   2084:         * Call the generic disklabel extraction routine.
                   2085:         */
                   2086:        errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
                   2087:            rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1.9       oster    2088:        if (errstring)
1.1       oster    2089:                raidmakedisklabel(rs);
                   2090:        else {
1.9       oster    2091:                int     i;
1.1       oster    2092:                struct partition *pp;
                   2093:
                   2094:                /*
                   2095:                 * Sanity check whether the found disklabel is valid.
                   2096:                 *
                   2097:                 * This is necessary since total size of the raid device
                   2098:                 * may vary when an interleave is changed even though exactly
                   2099:                 * same componets are used, and old disklabel may used
                   2100:                 * if that is found.
                   2101:                 */
                   2102:                if (lp->d_secperunit != rs->sc_size)
1.123     oster    2103:                        printf("raid%d: WARNING: %s: "
1.1       oster    2104:                            "total sector size in disklabel (%d) != "
1.123     oster    2105:                            "the size of raid (%ld)\n", unit, rs->sc_xname,
1.18      oster    2106:                            lp->d_secperunit, (long) rs->sc_size);
1.1       oster    2107:                for (i = 0; i < lp->d_npartitions; i++) {
                   2108:                        pp = &lp->d_partitions[i];
                   2109:                        if (pp->p_offset + pp->p_size > rs->sc_size)
1.123     oster    2110:                                printf("raid%d: WARNING: %s: end of partition `%c' "
                   2111:                                       "exceeds the size of raid (%ld)\n",
                   2112:                                       unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
1.1       oster    2113:                }
                   2114:        }
                   2115:
                   2116: }
                   2117: /*
                   2118:  * Take care of things one might want to take care of in the event
                   2119:  * that a disklabel isn't present.
                   2120:  */
                   2121: static void
                   2122: raidmakedisklabel(rs)
                   2123:        struct raid_softc *rs;
                   2124: {
                   2125:        struct disklabel *lp = rs->sc_dkdev.dk_label;
                   2126:        db1_printf(("Making a label..\n"));
                   2127:
                   2128:        /*
                   2129:         * For historical reasons, if there's no disklabel present
                   2130:         * the raw partition must be marked FS_BSDFFS.
                   2131:         */
                   2132:
                   2133:        lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
                   2134:
                   2135:        strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
                   2136:
                   2137:        lp->d_checksum = dkcksum(lp);
                   2138: }
                   2139: /*
                   2140:  * Lookup the provided name in the filesystem.  If the file exists,
                   2141:  * is a valid block device, and isn't being used by anyone else,
                   2142:  * set *vpp to the file's vnode.
1.9       oster    2143:  * You'll find the original of this in ccd.c
1.1       oster    2144:  */
                   2145: int
                   2146: raidlookup(path, p, vpp)
1.9       oster    2147:        char   *path;
1.1       oster    2148:        struct proc *p;
                   2149:        struct vnode **vpp;     /* result */
                   2150: {
                   2151:        struct nameidata nd;
                   2152:        struct vnode *vp;
                   2153:        struct vattr va;
1.9       oster    2154:        int     error;
1.1       oster    2155:
                   2156:        NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1.9       oster    2157:        if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1.1       oster    2158:                return (error);
                   2159:        }
                   2160:        vp = nd.ni_vp;
                   2161:        if (vp->v_usecount > 1) {
                   2162:                VOP_UNLOCK(vp, 0);
1.9       oster    2163:                (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1.1       oster    2164:                return (EBUSY);
                   2165:        }
                   2166:        if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
                   2167:                VOP_UNLOCK(vp, 0);
1.9       oster    2168:                (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1.1       oster    2169:                return (error);
                   2170:        }
                   2171:        /* XXX: eventually we should handle VREG, too. */
                   2172:        if (va.va_type != VBLK) {
                   2173:                VOP_UNLOCK(vp, 0);
1.9       oster    2174:                (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1.1       oster    2175:                return (ENOTBLK);
                   2176:        }
                   2177:        VOP_UNLOCK(vp, 0);
                   2178:        *vpp = vp;
                   2179:        return (0);
                   2180: }
                   2181: /*
                   2182:  * Wait interruptibly for an exclusive lock.
                   2183:  *
                   2184:  * XXX
                   2185:  * Several drivers do this; it should be abstracted and made MP-safe.
                   2186:  * (Hmm... where have we seen this warning before :->  GO )
                   2187:  */
                   2188: static int
                   2189: raidlock(rs)
                   2190:        struct raid_softc *rs;
                   2191: {
1.9       oster    2192:        int     error;
1.1       oster    2193:
                   2194:        while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
                   2195:                rs->sc_flags |= RAIDF_WANTED;
1.9       oster    2196:                if ((error =
                   2197:                        tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1.1       oster    2198:                        return (error);
                   2199:        }
                   2200:        rs->sc_flags |= RAIDF_LOCKED;
                   2201:        return (0);
                   2202: }
                   2203: /*
                   2204:  * Unlock and wake up any waiters.
                   2205:  */
                   2206: static void
                   2207: raidunlock(rs)
                   2208:        struct raid_softc *rs;
                   2209: {
                   2210:
                   2211:        rs->sc_flags &= ~RAIDF_LOCKED;
                   2212:        if ((rs->sc_flags & RAIDF_WANTED) != 0) {
                   2213:                rs->sc_flags &= ~RAIDF_WANTED;
                   2214:                wakeup(rs);
                   2215:        }
1.11      oster    2216: }
                   2217:
                   2218:
/*
 * Placement of the component label on each component device.  The
 * label read/write routines below transfer RF_COMPONENT_INFO_SIZE bytes
 * starting at block RF_COMPONENT_INFO_OFFSET / DEV_BSIZE.
 */
#define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
#define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
                   2221:
                   2222: int
1.12      oster    2223: raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
                   2224: {
1.48      oster    2225:        RF_ComponentLabel_t clabel;
                   2226:        raidread_component_label(dev, b_vp, &clabel);
                   2227:        clabel.mod_counter = mod_counter;
                   2228:        clabel.clean = RF_RAID_CLEAN;
                   2229:        raidwrite_component_label(dev, b_vp, &clabel);
1.12      oster    2230:        return(0);
                   2231: }
                   2232:
                   2233:
                   2234: int
                   2235: raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
1.11      oster    2236: {
1.48      oster    2237:        RF_ComponentLabel_t clabel;
                   2238:        raidread_component_label(dev, b_vp, &clabel);
                   2239:        clabel.mod_counter = mod_counter;
                   2240:        clabel.clean = RF_RAID_DIRTY;
                   2241:        raidwrite_component_label(dev, b_vp, &clabel);
1.11      oster    2242:        return(0);
                   2243: }
                   2244:
                   2245: /* ARGSUSED */
                   2246: int
1.48      oster    2247: raidread_component_label(dev, b_vp, clabel)
1.11      oster    2248:        dev_t dev;
                   2249:        struct vnode *b_vp;
1.48      oster    2250:        RF_ComponentLabel_t *clabel;
1.11      oster    2251: {
                   2252:        struct buf *bp;
1.130     gehenna  2253:        const struct bdevsw *bdev;
1.11      oster    2254:        int error;
                   2255:
                   2256:        /* XXX should probably ensure that we don't try to do this if
                   2257:           someone has changed rf_protected_sectors. */
                   2258:
1.98      oster    2259:        if (b_vp == NULL) {
                   2260:                /* For whatever reason, this component is not valid.
                   2261:                   Don't try to read a component label from it. */
                   2262:                return(EINVAL);
                   2263:        }
                   2264:
1.11      oster    2265:        /* get a block of the appropriate size... */
                   2266:        bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
                   2267:        bp->b_dev = dev;
                   2268:
                   2269:        /* get our ducks in a row for the read */
                   2270:        bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
                   2271:        bp->b_bcount = RF_COMPONENT_INFO_SIZE;
1.100     chs      2272:        bp->b_flags |= B_READ;
1.11      oster    2273:        bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
                   2274:
1.130     gehenna  2275:        bdev = bdevsw_lookup(bp->b_dev);
                   2276:        if (bdev == NULL)
                   2277:                return (ENXIO);
                   2278:        (*bdev->d_strategy)(bp);
1.11      oster    2279:
                   2280:        error = biowait(bp);
                   2281:
                   2282:        if (!error) {
1.79      thorpej  2283:                memcpy(clabel, bp->b_data,
1.11      oster    2284:                       sizeof(RF_ComponentLabel_t));
1.147     oster    2285:         }
1.11      oster    2286:
                   2287:        brelse(bp);
                   2288:        return(error);
                   2289: }
                   2290: /* ARGSUSED */
                   2291: int
1.48      oster    2292: raidwrite_component_label(dev, b_vp, clabel)
1.11      oster    2293:        dev_t dev;
                   2294:        struct vnode *b_vp;
1.48      oster    2295:        RF_ComponentLabel_t *clabel;
1.11      oster    2296: {
                   2297:        struct buf *bp;
1.130     gehenna  2298:        const struct bdevsw *bdev;
1.11      oster    2299:        int error;
                   2300:
                   2301:        /* get a block of the appropriate size... */
                   2302:        bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
                   2303:        bp->b_dev = dev;
                   2304:
                   2305:        /* get our ducks in a row for the write */
                   2306:        bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
                   2307:        bp->b_bcount = RF_COMPONENT_INFO_SIZE;
1.100     chs      2308:        bp->b_flags |= B_WRITE;
1.11      oster    2309:        bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
                   2310:
1.79      thorpej  2311:        memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
1.11      oster    2312:
1.79      thorpej  2313:        memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
1.11      oster    2314:
1.130     gehenna  2315:        bdev = bdevsw_lookup(bp->b_dev);
                   2316:        if (bdev == NULL)
                   2317:                return (ENXIO);
                   2318:        (*bdev->d_strategy)(bp);
1.11      oster    2319:        error = biowait(bp);
                   2320:        brelse(bp);
                   2321:        if (error) {
1.48      oster    2322: #if 1
1.11      oster    2323:                printf("Failed to write RAID component info!\n");
1.48      oster    2324: #endif
1.11      oster    2325:        }
                   2326:
                   2327:        return(error);
1.1       oster    2328: }
1.12      oster    2329:
                   2330: void
1.70      oster    2331: rf_markalldirty(raidPtr)
1.12      oster    2332:        RF_Raid_t *raidPtr;
                   2333: {
1.48      oster    2334:        RF_ComponentLabel_t clabel;
1.146     oster    2335:        int sparecol;
1.12      oster    2336:        int r,c;
1.146     oster    2337:        int i,j;
                   2338:        int srow, scol;
1.12      oster    2339:
                   2340:        raidPtr->mod_counter++;
                   2341:        for (r = 0; r < raidPtr->numRow; r++) {
                   2342:                for (c = 0; c < raidPtr->numCol; c++) {
1.98      oster    2343:                        /* we don't want to touch (at all) a disk that has
                   2344:                           failed */
                   2345:                        if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
1.12      oster    2346:                                raidread_component_label(
                   2347:                                        raidPtr->Disks[r][c].dev,
                   2348:                                        raidPtr->raid_cinfo[r][c].ci_vp,
1.48      oster    2349:                                        &clabel);
                   2350:                                if (clabel.status == rf_ds_spared) {
1.12      oster    2351:                                        /* XXX do something special...
                   2352:                                         but whatever you do, don't
                   2353:                                         try to access it!! */
                   2354:                                } else {
1.146     oster    2355:                                        raidmarkdirty(
                   2356:                                              raidPtr->Disks[r][c].dev,
                   2357:                                              raidPtr->raid_cinfo[r][c].ci_vp,
                   2358:                                              raidPtr->mod_counter);
1.12      oster    2359:                                }
                   2360:                        }
                   2361:                }
                   2362:        }
1.146     oster    2363:
1.12      oster    2364:        for( c = 0; c < raidPtr->numSpare ; c++) {
                   2365:                sparecol = raidPtr->numCol + c;
1.146     oster    2366:                if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
1.12      oster    2367:                        /*
                   2368:
                   2369:                           we claim this disk is "optimal" if it's
                   2370:                           rf_ds_used_spare, as that means it should be
                   2371:                           directly substitutable for the disk it replaced.
                   2372:                           We note that too...
                   2373:
                   2374:                         */
                   2375:
                   2376:                        for(i=0;i<raidPtr->numRow;i++) {
                   2377:                                for(j=0;j<raidPtr->numCol;j++) {
                   2378:                                        if ((raidPtr->Disks[i][j].spareRow ==
1.146     oster    2379:                                             0) &&
1.12      oster    2380:                                            (raidPtr->Disks[i][j].spareCol ==
                   2381:                                             sparecol)) {
1.146     oster    2382:                                                srow = i;
                   2383:                                                scol = j;
1.12      oster    2384:                                                break;
                   2385:                                        }
                   2386:                                }
                   2387:                        }
1.146     oster    2388:
1.12      oster    2389:                        raidread_component_label(
1.146     oster    2390:                                 raidPtr->Disks[0][sparecol].dev,
                   2391:                                 raidPtr->raid_cinfo[0][sparecol].ci_vp,
                   2392:                                 &clabel);
1.12      oster    2393:                        /* make sure status is noted */
1.146     oster    2394:
                   2395:                        raid_init_component_label(raidPtr, &clabel);
                   2396:
1.48      oster    2397:                        clabel.row = srow;
                   2398:                        clabel.column = scol;
1.146     oster    2399:                        /* Note: we *don't* change status from rf_ds_used_spare
                   2400:                           to rf_ds_optimal */
                   2401:                        /* clabel.status = rf_ds_optimal; */
                   2402:
                   2403:                        raidmarkdirty(raidPtr->Disks[0][sparecol].dev,
                   2404:                                      raidPtr->raid_cinfo[0][sparecol].ci_vp,
                   2405:                                      raidPtr->mod_counter);
1.12      oster    2406:                }
                   2407:        }
                   2408: }
                   2409:
1.13      oster    2410:
                   2411: void
1.91      oster    2412: rf_update_component_labels(raidPtr, final)
1.13      oster    2413:        RF_Raid_t *raidPtr;
1.91      oster    2414:        int final;
1.13      oster    2415: {
1.48      oster    2416:        RF_ComponentLabel_t clabel;
1.13      oster    2417:        int sparecol;
                   2418:        int r,c;
                   2419:        int i,j;
                   2420:        int srow, scol;
                   2421:
                   2422:        srow = -1;
                   2423:        scol = -1;
                   2424:
                   2425:        /* XXX should do extra checks to make sure things really are clean,
                   2426:           rather than blindly setting the clean bit... */
                   2427:
                   2428:        raidPtr->mod_counter++;
                   2429:
                   2430:        for (r = 0; r < raidPtr->numRow; r++) {
                   2431:                for (c = 0; c < raidPtr->numCol; c++) {
                   2432:                        if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
                   2433:                                raidread_component_label(
                   2434:                                        raidPtr->Disks[r][c].dev,
                   2435:                                        raidPtr->raid_cinfo[r][c].ci_vp,
1.48      oster    2436:                                        &clabel);
1.13      oster    2437:                                /* make sure status is noted */
1.48      oster    2438:                                clabel.status = rf_ds_optimal;
1.57      oster    2439:                                /* bump the counter */
1.60      oster    2440:                                clabel.mod_counter = raidPtr->mod_counter;
1.57      oster    2441:
1.13      oster    2442:                                raidwrite_component_label(
                   2443:                                        raidPtr->Disks[r][c].dev,
                   2444:                                        raidPtr->raid_cinfo[r][c].ci_vp,
1.48      oster    2445:                                        &clabel);
1.91      oster    2446:                                if (final == RF_FINAL_COMPONENT_UPDATE) {
                   2447:                                        if (raidPtr->parity_good == RF_RAID_CLEAN) {
                   2448:                                                raidmarkclean(
                   2449:                                                              raidPtr->Disks[r][c].dev,
                   2450:                                                              raidPtr->raid_cinfo[r][c].ci_vp,
                   2451:                                                              raidPtr->mod_counter);
                   2452:                                        }
                   2453:                                }
1.13      oster    2454:                        }
                   2455:                        /* else we don't touch it.. */
1.63      oster    2456:                }
                   2457:        }
                   2458:
                   2459:        for( c = 0; c < raidPtr->numSpare ; c++) {
                   2460:                sparecol = raidPtr->numCol + c;
1.110     oster    2461:                /* Need to ensure that the reconstruct actually completed! */
1.111     oster    2462:                if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
1.63      oster    2463:                        /*
                   2464:
                   2465:                           we claim this disk is "optimal" if it's
                   2466:                           rf_ds_used_spare, as that means it should be
                   2467:                           directly substitutable for the disk it replaced.
                   2468:                           We note that too...
                   2469:
                   2470:                         */
                   2471:
                   2472:                        for(i=0;i<raidPtr->numRow;i++) {
                   2473:                                for(j=0;j<raidPtr->numCol;j++) {
                   2474:                                        if ((raidPtr->Disks[i][j].spareRow ==
                   2475:                                             0) &&
                   2476:                                            (raidPtr->Disks[i][j].spareCol ==
                   2477:                                             sparecol)) {
                   2478:                                                srow = i;
                   2479:                                                scol = j;
                   2480:                                                break;
                   2481:                                        }
                   2482:                                }
                   2483:                        }
                   2484:
                   2485:                        /* XXX shouldn't *really* need this... */
                   2486:                        raidread_component_label(
                   2487:                                      raidPtr->Disks[0][sparecol].dev,
                   2488:                                      raidPtr->raid_cinfo[0][sparecol].ci_vp,
                   2489:                                      &clabel);
                   2490:                        /* make sure status is noted */
                   2491:
                   2492:                        raid_init_component_label(raidPtr, &clabel);
                   2493:
                   2494:                        clabel.mod_counter = raidPtr->mod_counter;
                   2495:                        clabel.row = srow;
                   2496:                        clabel.column = scol;
                   2497:                        clabel.status = rf_ds_optimal;
                   2498:
                   2499:                        raidwrite_component_label(
                   2500:                                      raidPtr->Disks[0][sparecol].dev,
                   2501:                                      raidPtr->raid_cinfo[0][sparecol].ci_vp,
                   2502:                                      &clabel);
1.91      oster    2503:                        if (final == RF_FINAL_COMPONENT_UPDATE) {
1.13      oster    2504:                                if (raidPtr->parity_good == RF_RAID_CLEAN) {
1.91      oster    2505:                                        raidmarkclean( raidPtr->Disks[0][sparecol].dev,
                   2506:                                                       raidPtr->raid_cinfo[0][sparecol].ci_vp,
                   2507:                                                       raidPtr->mod_counter);
1.13      oster    2508:                                }
                   2509:                        }
                   2510:                }
                   2511:        }
1.68      oster    2512: }
                   2513:
                   2514: void
1.70      oster    2515: rf_close_component(raidPtr, vp, auto_configured)
1.69      oster    2516:        RF_Raid_t *raidPtr;
                   2517:        struct vnode *vp;
                   2518:        int auto_configured;
                   2519: {
                   2520:        struct proc *p;
                   2521:
                   2522:        p = raidPtr->engine_thread;
                   2523:
                   2524:        if (vp != NULL) {
                   2525:                if (auto_configured == 1) {
1.96      oster    2526:                        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.97      oster    2527:                        VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
1.69      oster    2528:                        vput(vp);
                   2529:
                   2530:                } else {
                   2531:                        (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
                   2532:                }
1.147     oster    2533:        }
1.69      oster    2534: }
                   2535:
                   2536:
                   2537: void
1.70      oster    2538: rf_UnconfigureVnodes(raidPtr)
1.68      oster    2539:        RF_Raid_t *raidPtr;
                   2540: {
                   2541:        int r,c;
1.69      oster    2542:        struct vnode *vp;
                   2543:        int acd;
1.68      oster    2544:
                   2545:
                   2546:        /* We take this opportunity to close the vnodes like we should.. */
                   2547:
                   2548:        for (r = 0; r < raidPtr->numRow; r++) {
                   2549:                for (c = 0; c < raidPtr->numCol; c++) {
1.69      oster    2550:                        vp = raidPtr->raid_cinfo[r][c].ci_vp;
                   2551:                        acd = raidPtr->Disks[r][c].auto_configured;
                   2552:                        rf_close_component(raidPtr, vp, acd);
                   2553:                        raidPtr->raid_cinfo[r][c].ci_vp = NULL;
                   2554:                        raidPtr->Disks[r][c].auto_configured = 0;
1.68      oster    2555:                }
                   2556:        }
                   2557:        for (r = 0; r < raidPtr->numSpare; r++) {
1.69      oster    2558:                vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
                   2559:                acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
                   2560:                rf_close_component(raidPtr, vp, acd);
                   2561:                raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
                   2562:                raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
1.68      oster    2563:        }
1.37      oster    2564: }
1.63      oster    2565:
1.37      oster    2566:
                   2567: void
                   2568: rf_ReconThread(req)
                   2569:        struct rf_recon_req *req;
                   2570: {
                   2571:        int     s;
                   2572:        RF_Raid_t *raidPtr;
                   2573:
                   2574:        s = splbio();
                   2575:        raidPtr = (RF_Raid_t *) req->raidPtr;
                   2576:        raidPtr->recon_in_progress = 1;
                   2577:
                   2578:        rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
                   2579:                    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
                   2580:
                   2581:        /* XXX get rid of this! we don't need it at all.. */
                   2582:        RF_Free(req, sizeof(*req));
                   2583:
                   2584:        raidPtr->recon_in_progress = 0;
                   2585:        splx(s);
                   2586:
                   2587:        /* That's all... */
                   2588:        kthread_exit(0);        /* does not return */
                   2589: }
                   2590:
                   2591: void
                   2592: rf_RewriteParityThread(raidPtr)
                   2593:        RF_Raid_t *raidPtr;
                   2594: {
                   2595:        int retcode;
                   2596:        int s;
                   2597:
                   2598:        raidPtr->parity_rewrite_in_progress = 1;
                   2599:        s = splbio();
                   2600:        retcode = rf_RewriteParity(raidPtr);
                   2601:        splx(s);
                   2602:        if (retcode) {
                   2603:                printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
                   2604:        } else {
                   2605:                /* set the clean bit!  If we shutdown correctly,
                   2606:                   the clean bit on each component label will get
                   2607:                   set */
                   2608:                raidPtr->parity_good = RF_RAID_CLEAN;
                   2609:        }
                   2610:        raidPtr->parity_rewrite_in_progress = 0;
1.85      oster    2611:
                   2612:        /* Anyone waiting for us to stop?  If so, inform them... */
                   2613:        if (raidPtr->waitShutdown) {
                   2614:                wakeup(&raidPtr->parity_rewrite_in_progress);
                   2615:        }
1.37      oster    2616:
                   2617:        /* That's all... */
                   2618:        kthread_exit(0);        /* does not return */
                   2619: }
                   2620:
                   2621:
                   2622: void
                   2623: rf_CopybackThread(raidPtr)
                   2624:        RF_Raid_t *raidPtr;
                   2625: {
                   2626:        int s;
                   2627:
                   2628:        raidPtr->copyback_in_progress = 1;
                   2629:        s = splbio();
                   2630:        rf_CopybackReconstructedData(raidPtr);
                   2631:        splx(s);
                   2632:        raidPtr->copyback_in_progress = 0;
                   2633:
                   2634:        /* That's all... */
                   2635:        kthread_exit(0);        /* does not return */
                   2636: }
                   2637:
                   2638:
                   2639: void
                   2640: rf_ReconstructInPlaceThread(req)
                   2641:        struct rf_recon_req *req;
                   2642: {
                   2643:        int retcode;
                   2644:        int s;
                   2645:        RF_Raid_t *raidPtr;
                   2646:
                   2647:        s = splbio();
                   2648:        raidPtr = req->raidPtr;
                   2649:        raidPtr->recon_in_progress = 1;
                   2650:        retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
                   2651:        RF_Free(req, sizeof(*req));
                   2652:        raidPtr->recon_in_progress = 0;
                   2653:        splx(s);
                   2654:
                   2655:        /* That's all... */
                   2656:        kthread_exit(0);        /* does not return */
1.48      oster    2657: }
                   2658:
                   2659: RF_AutoConfig_t *
                   2660: rf_find_raid_components()
                   2661: {
                   2662:        struct vnode *vp;
                   2663:        struct disklabel label;
                   2664:        struct device *dv;
                   2665:        dev_t dev;
1.130     gehenna  2666:        int bmajor;
1.48      oster    2667:        int error;
                   2668:        int i;
                   2669:        int good_one;
                   2670:        RF_ComponentLabel_t *clabel;
                   2671:        RF_AutoConfig_t *ac_list;
                   2672:        RF_AutoConfig_t *ac;
                   2673:
                   2674:
                   2675:        /* initialize the AutoConfig list */
                   2676:        ac_list = NULL;
                   2677:
                   2678:        /* we begin by trolling through *all* the devices on the system */
                   2679:
                   2680:        for (dv = alldevs.tqh_first; dv != NULL;
                   2681:             dv = dv->dv_list.tqe_next) {
                   2682:
                   2683:                /* we are only interested in disks... */
                   2684:                if (dv->dv_class != DV_DISK)
                   2685:                        continue;
                   2686:
                   2687:                /* we don't care about floppies... */
1.140     thorpej  2688:                if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
1.119     leo      2689:                        continue;
                   2690:                }
1.129     oster    2691:
                   2692:                /* we don't care about CD's... */
1.140     thorpej  2693:                if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
1.129     oster    2694:                        continue;
                   2695:                }
                   2696:
1.120     leo      2697:                /* hdfd is the Atari/Hades floppy driver */
1.140     thorpej  2698:                if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
1.121     leo      2699:                        continue;
                   2700:                }
                   2701:                /* fdisa is the Atari/Milan floppy driver */
1.140     thorpej  2702:                if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
1.48      oster    2703:                        continue;
                   2704:                }
                   2705:
                   2706:                /* need to find the device_name_to_block_device_major stuff */
1.130     gehenna  2707:                bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
1.48      oster    2708:
                   2709:                /* get a vnode for the raw partition of this disk */
                   2710:
1.130     gehenna  2711:                dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
1.48      oster    2712:                if (bdevvp(dev, &vp))
                   2713:                        panic("RAID can't alloc vnode");
                   2714:
                   2715:                error = VOP_OPEN(vp, FREAD, NOCRED, 0);
                   2716:
                   2717:                if (error) {
                   2718:                        /* "Who cares."  Continue looking
                   2719:                           for something that exists*/
                   2720:                        vput(vp);
                   2721:                        continue;
                   2722:                }
                   2723:
                   2724:                /* Ok, the disk exists.  Go get the disklabel. */
                   2725:                error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
                   2726:                                  FREAD, NOCRED, 0);
                   2727:                if (error) {
                   2728:                        /*
                   2729:                         * XXX can't happen - open() would
                   2730:                         * have errored out (or faked up one)
                   2731:                         */
                   2732:                        printf("can't get label for dev %s%c (%d)!?!?\n",
                   2733:                               dv->dv_xname, 'a' + RAW_PART, error);
                   2734:                }
                   2735:
                   2736:                /* don't need this any more.  We'll allocate it again
                   2737:                   a little later if we really do... */
1.96      oster    2738:                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.97      oster    2739:                VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
1.48      oster    2740:                vput(vp);
                   2741:
                   2742:                for (i=0; i < label.d_npartitions; i++) {
                   2743:                        /* We only support partitions marked as RAID */
                   2744:                        if (label.d_partitions[i].p_fstype != FS_RAID)
                   2745:                                continue;
                   2746:
1.130     gehenna  2747:                        dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
1.48      oster    2748:                        if (bdevvp(dev, &vp))
                   2749:                                panic("RAID can't alloc vnode");
                   2750:
                   2751:                        error = VOP_OPEN(vp, FREAD, NOCRED, 0);
                   2752:                        if (error) {
                   2753:                                /* Whatever... */
                   2754:                                vput(vp);
                   2755:                                continue;
                   2756:                        }
                   2757:
                   2758:                        good_one = 0;
                   2759:
                   2760:                        clabel = (RF_ComponentLabel_t *)
                   2761:                                malloc(sizeof(RF_ComponentLabel_t),
                   2762:                                       M_RAIDFRAME, M_NOWAIT);
                   2763:                        if (clabel == NULL) {
                   2764:                                /* XXX CLEANUP HERE */
                   2765:                                printf("RAID auto config: out of memory!\n");
                   2766:                                return(NULL); /* XXX probably should panic? */
                   2767:                        }
                   2768:
                   2769:                        if (!raidread_component_label(dev, vp, clabel)) {
                   2770:                                /* Got the label.  Does it look reasonable? */
1.49      oster    2771:                                if (rf_reasonable_label(clabel) &&
1.54      oster    2772:                                    (clabel->partitionSize <=
1.48      oster    2773:                                     label.d_partitions[i].p_size)) {
                   2774: #if DEBUG
                   2775:                                        printf("Component on: %s%c: %d\n",
                   2776:                                               dv->dv_xname, 'a'+i,
                   2777:                                               label.d_partitions[i].p_size);
1.67      oster    2778:                                        rf_print_component_label(clabel);
1.48      oster    2779: #endif
                   2780:                                        /* if it's reasonable, add it,
                   2781:                                           else ignore it. */
                   2782:                                        ac = (RF_AutoConfig_t *)
                   2783:                                                malloc(sizeof(RF_AutoConfig_t),
                   2784:                                                       M_RAIDFRAME,
                   2785:                                                       M_NOWAIT);
                   2786:                                        if (ac == NULL) {
                   2787:                                                /* XXX should panic?? */
                   2788:                                                return(NULL);
                   2789:                                        }
                   2790:
                   2791:                                        sprintf(ac->devname, "%s%c",
                   2792:                                                dv->dv_xname, 'a'+i);
                   2793:                                        ac->dev = dev;
                   2794:                                        ac->vp = vp;
                   2795:                                        ac->clabel = clabel;
                   2796:                                        ac->next = ac_list;
                   2797:                                        ac_list = ac;
                   2798:                                        good_one = 1;
                   2799:                                }
                   2800:                        }
                   2801:                        if (!good_one) {
                   2802:                                /* cleanup */
                   2803:                                free(clabel, M_RAIDFRAME);
1.96      oster    2804:                                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.97      oster    2805:                                VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
1.48      oster    2806:                                vput(vp);
                   2807:                        }
                   2808:                }
                   2809:        }
1.106     oster    2810:        return(ac_list);
1.48      oster    2811: }
                   2812:
                   2813: static int
1.49      oster    2814: rf_reasonable_label(clabel)
1.48      oster    2815:        RF_ComponentLabel_t *clabel;
                   2816: {
                   2817:
                   2818:        if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
                   2819:             (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
                   2820:            ((clabel->clean == RF_RAID_CLEAN) ||
                   2821:             (clabel->clean == RF_RAID_DIRTY)) &&
                   2822:            clabel->row >=0 &&
                   2823:            clabel->column >= 0 &&
                   2824:            clabel->num_rows > 0 &&
                   2825:            clabel->num_columns > 0 &&
                   2826:            clabel->row < clabel->num_rows &&
                   2827:            clabel->column < clabel->num_columns &&
                   2828:            clabel->blockSize > 0 &&
                   2829:            clabel->numBlocks > 0) {
                   2830:                /* label looks reasonable enough... */
                   2831:                return(1);
                   2832:        }
                   2833:        return(0);
                   2834: }
                   2835:
                   2836:
#if DEBUG
/*
 * Debug aid: dump the fields of a component label with printf(),
 * one group of related fields per line.  Only built when DEBUG is set.
 */
void
rf_print_component_label(clabel)
        RF_ComponentLabel_t *clabel;
{
        const char *is_clean;
        const char *is_auto;
        const char *is_root;

        /* flags are shown as Yes/No rather than as raw numbers */
        is_clean = clabel->clean ? "Yes" : "No";
        is_auto = clabel->autoconfigure ? "Yes" : "No";
        is_root = clabel->root_partition ? "Yes" : "No";

        /* position and geometry */
        printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
               clabel->row, clabel->column,
               clabel->num_rows, clabel->num_columns);

        /* identity */
        printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
               clabel->version, clabel->serial_number,
               clabel->mod_counter);
        printf("   Clean: %s Status: %d\n", is_clean, clabel->status);

        /* layout */
        printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
               clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
        printf("   RAID Level: %c  blocksize: %d numBlocks: %d\n",
               (char) clabel->parityConfig, clabel->blockSize,
               clabel->numBlocks);

        /* autoconfiguration state */
        printf("   Autoconfig: %s\n", is_auto);
        printf("   Contains root partition: %s\n", is_root);
        printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
        printf("   Config order: %d\n", clabel->config_order);
#endif
}
#endif
1.48      oster    2865:
                   2866: RF_ConfigSet_t *
                   2867: rf_create_auto_sets(ac_list)
                   2868:        RF_AutoConfig_t *ac_list;
                   2869: {
                   2870:        RF_AutoConfig_t *ac;
                   2871:        RF_ConfigSet_t *config_sets;
                   2872:        RF_ConfigSet_t *cset;
                   2873:        RF_AutoConfig_t *ac_next;
                   2874:
                   2875:
                   2876:        config_sets = NULL;
                   2877:
                   2878:        /* Go through the AutoConfig list, and figure out which components
                   2879:           belong to what sets.  */
                   2880:        ac = ac_list;
                   2881:        while(ac!=NULL) {
                   2882:                /* we're going to putz with ac->next, so save it here
                   2883:                   for use at the end of the loop */
                   2884:                ac_next = ac->next;
                   2885:
                   2886:                if (config_sets == NULL) {
                   2887:                        /* will need at least this one... */
                   2888:                        config_sets = (RF_ConfigSet_t *)
                   2889:                                malloc(sizeof(RF_ConfigSet_t),
                   2890:                                       M_RAIDFRAME, M_NOWAIT);
                   2891:                        if (config_sets == NULL) {
1.141     provos   2892:                                panic("rf_create_auto_sets: No memory!");
1.48      oster    2893:                        }
                   2894:                        /* this one is easy :) */
                   2895:                        config_sets->ac = ac;
                   2896:                        config_sets->next = NULL;
1.51      oster    2897:                        config_sets->rootable = 0;
1.48      oster    2898:                        ac->next = NULL;
                   2899:                } else {
                   2900:                        /* which set does this component fit into? */
                   2901:                        cset = config_sets;
                   2902:                        while(cset!=NULL) {
1.49      oster    2903:                                if (rf_does_it_fit(cset, ac)) {
1.86      oster    2904:                                        /* looks like it matches... */
                   2905:                                        ac->next = cset->ac;
                   2906:                                        cset->ac = ac;
1.48      oster    2907:                                        break;
                   2908:                                }
                   2909:                                cset = cset->next;
                   2910:                        }
                   2911:                        if (cset==NULL) {
                   2912:                                /* didn't find a match above... new set..*/
                   2913:                                cset = (RF_ConfigSet_t *)
                   2914:                                        malloc(sizeof(RF_ConfigSet_t),
                   2915:                                               M_RAIDFRAME, M_NOWAIT);
                   2916:                                if (cset == NULL) {
1.141     provos   2917:                                        panic("rf_create_auto_sets: No memory!");
1.48      oster    2918:                                }
                   2919:                                cset->ac = ac;
                   2920:                                ac->next = NULL;
                   2921:                                cset->next = config_sets;
1.51      oster    2922:                                cset->rootable = 0;
1.48      oster    2923:                                config_sets = cset;
                   2924:                        }
                   2925:                }
                   2926:                ac = ac_next;
                   2927:        }
                   2928:
                   2929:
                   2930:        return(config_sets);
                   2931: }
                   2932:
                   2933: static int
1.49      oster    2934: rf_does_it_fit(cset, ac)
1.48      oster    2935:        RF_ConfigSet_t *cset;
                   2936:        RF_AutoConfig_t *ac;
                   2937: {
                   2938:        RF_ComponentLabel_t *clabel1, *clabel2;
                   2939:
                   2940:        /* If this one matches the *first* one in the set, that's good
                   2941:           enough, since the other members of the set would have been
                   2942:           through here too... */
1.60      oster    2943:        /* note that we are not checking partitionSize here..
                   2944:
                   2945:           Note that we are also not checking the mod_counters here.
                   2946:           If everything else matches execpt the mod_counter, that's
                   2947:           good enough for this test.  We will deal with the mod_counters
                   2948:           a little later in the autoconfiguration process.
                   2949:
                   2950:            (clabel1->mod_counter == clabel2->mod_counter) &&
1.81      oster    2951:
                   2952:           The reason we don't check for this is that failed disks
                   2953:           will have lower modification counts.  If those disks are
                   2954:           not added to the set they used to belong to, then they will
                   2955:           form their own set, which may result in 2 different sets,
                   2956:           for example, competing to be configured at raid0, and
                   2957:           perhaps competing to be the root filesystem set.  If the
                   2958:           wrong ones get configured, or both attempt to become /,
                   2959:           weird behaviour and or serious lossage will occur.  Thus we
                   2960:           need to bring them into the fold here, and kick them out at
                   2961:           a later point.
1.60      oster    2962:
                   2963:        */
1.48      oster    2964:
                   2965:        clabel1 = cset->ac->clabel;
                   2966:        clabel2 = ac->clabel;
                   2967:        if ((clabel1->version == clabel2->version) &&
                   2968:            (clabel1->serial_number == clabel2->serial_number) &&
                   2969:            (clabel1->num_rows == clabel2->num_rows) &&
                   2970:            (clabel1->num_columns == clabel2->num_columns) &&
                   2971:            (clabel1->sectPerSU == clabel2->sectPerSU) &&
                   2972:            (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
                   2973:            (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
                   2974:            (clabel1->parityConfig == clabel2->parityConfig) &&
                   2975:            (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
                   2976:            (clabel1->blockSize == clabel2->blockSize) &&
                   2977:            (clabel1->numBlocks == clabel2->numBlocks) &&
                   2978:            (clabel1->autoconfigure == clabel2->autoconfigure) &&
                   2979:            (clabel1->root_partition == clabel2->root_partition) &&
                   2980:            (clabel1->last_unit == clabel2->last_unit) &&
                   2981:            (clabel1->config_order == clabel2->config_order)) {
                   2982:                /* if it get's here, it almost *has* to be a match */
                   2983:        } else {
                   2984:                /* it's not consistent with somebody in the set..
                   2985:                   punt */
                   2986:                return(0);
                   2987:        }
                   2988:        /* all was fine.. it must fit... */
                   2989:        return(1);
                   2990: }
                   2991:
                   2992: int
1.51      oster    2993: rf_have_enough_components(cset)
                   2994:        RF_ConfigSet_t *cset;
1.48      oster    2995: {
1.51      oster    2996:        RF_AutoConfig_t *ac;
                   2997:        RF_AutoConfig_t *auto_config;
                   2998:        RF_ComponentLabel_t *clabel;
                   2999:        int r,c;
                   3000:        int num_rows;
                   3001:        int num_cols;
                   3002:        int num_missing;
1.86      oster    3003:        int mod_counter;
1.87      oster    3004:        int mod_counter_found;
1.88      oster    3005:        int even_pair_failed;
                   3006:        char parity_type;
                   3007:
1.51      oster    3008:
1.48      oster    3009:        /* check to see that we have enough 'live' components
                   3010:           of this set.  If so, we can configure it if necessary */
                   3011:
1.51      oster    3012:        num_rows = cset->ac->clabel->num_rows;
                   3013:        num_cols = cset->ac->clabel->num_columns;
1.88      oster    3014:        parity_type = cset->ac->clabel->parityConfig;
1.51      oster    3015:
                   3016:        /* XXX Check for duplicate components!?!?!? */
                   3017:
1.86      oster    3018:        /* Determine what the mod_counter is supposed to be for this set. */
                   3019:
1.87      oster    3020:        mod_counter_found = 0;
1.101     oster    3021:        mod_counter = 0;
1.86      oster    3022:        ac = cset->ac;
                   3023:        while(ac!=NULL) {
1.87      oster    3024:                if (mod_counter_found==0) {
1.86      oster    3025:                        mod_counter = ac->clabel->mod_counter;
1.87      oster    3026:                        mod_counter_found = 1;
                   3027:                } else {
                   3028:                        if (ac->clabel->mod_counter > mod_counter) {
                   3029:                                mod_counter = ac->clabel->mod_counter;
                   3030:                        }
1.86      oster    3031:                }
                   3032:                ac = ac->next;
                   3033:        }
                   3034:
1.51      oster    3035:        num_missing = 0;
                   3036:        auto_config = cset->ac;
                   3037:
                   3038:        for(r=0; r<num_rows; r++) {
1.88      oster    3039:                even_pair_failed = 0;
1.51      oster    3040:                for(c=0; c<num_cols; c++) {
                   3041:                        ac = auto_config;
                   3042:                        while(ac!=NULL) {
                   3043:                                if ((ac->clabel->row == r) &&
1.86      oster    3044:                                    (ac->clabel->column == c) &&
                   3045:                                    (ac->clabel->mod_counter == mod_counter)) {
1.51      oster    3046:                                        /* it's this one... */
                   3047: #if DEBUG
                   3048:                                        printf("Found: %s at %d,%d\n",
                   3049:                                               ac->devname,r,c);
                   3050: #endif
                   3051:                                        break;
                   3052:                                }
                   3053:                                ac=ac->next;
                   3054:                        }
                   3055:                        if (ac==NULL) {
                   3056:                                /* Didn't find one here! */
1.88      oster    3057:                                /* special case for RAID 1, especially
                   3058:                                   where there are more than 2
                   3059:                                   components (where RAIDframe treats
                   3060:                                   things a little differently :( ) */
                   3061:                                if (parity_type == '1') {
                   3062:                                        if (c%2 == 0) { /* even component */
                   3063:                                                even_pair_failed = 1;
                   3064:                                        } else { /* odd component.  If
                   3065:                                                     we're failed, and
                   3066:                                                     so is the even
                   3067:                                                     component, it's
                   3068:                                                     "Good Night, Charlie" */
                   3069:                                                if (even_pair_failed == 1) {
                   3070:                                                        return(0);
                   3071:                                                }
                   3072:                                        }
                   3073:                                } else {
                   3074:                                        /* normal accounting */
                   3075:                                        num_missing++;
                   3076:                                }
                   3077:                        }
                   3078:                        if ((parity_type == '1') && (c%2 == 1)) {
                   3079:                                /* Just did an even component, and we didn't
                   3080:                                   bail.. reset the even_pair_failed flag,
                   3081:                                   and go on to the next component.... */
                   3082:                                even_pair_failed = 0;
1.51      oster    3083:                        }
                   3084:                }
                   3085:        }
                   3086:
                   3087:        clabel = cset->ac->clabel;
                   3088:
                   3089:        if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
                   3090:            ((clabel->parityConfig == '4') && (num_missing > 1)) ||
                   3091:            ((clabel->parityConfig == '5') && (num_missing > 1))) {
                   3092:                /* XXX this needs to be made *much* more general */
                   3093:                /* Too many failures */
                   3094:                return(0);
                   3095:        }
                   3096:        /* otherwise, all is well, and we've got enough to take a kick
                   3097:           at autoconfiguring this set */
                   3098:        return(1);
1.48      oster    3099: }
                   3100:
                   3101: void
1.49      oster    3102: rf_create_configuration(ac,config,raidPtr)
1.48      oster    3103:        RF_AutoConfig_t *ac;
                   3104:        RF_Config_t *config;
                   3105:        RF_Raid_t *raidPtr;
                   3106: {
                   3107:        RF_ComponentLabel_t *clabel;
1.77      oster    3108:        int i;
1.48      oster    3109:
                   3110:        clabel = ac->clabel;
                   3111:
                   3112:        /* 1. Fill in the common stuff */
                   3113:        config->numRow = clabel->num_rows;
                   3114:        config->numCol = clabel->num_columns;
                   3115:        config->numSpare = 0; /* XXX should this be set here? */
                   3116:        config->sectPerSU = clabel->sectPerSU;
                   3117:        config->SUsPerPU = clabel->SUsPerPU;
                   3118:        config->SUsPerRU = clabel->SUsPerRU;
                   3119:        config->parityConfig = clabel->parityConfig;
                   3120:        /* XXX... */
                   3121:        strcpy(config->diskQueueType,"fifo");
                   3122:        config->maxOutstandingDiskReqs = clabel->maxOutstanding;
                   3123:        config->layoutSpecificSize = 0; /* XXX ?? */
                   3124:
                   3125:        while(ac!=NULL) {
                   3126:                /* row/col values will be in range due to the checks
                   3127:                   in reasonable_label() */
                   3128:                strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
                   3129:                       ac->devname);
                   3130:                ac = ac->next;
                   3131:        }
                   3132:
1.77      oster    3133:        for(i=0;i<RF_MAXDBGV;i++) {
                   3134:                config->debugVars[i][0] = NULL;
                   3135:        }
1.48      oster    3136: }
                   3137:
                   3138: int
                   3139: rf_set_autoconfig(raidPtr, new_value)
                   3140:        RF_Raid_t *raidPtr;
                   3141:        int new_value;
                   3142: {
                   3143:        RF_ComponentLabel_t clabel;
                   3144:        struct vnode *vp;
                   3145:        dev_t dev;
                   3146:        int row, column;
1.148     oster    3147:        int sparecol;
1.48      oster    3148:
1.54      oster    3149:        raidPtr->autoconfigure = new_value;
1.48      oster    3150:        for(row=0; row<raidPtr->numRow; row++) {
                   3151:                for(column=0; column<raidPtr->numCol; column++) {
1.84      oster    3152:                        if (raidPtr->Disks[row][column].status ==
                   3153:                            rf_ds_optimal) {
                   3154:                                dev = raidPtr->Disks[row][column].dev;
                   3155:                                vp = raidPtr->raid_cinfo[row][column].ci_vp;
                   3156:                                raidread_component_label(dev, vp, &clabel);
                   3157:                                clabel.autoconfigure = new_value;
                   3158:                                raidwrite_component_label(dev, vp, &clabel);
                   3159:                        }
1.48      oster    3160:                }
                   3161:        }
1.148     oster    3162:        for(column = 0; column < raidPtr->numSpare ; column++) {
                   3163:                sparecol = raidPtr->numCol + column;
                   3164:                if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
                   3165:                        dev = raidPtr->Disks[0][sparecol].dev;
                   3166:                        vp = raidPtr->raid_cinfo[0][sparecol].ci_vp;
                   3167:                        raidread_component_label(dev, vp, &clabel);
                   3168:                        clabel.autoconfigure = new_value;
                   3169:                        raidwrite_component_label(dev, vp, &clabel);
                   3170:                }
                   3171:        }
1.48      oster    3172:        return(new_value);
                   3173: }
                   3174:
                   3175: int
                   3176: rf_set_rootpartition(raidPtr, new_value)
                   3177:        RF_Raid_t *raidPtr;
                   3178:        int new_value;
                   3179: {
                   3180:        RF_ComponentLabel_t clabel;
                   3181:        struct vnode *vp;
                   3182:        dev_t dev;
                   3183:        int row, column;
1.148     oster    3184:        int sparecol;
1.48      oster    3185:
1.54      oster    3186:        raidPtr->root_partition = new_value;
1.48      oster    3187:        for(row=0; row<raidPtr->numRow; row++) {
                   3188:                for(column=0; column<raidPtr->numCol; column++) {
1.84      oster    3189:                        if (raidPtr->Disks[row][column].status ==
                   3190:                            rf_ds_optimal) {
                   3191:                                dev = raidPtr->Disks[row][column].dev;
                   3192:                                vp = raidPtr->raid_cinfo[row][column].ci_vp;
                   3193:                                raidread_component_label(dev, vp, &clabel);
                   3194:                                clabel.root_partition = new_value;
                   3195:                                raidwrite_component_label(dev, vp, &clabel);
                   3196:                        }
1.148     oster    3197:                }
                   3198:        }
                   3199:        for(column = 0; column < raidPtr->numSpare ; column++) {
                   3200:                sparecol = raidPtr->numCol + column;
                   3201:                if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
                   3202:                        dev = raidPtr->Disks[0][sparecol].dev;
                   3203:                        vp = raidPtr->raid_cinfo[0][sparecol].ci_vp;
                   3204:                        raidread_component_label(dev, vp, &clabel);
                   3205:                        clabel.root_partition = new_value;
                   3206:                        raidwrite_component_label(dev, vp, &clabel);
1.48      oster    3207:                }
                   3208:        }
                   3209:        return(new_value);
                   3210: }
                   3211:
                   3212: void
1.49      oster    3213: rf_release_all_vps(cset)
1.48      oster    3214:        RF_ConfigSet_t *cset;
                   3215: {
                   3216:        RF_AutoConfig_t *ac;
                   3217:
                   3218:        ac = cset->ac;
                   3219:        while(ac!=NULL) {
                   3220:                /* Close the vp, and give it back */
                   3221:                if (ac->vp) {
1.96      oster    3222:                        vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
1.48      oster    3223:                        VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
                   3224:                        vput(ac->vp);
1.86      oster    3225:                        ac->vp = NULL;
1.48      oster    3226:                }
                   3227:                ac = ac->next;
                   3228:        }
                   3229: }
                   3230:
                   3231:
                   3232: void
1.49      oster    3233: rf_cleanup_config_set(cset)
1.48      oster    3234:        RF_ConfigSet_t *cset;
                   3235: {
                   3236:        RF_AutoConfig_t *ac;
                   3237:        RF_AutoConfig_t *next_ac;
                   3238:
                   3239:        ac = cset->ac;
                   3240:        while(ac!=NULL) {
                   3241:                next_ac = ac->next;
                   3242:                /* nuke the label */
                   3243:                free(ac->clabel, M_RAIDFRAME);
                   3244:                /* cleanup the config structure */
                   3245:                free(ac, M_RAIDFRAME);
                   3246:                /* "next.." */
                   3247:                ac = next_ac;
                   3248:        }
                   3249:        /* and, finally, nuke the config set */
                   3250:        free(cset, M_RAIDFRAME);
                   3251: }
                   3252:
                   3253:
                   3254: void
                   3255: raid_init_component_label(raidPtr, clabel)
                   3256:        RF_Raid_t *raidPtr;
                   3257:        RF_ComponentLabel_t *clabel;
                   3258: {
                   3259:        /* current version number */
                   3260:        clabel->version = RF_COMPONENT_LABEL_VERSION;
1.57      oster    3261:        clabel->serial_number = raidPtr->serial_number;
1.48      oster    3262:        clabel->mod_counter = raidPtr->mod_counter;
                   3263:        clabel->num_rows = raidPtr->numRow;
                   3264:        clabel->num_columns = raidPtr->numCol;
                   3265:        clabel->clean = RF_RAID_DIRTY; /* not clean */
                   3266:        clabel->status = rf_ds_optimal; /* "It's good!" */
                   3267:
                   3268:        clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
                   3269:        clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
                   3270:        clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
1.54      oster    3271:
                   3272:        clabel->blockSize = raidPtr->bytesPerSector;
                   3273:        clabel->numBlocks = raidPtr->sectorsPerDisk;
                   3274:
1.48      oster    3275:        /* XXX not portable */
                   3276:        clabel->parityConfig = raidPtr->Layout.map->parityConfig;
1.54      oster    3277:        clabel->maxOutstanding = raidPtr->maxOutstanding;
                   3278:        clabel->autoconfigure = raidPtr->autoconfigure;
                   3279:        clabel->root_partition = raidPtr->root_partition;
1.48      oster    3280:        clabel->last_unit = raidPtr->raidid;
1.54      oster    3281:        clabel->config_order = raidPtr->config_order;
1.51      oster    3282: }
                   3283:
                   3284: int
                   3285: rf_auto_config_set(cset,unit)
                   3286:        RF_ConfigSet_t *cset;
                   3287:        int *unit;
                   3288: {
                   3289:        RF_Raid_t *raidPtr;
                   3290:        RF_Config_t *config;
                   3291:        int raidID;
                   3292:        int retcode;
                   3293:
1.127     oster    3294: #if DEBUG
1.72      oster    3295:        printf("RAID autoconfigure\n");
1.127     oster    3296: #endif
1.51      oster    3297:
                   3298:        retcode = 0;
                   3299:        *unit = -1;
                   3300:
                   3301:        /* 1. Create a config structure */
                   3302:
                   3303:        config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
                   3304:                                       M_RAIDFRAME,
                   3305:                                       M_NOWAIT);
                   3306:        if (config==NULL) {
                   3307:                printf("Out of mem!?!?\n");
                   3308:                                /* XXX do something more intelligent here. */
                   3309:                return(1);
                   3310:        }
1.77      oster    3311:
                   3312:        memset(config, 0, sizeof(RF_Config_t));
1.51      oster    3313:
                   3314:        /*
                   3315:           2. Figure out what RAID ID this one is supposed to live at
                   3316:           See if we can get the same RAID dev that it was configured
                   3317:           on last time..
                   3318:        */
                   3319:
                   3320:        raidID = cset->ac->clabel->last_unit;
1.52      oster    3321:        if ((raidID < 0) || (raidID >= numraid)) {
1.51      oster    3322:                /* let's not wander off into lala land. */
                   3323:                raidID = numraid - 1;
                   3324:        }
                   3325:        if (raidPtrs[raidID]->valid != 0) {
                   3326:
                   3327:                /*
                   3328:                   Nope... Go looking for an alternative...
                   3329:                   Start high so we don't immediately use raid0 if that's
                   3330:                   not taken.
                   3331:                */
                   3332:
1.115     oster    3333:                for(raidID = numraid - 1; raidID >= 0; raidID--) {
1.51      oster    3334:                        if (raidPtrs[raidID]->valid == 0) {
                   3335:                                /* can use this one! */
                   3336:                                break;
                   3337:                        }
                   3338:                }
                   3339:        }
                   3340:
                   3341:        if (raidID < 0) {
                   3342:                /* punt... */
                   3343:                printf("Unable to auto configure this set!\n");
                   3344:                printf("(Out of RAID devs!)\n");
                   3345:                return(1);
                   3346:        }
1.127     oster    3347:
                   3348: #if DEBUG
1.72      oster    3349:        printf("Configuring raid%d:\n",raidID);
1.127     oster    3350: #endif
                   3351:
1.51      oster    3352:        raidPtr = raidPtrs[raidID];
                   3353:
                   3354:        /* XXX all this stuff should be done SOMEWHERE ELSE! */
                   3355:        raidPtr->raidid = raidID;
                   3356:        raidPtr->openings = RAIDOUTSTANDING;
                   3357:
                   3358:        /* 3. Build the configuration structure */
                   3359:        rf_create_configuration(cset->ac, config, raidPtr);
                   3360:
                   3361:        /* 4. Do the configuration */
                   3362:        retcode = rf_Configure(raidPtr, config, cset->ac);
                   3363:
                   3364:        if (retcode == 0) {
1.61      oster    3365:
1.59      oster    3366:                raidinit(raidPtrs[raidID]);
                   3367:
                   3368:                rf_markalldirty(raidPtrs[raidID]);
1.54      oster    3369:                raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
1.51      oster    3370:                if (cset->ac->clabel->root_partition==1) {
                   3371:                        /* everything configured just fine.  Make a note
                   3372:                           that this set is eligible to be root. */
                   3373:                        cset->rootable = 1;
1.54      oster    3374:                        /* XXX do this here? */
                   3375:                        raidPtrs[raidID]->root_partition = 1;
1.51      oster    3376:                }
                   3377:        }
                   3378:
                   3379:        /* 5. Cleanup */
                   3380:        free(config, M_RAIDFRAME);
                   3381:
                   3382:        *unit = raidID;
                   3383:        return(retcode);
1.99      oster    3384: }
                   3385:
                   3386: void
                   3387: rf_disk_unbusy(desc)
                   3388:        RF_RaidAccessDesc_t *desc;
                   3389: {
                   3390:        struct buf *bp;
                   3391:
                   3392:        bp = (struct buf *)desc->bp;
                   3393:        disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
1.145     mrg      3394:            (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
1.13      oster    3395: }

CVSweb <webmaster@jp.NetBSD.org>