
Annotation of src/sys/dev/raidframe/rf_netbsdkintf.c, Revision 1.29.8.1

1.29.8.1! wrstuden    1: /*     $NetBSD: rf_netbsdkintf.c,v 1.29 1999/08/14 23:34:18 oster Exp $        */
1.1       oster       2: /*-
                      3:  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
                      4:  * All rights reserved.
                      5:  *
                      6:  * This code is derived from software contributed to The NetBSD Foundation
                      7:  * by Greg Oster; Jason R. Thorpe.
                      8:  *
                      9:  * Redistribution and use in source and binary forms, with or without
                     10:  * modification, are permitted provided that the following conditions
                     11:  * are met:
                     12:  * 1. Redistributions of source code must retain the above copyright
                     13:  *    notice, this list of conditions and the following disclaimer.
                     14:  * 2. Redistributions in binary form must reproduce the above copyright
                     15:  *    notice, this list of conditions and the following disclaimer in the
                     16:  *    documentation and/or other materials provided with the distribution.
                     17:  * 3. All advertising materials mentioning features or use of this software
                     18:  *    must display the following acknowledgement:
                     19:  *        This product includes software developed by the NetBSD
                     20:  *        Foundation, Inc. and its contributors.
                     21:  * 4. Neither the name of The NetBSD Foundation nor the names of its
                     22:  *    contributors may be used to endorse or promote products derived
                     23:  *    from this software without specific prior written permission.
                     24:  *
                     25:  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
                     26:  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
                     27:  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
                     28:  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
                     29:  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
                     30:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
                     31:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
                     32:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
                     33:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
                     34:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
                     35:  * POSSIBILITY OF SUCH DAMAGE.
                     36:  */
                     37:
                     38: /*
                     39:  * Copyright (c) 1988 University of Utah.
                     40:  * Copyright (c) 1990, 1993
                     41:  *      The Regents of the University of California.  All rights reserved.
                     42:  *
                     43:  * This code is derived from software contributed to Berkeley by
                     44:  * the Systems Programming Group of the University of Utah Computer
                     45:  * Science Department.
                     46:  *
                     47:  * Redistribution and use in source and binary forms, with or without
                     48:  * modification, are permitted provided that the following conditions
                     49:  * are met:
                     50:  * 1. Redistributions of source code must retain the above copyright
                     51:  *    notice, this list of conditions and the following disclaimer.
                     52:  * 2. Redistributions in binary form must reproduce the above copyright
                     53:  *    notice, this list of conditions and the following disclaimer in the
                     54:  *    documentation and/or other materials provided with the distribution.
                     55:  * 3. All advertising materials mentioning features or use of this software
                     56:  *    must display the following acknowledgement:
                     57:  *      This product includes software developed by the University of
                     58:  *      California, Berkeley and its contributors.
                     59:  * 4. Neither the name of the University nor the names of its contributors
                     60:  *    may be used to endorse or promote products derived from this software
                     61:  *    without specific prior written permission.
                     62:  *
                     63:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     64:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     65:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     66:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     67:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     68:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     69:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     70:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     71:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     72:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     73:  * SUCH DAMAGE.
                     74:  *
                     75:  * from: Utah $Hdr: cd.c 1.6 90/11/28$
                     76:  *
                     77:  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
                     78:  */
                     79:
                     80:
                     81:
                     82:
                     83: /*
                     84:  * Copyright (c) 1995 Carnegie-Mellon University.
                     85:  * All rights reserved.
                     86:  *
                     87:  * Authors: Mark Holland, Jim Zelenka
                     88:  *
                     89:  * Permission to use, copy, modify and distribute this software and
                     90:  * its documentation is hereby granted, provided that both the copyright
                     91:  * notice and this permission notice appear in all copies of the
                     92:  * software, derivative works or modified versions, and any portions
                     93:  * thereof, and that both notices appear in supporting documentation.
                     94:  *
                     95:  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
                     96:  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
                     97:  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
                     98:  *
                     99:  * Carnegie Mellon requests users of this software to return to
                    100:  *
                    101:  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
                    102:  *  School of Computer Science
                    103:  *  Carnegie Mellon University
                    104:  *  Pittsburgh PA 15213-3890
                    105:  *
                    106:  * any improvements or extensions that they make and grant Carnegie the
                    107:  * rights to redistribute these changes.
                    108:  */
                    109:
                    110: /***********************************************************
                    111:  *
                     112:  * rf_netbsdkintf.c -- the NetBSD kernel interface routines for RAIDframe
                    113:  *
                    114:  ***********************************************************/
                    115:
                    116: #include <sys/errno.h>
                    117: #include <sys/param.h>
                    118: #include <sys/pool.h>
                    119: #include <sys/queue.h>
                    120: #include <sys/disk.h>
                    121: #include <sys/device.h>
                    122: #include <sys/stat.h>
                    123: #include <sys/ioctl.h>
                    124: #include <sys/fcntl.h>
                    125: #include <sys/systm.h>
                    126: #include <sys/namei.h>
                    127: #include <sys/vnode.h>
                    129: #include <sys/types.h>
                    130: #include <machine/types.h>
                    131: #include <sys/disklabel.h>
                    132: #include <sys/conf.h>
                    133: #include <sys/lock.h>
                    134: #include <sys/buf.h>
                    135: #include <sys/user.h>
1.8       oster     136:
                    137: #include "raid.h"
1.1       oster     138: #include "rf_raid.h"
                    139: #include "rf_raidframe.h"
                    140: #include "rf_dag.h"
                    141: #include "rf_dagflags.h"
                    142: #include "rf_diskqueue.h"
                    143: #include "rf_acctrace.h"
                    144: #include "rf_etimer.h"
                    145: #include "rf_general.h"
                    146: #include "rf_debugMem.h"
                    147: #include "rf_kintf.h"
                    148: #include "rf_options.h"
                    149: #include "rf_driver.h"
                    150: #include "rf_parityscan.h"
                    151: #include "rf_debugprint.h"
                    152: #include "rf_threadstuff.h"
                    153:
1.9       oster     154: int     rf_kdebug_level = 0;
1.1       oster     155:
                    156: #define RFK_BOOT_NONE 0
                    157: #define RFK_BOOT_GOOD 1
                    158: #define RFK_BOOT_BAD  2
                    159: static int rf_kbooted = RFK_BOOT_NONE;
                    160:
                    161: #ifdef DEBUG
                    162: #define db0_printf(a) printf a
                    163: #define db_printf(a) if (rf_kdebug_level > 0) printf a
                    164: #define db1_printf(a) if (rf_kdebug_level > 0) printf a
                    165: #define db2_printf(a) if (rf_kdebug_level > 1) printf a
                    166: #define db3_printf(a) if (rf_kdebug_level > 2) printf a
                    167: #define db4_printf(a) if (rf_kdebug_level > 3) printf a
                    168: #define db5_printf(a) if (rf_kdebug_level > 4) printf a
1.9       oster     169: #else                          /* DEBUG */
1.1       oster     170: #define db0_printf(a) printf a
                    171: #define db1_printf(a) { }
                    172: #define db2_printf(a) { }
                    173: #define db3_printf(a) { }
                    174: #define db4_printf(a) { }
                    175: #define db5_printf(a) { }
1.9       oster     176: #endif                         /* DEBUG */
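/*
 * A minimal usage sketch for the debug macros above: the doubled
 * parentheses pass an entire printf() argument list through a single
 * macro parameter, e.g.
 *
 *	db1_printf(("raid%d: opening partition %d\n", unit, part));
 *
 * expands (with DEBUG defined) to
 *
 *	if (rf_kdebug_level > 0) printf ("raid%d: opening partition %d\n", unit, part);
 */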
1.1       oster     177:
1.9       oster     178: static RF_Raid_t **raidPtrs;   /* global raid device descriptors */
1.1       oster     179:
1.11      oster     180: RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
1.1       oster     181:
1.10      oster     182: static RF_SparetWait_t *rf_sparet_wait_queue;  /* requests to install a
                    183:                                                 * spare table */
                    184: static RF_SparetWait_t *rf_sparet_resp_queue;  /* responses from
                    185:                                                 * installation process */
                    186:
                    187: static struct rf_recon_req *recon_queue = NULL;        /* used to communicate
                    188:                                                 * reconstruction
                    189:                                                 * requests */
1.1       oster     190:
                    191:
1.9       oster     192: decl_simple_lock_data(, recon_queue_mutex)
1.1       oster     193: #define LOCK_RECON_Q_MUTEX() simple_lock(&recon_queue_mutex)
                    194: #define UNLOCK_RECON_Q_MUTEX() simple_unlock(&recon_queue_mutex)
                    195:
                    196: /* prototypes */
1.10      oster     197: static void KernelWakeupFunc(struct buf * bp);
                    198: static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
                    199:                   dev_t dev, RF_SectorNum_t startSect,
                    200:                   RF_SectorCount_t numSect, caddr_t buf,
                    201:                   void (*cbFunc) (struct buf *), void *cbArg,
                    202:                   int logBytesPerSector, struct proc * b_proc);
1.1       oster     203:
1.11      oster     204: #define Dprintf0(s)       if (rf_queueDebug) \
                    205:      rf_debug_printf(s,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
                    206: #define Dprintf1(s,a)     if (rf_queueDebug) \
                    207:      rf_debug_printf(s,a,NULL,NULL,NULL,NULL,NULL,NULL,NULL)
                    208: #define Dprintf2(s,a,b)   if (rf_queueDebug) \
                    209:      rf_debug_printf(s,a,b,NULL,NULL,NULL,NULL,NULL,NULL)
                    210: #define Dprintf3(s,a,b,c) if (rf_queueDebug) \
                    211:      rf_debug_printf(s,a,b,c,NULL,NULL,NULL,NULL,NULL)
1.1       oster     212:
1.12      oster     213: int raidmarkclean(dev_t dev, struct vnode *b_vp, int);
                    214: int raidmarkdirty(dev_t dev, struct vnode *b_vp, int);
1.1       oster     215:
1.10      oster     216: void raidattach __P((int));
                    217: int raidsize __P((dev_t));
1.1       oster     218:
1.10      oster     219: void    rf_DiskIOComplete(RF_DiskQueue_t *, RF_DiskQueueData_t *, int);
                    220: void    rf_CopybackReconstructedData(RF_Raid_t * raidPtr);
                    221: static int raidinit __P((dev_t, RF_Raid_t *, int));
                    222:
                    223: int raidopen __P((dev_t, int, int, struct proc *));
                    224: int raidclose __P((dev_t, int, int, struct proc *));
                    225: int raidioctl __P((dev_t, u_long, caddr_t, int, struct proc *));
                    226: int raidwrite __P((dev_t, struct uio *, int));
                    227: int raidread __P((dev_t, struct uio *, int));
                    228: void raidstrategy __P((struct buf *));
                    229: int raiddump __P((dev_t, daddr_t, caddr_t, size_t));
1.1       oster     230:
1.11      oster     231: int raidwrite_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
                    232: int raidread_component_label(dev_t, struct vnode *, RF_ComponentLabel_t *);
1.13      oster     233: void rf_update_component_labels( RF_Raid_t *);
1.1       oster     234: /*
                    235:  * Pilfered from ccd.c
                    236:  */
                    237:
1.10      oster     238: struct raidbuf {
                    239:        struct buf rf_buf;      /* new I/O buf.  MUST BE FIRST!!! */
                    240:        struct buf *rf_obp;     /* ptr. to original I/O buf */
                    241:        int     rf_flags;       /* misc. flags */
1.11      oster     242:        RF_DiskQueueData_t *req;/* the request that this was part of.. */
1.10      oster     243: };
1.1       oster     244:
                    245:
                    246: #define RAIDGETBUF(rs) pool_get(&(rs)->sc_cbufpool, PR_NOWAIT)
                    247: #define        RAIDPUTBUF(rs, cbp) pool_put(&(rs)->sc_cbufpool, cbp)
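/*
 * A minimal usage sketch for the structure and macros above (names other
 * than the macros themselves are illustrative, not the driver's actual
 * dispatch code).  Because rf_buf is the first member, the embedded
 * struct buf handed to a completion routine can simply be cast back to
 * the containing raidbuf:
 *
 *	struct raidbuf *raidbp;
 *
 *	raidbp = RAIDGETBUF(rs);	may be NULL (PR_NOWAIT)
 *	raidbp->rf_obp = bp;		remember the original buf
 *	raidbp->rf_flags = 0;
 *	...start the component I/O on &raidbp->rf_buf...
 *
 * and in the completion routine:
 *
 *	struct raidbuf *raidbp = (struct raidbuf *) component_bp;
 *	struct buf *obp = raidbp->rf_obp;
 *	...
 *	RAIDPUTBUF(rs, raidbp);
 */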
                    248:
1.9       oster     249: /* XXX Not sure if the following should be replacing the raidPtrs above,
1.10      oster     250:    or if it should be used in conjunction with that... */
1.1       oster     251:
1.10      oster     252: struct raid_softc {
                    253:        int     sc_flags;       /* flags */
                    254:        int     sc_cflags;      /* configuration flags */
1.11      oster     255:        size_t  sc_size;        /* size of the raid device */
                    256:        dev_t   sc_dev;         /* our device.. */
1.10      oster     257:        char    sc_xname[20];   /* XXX external name */
                    258:        struct disk sc_dkdev;   /* generic disk device info */
                    259:        struct pool sc_cbufpool;        /* component buffer pool */
                    260: };
1.1       oster     261: /* sc_flags */
                    262: #define RAIDF_INITED   0x01    /* unit has been initialized */
                    263: #define RAIDF_WLABEL   0x02    /* label area is writable */
                    264: #define RAIDF_LABELLING        0x04    /* unit is currently being labelled */
                    265: #define RAIDF_WANTED   0x40    /* someone is waiting to obtain a lock */
                    266: #define RAIDF_LOCKED   0x80    /* unit is locked */
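/*
 * RAIDF_LOCKED and RAIDF_WANTED implement the usual softc sleep-lock
 * protocol (as in ccd, from which much of this was pilfered).  A rough
 * sketch of what raidlock()/raidunlock(), defined later in this file,
 * are expected to do -- illustrative only:
 *
 *	while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
 *		rs->sc_flags |= RAIDF_WANTED;
 *		if ((error = tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
 *			return (error);
 *	}
 *	rs->sc_flags |= RAIDF_LOCKED;
 *
 * and, to unlock:
 *
 *	rs->sc_flags &= ~RAIDF_LOCKED;
 *	if ((rs->sc_flags & RAIDF_WANTED) != 0) {
 *		rs->sc_flags &= ~RAIDF_WANTED;
 *		wakeup(rs);
 *	}
 */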
                    267:
                    268: #define        raidunit(x)     DISKUNIT(x)
1.10      oster     269: static int numraid = 0;
1.1       oster     270:
1.20      oster     271: /*
                    272:  * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
                    273:  * Be aware that large numbers can allow the driver to consume a lot of
1.28      oster     274:  * kernel memory, especially on writes, and in degraded mode reads.
                    275:  *
                    276:  * For example: with a stripe width of 64 blocks (32k) and 5 disks,
                    277:  * a single 64K write will typically require 64K for the old data,
                    278:  * 64K for the old parity, and 64K for the new parity, for a total
                    279:  * of 192K (if the parity buffer is not re-used immediately).
                     280:  * Even if it is used immediately, that's still 128K, which when multiplied
                    281:  * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
                    282:  *
                    283:  * Now in degraded mode, for example, a 64K read on the above setup may
                    284:  * require data reconstruction, which will require *all* of the 4 remaining
                    285:  * disks to participate -- 4 * 32K/disk == 128K again.
1.20      oster     286:  */
                    287:
                    288: #ifndef RAIDOUTSTANDING
1.28      oster     289: #define RAIDOUTSTANDING   6
1.20      oster     290: #endif
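/*
 * Making the arithmetic above concrete for the default of 6 outstanding
 * requests (a rough worst-case sketch only; actual usage depends on the
 * RAID level, stripe width, and access pattern):
 *
 *	per 64K write:	64K old data + 64K old parity + 64K new parity
 *			= 192K of temporary buffers
 *	6 such writes:	6 * 192K = 1152K
 *	plus incoming:	6 *  64K =  384K of caller data
 */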
                    291:
1.1       oster     292: #define RAIDLABELDEV(dev)      \
                    293:        (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
                    294:
                    295: /* declared here, and made public, for the benefit of KVM stuff.. */
1.10      oster     296: struct raid_softc *raid_softc;
1.9       oster     297:
1.10      oster     298: static void raidgetdefaultlabel __P((RF_Raid_t *, struct raid_softc *,
                    299:                                     struct disklabel *));
                    300: static void raidgetdisklabel __P((dev_t));
                    301: static void raidmakedisklabel __P((struct raid_softc *));
1.1       oster     302:
1.10      oster     303: static int raidlock __P((struct raid_softc *));
                    304: static void raidunlock __P((struct raid_softc *));
                    305: int raidlookup __P((char *, struct proc * p, struct vnode **));
1.1       oster     306:
1.12      oster     307: static void rf_markalldirty __P((RF_Raid_t *));
1.1       oster     308:
1.10      oster     309: void
                    310: raidattach(num)
1.9       oster     311:        int     num;
1.1       oster     312: {
1.14      oster     313:        int raidID;
                    314:        int i, rc;
1.1       oster     315:
                    316: #ifdef DEBUG
1.9       oster     317:        printf("raidattach: Asked for %d units\n", num);
1.1       oster     318: #endif
                    319:
                    320:        if (num <= 0) {
                    321: #ifdef DIAGNOSTIC
                    322:                panic("raidattach: count <= 0");
                    323: #endif
                    324:                return;
                    325:        }
1.9       oster     326:        /* This is where all the initialization stuff gets done. */
1.1       oster     327:
                    328:        /* Make some space for requested number of units... */
                    329:
                    330:        RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
                    331:        if (raidPtrs == NULL) {
                    332:                panic("raidPtrs is NULL!!\n");
                    333:        }
1.14      oster     334:
                    335:        rc = rf_mutex_init(&rf_sparet_wait_mutex);
                    336:        if (rc) {
                    337:                RF_PANIC();
                    338:        }
                    339:
                    340:        rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
                    341:        recon_queue = NULL;
                    342:
                     343:        for (i = 0; i < num; i++)
                    344:                raidPtrs[i] = NULL;
                    345:        rc = rf_BootRaidframe();
                    346:        if (rc == 0)
                    347:                printf("Kernelized RAIDframe activated\n");
                    348:        else
1.1       oster     349:                panic("Serious error booting RAID!!\n");
1.14      oster     350:
1.9       oster     351:        rf_kbooted = RFK_BOOT_GOOD;
                    352:
                    353:        /* put together some datastructures like the CCD device does.. This
                    354:         * lets us lock the device and what-not when it gets opened. */
1.1       oster     355:
                    356:        raid_softc = (struct raid_softc *)
1.9       oster     357:            malloc(num * sizeof(struct raid_softc),
                    358:            M_RAIDFRAME, M_NOWAIT);
1.1       oster     359:        if (raid_softc == NULL) {
                    360:                printf("WARNING: no memory for RAIDframe driver\n");
                    361:                return;
                    362:        }
                    363:        numraid = num;
                    364:        bzero(raid_softc, num * sizeof(struct raid_softc));
1.11      oster     365:
1.9       oster     366:        for (raidID = 0; raidID < num; raidID++) {
                    367:                RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
1.11      oster     368:                          (RF_Raid_t *));
1.9       oster     369:                if (raidPtrs[raidID] == NULL) {
                    370:                        printf("raidPtrs[%d] is NULL\n", raidID);
1.1       oster     371:                }
                    372:        }
                    373: }
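/*
 * raidattach() above is the pseudo-device attach routine; "num" comes
 * from the count on the pseudo-device line in the kernel configuration
 * file, e.g. (the count of 4 is just an example):
 *
 *	pseudo-device	raid	4	# RAIDframe disk driver
 *
 * which makes units raid0 through raid3 available for configuration
 * with raidctl(8).
 */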
                    374:
                    375:
                    376: int
                    377: raidsize(dev)
1.9       oster     378:        dev_t   dev;
1.1       oster     379: {
                    380:        struct raid_softc *rs;
                    381:        struct disklabel *lp;
1.9       oster     382:        int     part, unit, omask, size;
1.1       oster     383:
                    384:        unit = raidunit(dev);
                    385:        if (unit >= numraid)
                    386:                return (-1);
                    387:        rs = &raid_softc[unit];
                    388:
                    389:        if ((rs->sc_flags & RAIDF_INITED) == 0)
                    390:                return (-1);
                    391:
                    392:        part = DISKPART(dev);
                    393:        omask = rs->sc_dkdev.dk_openmask & (1 << part);
                    394:        lp = rs->sc_dkdev.dk_label;
                    395:
                    396:        if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
                    397:                return (-1);
                    398:
                    399:        if (lp->d_partitions[part].p_fstype != FS_SWAP)
                    400:                size = -1;
                    401:        else
                    402:                size = lp->d_partitions[part].p_size *
                    403:                    (lp->d_secsize / DEV_BSIZE);
                    404:
                    405:        if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
                    406:                return (-1);
                    407:
                    408:        return (size);
                    409:
                    410: }
                    411:
                    412: int
                    413: raiddump(dev, blkno, va, size)
1.9       oster     414:        dev_t   dev;
1.1       oster     415:        daddr_t blkno;
                    416:        caddr_t va;
1.9       oster     417:        size_t  size;
1.1       oster     418: {
                    419:        /* Not implemented. */
                    420:        return ENXIO;
                    421: }
                    422: /* ARGSUSED */
                    423: int
                    424: raidopen(dev, flags, fmt, p)
1.9       oster     425:        dev_t   dev;
                    426:        int     flags, fmt;
1.1       oster     427:        struct proc *p;
                    428: {
1.9       oster     429:        int     unit = raidunit(dev);
1.1       oster     430:        struct raid_softc *rs;
                    431:        struct disklabel *lp;
1.9       oster     432:        int     part, pmask;
                    433:        int     error = 0;
                    434:
1.1       oster     435:        if (unit >= numraid)
                    436:                return (ENXIO);
                    437:        rs = &raid_softc[unit];
                    438:
                    439:        if ((error = raidlock(rs)) != 0)
1.9       oster     440:                return (error);
1.1       oster     441:        lp = rs->sc_dkdev.dk_label;
                    442:
                    443:        part = DISKPART(dev);
                    444:        pmask = (1 << part);
                    445:
                    446:        db1_printf(("Opening raid device number: %d partition: %d\n",
1.14      oster     447:                unit, part));
1.1       oster     448:
                    449:
                    450:        if ((rs->sc_flags & RAIDF_INITED) &&
                    451:            (rs->sc_dkdev.dk_openmask == 0))
1.9       oster     452:                raidgetdisklabel(dev);
1.1       oster     453:
                    454:        /* make sure that this partition exists */
                    455:
                    456:        if (part != RAW_PART) {
                    457:                db1_printf(("Not a raw partition..\n"));
                    458:                if (((rs->sc_flags & RAIDF_INITED) == 0) ||
                    459:                    ((part >= lp->d_npartitions) ||
1.9       oster     460:                        (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
1.1       oster     461:                        error = ENXIO;
                    462:                        raidunlock(rs);
                    463:                        db1_printf(("Bailing out...\n"));
1.9       oster     464:                        return (error);
1.1       oster     465:                }
                    466:        }
                    467:        /* Prevent this unit from being unconfigured while open. */
                    468:        switch (fmt) {
                    469:        case S_IFCHR:
                    470:                rs->sc_dkdev.dk_copenmask |= pmask;
                    471:                break;
                    472:
                    473:        case S_IFBLK:
                    474:                rs->sc_dkdev.dk_bopenmask |= pmask;
                    475:                break;
                    476:        }
1.13      oster     477:
                    478:        if ((rs->sc_dkdev.dk_openmask == 0) &&
                    479:            ((rs->sc_flags & RAIDF_INITED) != 0)) {
                    480:                /* First one... mark things as dirty... Note that we *MUST*
                    481:                 have done a configure before this.  I DO NOT WANT TO BE
                    482:                 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
                    483:                 THAT THEY BELONG TOGETHER!!!!! */
                    484:                /* XXX should check to see if we're only open for reading
                    485:                   here... If so, we needn't do this, but then need some
                    486:                   other way of keeping track of what's happened.. */
                    487:
                    488:                rf_markalldirty( raidPtrs[unit] );
                    489:        }
                    490:
                    491:
1.1       oster     492:        rs->sc_dkdev.dk_openmask =
                    493:            rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
                    494:
                    495:        raidunlock(rs);
                    496:
1.9       oster     497:        return (error);
1.1       oster     498:
                    499:
                    500: }
                    501: /* ARGSUSED */
                    502: int
                    503: raidclose(dev, flags, fmt, p)
1.9       oster     504:        dev_t   dev;
                    505:        int     flags, fmt;
1.1       oster     506:        struct proc *p;
                    507: {
1.9       oster     508:        int     unit = raidunit(dev);
1.1       oster     509:        struct raid_softc *rs;
1.9       oster     510:        int     error = 0;
                    511:        int     part;
1.1       oster     512:
                    513:        if (unit >= numraid)
                    514:                return (ENXIO);
                    515:        rs = &raid_softc[unit];
                    516:
                    517:        if ((error = raidlock(rs)) != 0)
                    518:                return (error);
                    519:
                    520:        part = DISKPART(dev);
                    521:
                    522:        /* ...that much closer to allowing unconfiguration... */
                    523:        switch (fmt) {
                    524:        case S_IFCHR:
                    525:                rs->sc_dkdev.dk_copenmask &= ~(1 << part);
                    526:                break;
                    527:
                    528:        case S_IFBLK:
                    529:                rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
                    530:                break;
                    531:        }
                    532:        rs->sc_dkdev.dk_openmask =
                    533:            rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
1.13      oster     534:
                    535:        if ((rs->sc_dkdev.dk_openmask == 0) &&
                    536:            ((rs->sc_flags & RAIDF_INITED) != 0)) {
                     537:                /* Last one... device is not unconfigured yet.
                     538:                   If RAIDF_INITED is not set, device shutdown has
                     539:                   already taken care of setting the clean bits.
                     540:                   Otherwise, mark things as clean here... */
                    541:                rf_update_component_labels( raidPtrs[unit] );
                    542:        }
1.1       oster     543:
                    544:        raidunlock(rs);
                    545:        return (0);
                    546:
                    547: }
                    548:
                    549: void
                    550: raidstrategy(bp)
                    551:        register struct buf *bp;
                    552: {
                    553:        register int s;
                    554:
                    555:        unsigned int raidID = raidunit(bp->b_dev);
                    556:        RF_Raid_t *raidPtr;
                    557:        struct raid_softc *rs = &raid_softc[raidID];
                    558:        struct disklabel *lp;
1.9       oster     559:        int     wlabel;
1.1       oster     560:
1.5       oster     561: #if 0
1.9       oster     562:        db1_printf(("Strategy: 0x%x 0x%x\n", bp, bp->b_data));
                    563:        db1_printf(("Strategy(2): bp->b_bufsize%d\n", (int) bp->b_bufsize));
                    564:        db1_printf(("bp->b_count=%d\n", (int) bp->b_bcount));
                    565:        db1_printf(("bp->b_resid=%d\n", (int) bp->b_resid));
                    566:        db1_printf(("bp->b_blkno=%d\n", (int) bp->b_blkno));
1.5       oster     567:
1.9       oster     568:        if (bp->b_flags & B_READ)
1.1       oster     569:                db1_printf(("READ\n"));
                    570:        else
                    571:                db1_printf(("WRITE\n"));
                    572: #endif
                    573:        if (rf_kbooted != RFK_BOOT_GOOD)
                    574:                return;
                    575:        if (raidID >= numraid || !raidPtrs[raidID]) {
                    576:                bp->b_error = ENODEV;
                    577:                bp->b_flags |= B_ERROR;
                    578:                bp->b_resid = bp->b_bcount;
                    579:                biodone(bp);
                    580:                return;
                    581:        }
                    582:        raidPtr = raidPtrs[raidID];
                    583:        if (!raidPtr->valid) {
                    584:                bp->b_error = ENODEV;
                    585:                bp->b_flags |= B_ERROR;
                    586:                bp->b_resid = bp->b_bcount;
                    587:                biodone(bp);
                    588:                return;
                    589:        }
                    590:        if (bp->b_bcount == 0) {
                    591:                db1_printf(("b_bcount is zero..\n"));
                    592:                biodone(bp);
                    593:                return;
                    594:        }
                    595:        lp = rs->sc_dkdev.dk_label;
                    596:
                    597:        /*
                    598:         * Do bounds checking and adjust transfer.  If there's an
                    599:         * error, the bounds check will flag that for us.
                    600:         */
                    601:
1.9       oster     602:        wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
1.1       oster     603:        if (DISKPART(bp->b_dev) != RAW_PART)
                    604:                if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
                    605:                        db1_printf(("Bounds check failed!!:%d %d\n",
1.9       oster     606:                                (int) bp->b_blkno, (int) wlabel));
1.1       oster     607:                        biodone(bp);
                    608:                        return;
                    609:                }
1.9       oster     610:        s = splbio();           /* XXX Needed? */
                    611:        db1_printf(("Beginning strategy...\n"));
1.1       oster     612:
                    613:        bp->b_resid = 0;
1.9       oster     614:        bp->b_error = rf_DoAccessKernel(raidPtrs[raidID], bp,
                    615:            NULL, NULL, NULL);
1.1       oster     616:        if (bp->b_error) {
                    617:                bp->b_flags |= B_ERROR;
                    618:                db1_printf(("bp->b_flags HAS B_ERROR SET!!!: %d\n",
1.9       oster     619:                        bp->b_error));
1.1       oster     620:        }
                    621:        splx(s);
1.5       oster     622: #if 0
1.1       oster     623:        db1_printf(("Strategy exiting: 0x%x 0x%x %d %d\n",
1.9       oster     624:                bp, bp->b_data,
                    625:                (int) bp->b_bcount, (int) bp->b_resid));
1.5       oster     626: #endif
1.1       oster     627: }
                    628: /* ARGSUSED */
                    629: int
                    630: raidread(dev, uio, flags)
1.9       oster     631:        dev_t   dev;
1.1       oster     632:        struct uio *uio;
1.9       oster     633:        int     flags;
1.1       oster     634: {
1.9       oster     635:        int     unit = raidunit(dev);
1.1       oster     636:        struct raid_softc *rs;
1.9       oster     637:        int     part;
1.1       oster     638:
                    639:        if (unit >= numraid)
                    640:                return (ENXIO);
                    641:        rs = &raid_softc[unit];
                    642:
                    643:        if ((rs->sc_flags & RAIDF_INITED) == 0)
                    644:                return (ENXIO);
                    645:        part = DISKPART(dev);
                    646:
1.9       oster     647:        db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
1.1       oster     648:
                    649:        return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
                    650:
                    651: }
                    652: /* ARGSUSED */
                    653: int
                    654: raidwrite(dev, uio, flags)
1.9       oster     655:        dev_t   dev;
1.1       oster     656:        struct uio *uio;
1.9       oster     657:        int     flags;
1.1       oster     658: {
1.9       oster     659:        int     unit = raidunit(dev);
1.1       oster     660:        struct raid_softc *rs;
                    661:
                    662:        if (unit >= numraid)
                    663:                return (ENXIO);
                    664:        rs = &raid_softc[unit];
                    665:
                    666:        if ((rs->sc_flags & RAIDF_INITED) == 0)
                    667:                return (ENXIO);
                    668:        db1_printf(("raidwrite\n"));
                    669:        return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
                    670:
                    671: }
                    672:
                    673: int
                    674: raidioctl(dev, cmd, data, flag, p)
1.9       oster     675:        dev_t   dev;
                    676:        u_long  cmd;
1.1       oster     677:        caddr_t data;
1.9       oster     678:        int     flag;
1.1       oster     679:        struct proc *p;
                    680: {
1.9       oster     681:        int     unit = raidunit(dev);
                    682:        int     error = 0;
                    683:        int     part, pmask;
1.1       oster     684:        struct raid_softc *rs;
                    685:        RF_Config_t *k_cfg, *u_cfg;
                    686:        u_char *specific_buf;
1.11      oster     687:        int retcode = 0;
                    688:        int row;
                    689:        int column;
1.21      oster     690:        int s;
1.1       oster     691:        struct rf_recon_req *rrcopy, *rr;
1.11      oster     692:        RF_ComponentLabel_t *component_label;
                    693:        RF_ComponentLabel_t ci_label;
                    694:        RF_ComponentLabel_t **c_label_ptr;
1.12      oster     695:        RF_SingleComponent_t *sparePtr,*componentPtr;
                    696:        RF_SingleComponent_t hot_spare;
                    697:        RF_SingleComponent_t component;
1.1       oster     698:
                    699:        if (unit >= numraid)
                    700:                return (ENXIO);
                    701:        rs = &raid_softc[unit];
                    702:
1.9       oster     703:        db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
                    704:                (int) DISKPART(dev), (int) unit, (int) cmd));
1.1       oster     705:
                    706:        /* Must be open for writes for these commands... */
                    707:        switch (cmd) {
                    708:        case DIOCSDINFO:
                    709:        case DIOCWDINFO:
                    710:        case DIOCWLABEL:
                    711:                if ((flag & FWRITE) == 0)
                    712:                        return (EBADF);
                    713:        }
                    714:
                    715:        /* Must be initialized for these... */
                    716:        switch (cmd) {
                    717:        case DIOCGDINFO:
                    718:        case DIOCSDINFO:
                    719:        case DIOCWDINFO:
                    720:        case DIOCGPART:
                    721:        case DIOCWLABEL:
                    722:        case DIOCGDEFLABEL:
                    723:        case RAIDFRAME_SHUTDOWN:
                    724:        case RAIDFRAME_REWRITEPARITY:
                    725:        case RAIDFRAME_GET_INFO:
                    726:        case RAIDFRAME_RESET_ACCTOTALS:
                    727:        case RAIDFRAME_GET_ACCTOTALS:
                    728:        case RAIDFRAME_KEEP_ACCTOTALS:
                    729:        case RAIDFRAME_GET_SIZE:
                    730:        case RAIDFRAME_FAIL_DISK:
                    731:        case RAIDFRAME_COPYBACK:
                    732:        case RAIDFRAME_CHECKRECON:
1.11      oster     733:        case RAIDFRAME_GET_COMPONENT_LABEL:
                    734:        case RAIDFRAME_SET_COMPONENT_LABEL:
                    735:        case RAIDFRAME_ADD_HOT_SPARE:
                    736:        case RAIDFRAME_REMOVE_HOT_SPARE:
                    737:        case RAIDFRAME_INIT_LABELS:
1.12      oster     738:        case RAIDFRAME_REBUILD_IN_PLACE:
1.23      oster     739:        case RAIDFRAME_CHECK_PARITY:
1.1       oster     740:                if ((rs->sc_flags & RAIDF_INITED) == 0)
                    741:                        return (ENXIO);
                    742:        }
1.9       oster     743:
1.1       oster     744:        switch (cmd) {
                    745:
                    746:
                    747:                /* configure the system */
                    748:        case RAIDFRAME_CONFIGURE:
                    749:
                    750:                db3_printf(("rf_ioctl: RAIDFRAME_CONFIGURE\n"));
                    751:                /* copy-in the configuration information */
                    752:                /* data points to a pointer to the configuration structure */
1.9       oster     753:                u_cfg = *((RF_Config_t **) data);
                    754:                RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1.1       oster     755:                if (k_cfg == NULL) {
                    756:                        db3_printf(("rf_ioctl: ENOMEM for config. Code is %d\n", retcode));
1.9       oster     757:                        return (ENOMEM);
1.1       oster     758:                }
1.9       oster     759:                retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
                    760:                    sizeof(RF_Config_t));
 1.1       oster     761:                if (retcode) {
                     762:                        db3_printf(("rf_ioctl: retcode=%d copyin.1\n",
 1.9       oster     763:                                retcode));
                                                RF_Free(k_cfg, sizeof(RF_Config_t));
                     764:                        return (retcode);
 1.1       oster     765:                }
1.9       oster     766:                /* allocate a buffer for the layout-specific data, and copy it
                    767:                 * in */
1.1       oster     768:                if (k_cfg->layoutSpecificSize) {
1.9       oster     769:                        if (k_cfg->layoutSpecificSize > 10000) {
1.1       oster     770:                                /* sanity check */
                    771:                                db3_printf(("rf_ioctl: EINVAL %d\n", retcode));
1.9       oster     772:                                return (EINVAL);
1.1       oster     773:                        }
1.9       oster     774:                        RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
                    775:                            (u_char *));
1.1       oster     776:                        if (specific_buf == NULL) {
1.9       oster     777:                                RF_Free(k_cfg, sizeof(RF_Config_t));
1.1       oster     778:                                db3_printf(("rf_ioctl: ENOMEM %d\n", retcode));
1.9       oster     779:                                return (ENOMEM);
1.1       oster     780:                        }
1.9       oster     781:                        retcode = copyin(k_cfg->layoutSpecific,
                    782:                            (caddr_t) specific_buf,
                    783:                            k_cfg->layoutSpecificSize);
 1.1       oster     784:                        if (retcode) {
                     785:                                db3_printf(("rf_ioctl: retcode=%d copyin.2\n",
 1.9       oster     786:                                        retcode));
                                                        RF_Free(specific_buf, k_cfg->layoutSpecificSize);
                                                        RF_Free(k_cfg, sizeof(RF_Config_t));
                     787:                                return (retcode);
 1.1       oster     788:                        }
1.9       oster     789:                } else
                    790:                        specific_buf = NULL;
1.1       oster     791:                k_cfg->layoutSpecific = specific_buf;
1.9       oster     792:
                    793:                /* should do some kind of sanity check on the configuration.
                    794:                 * Store the sum of all the bytes in the last byte? */
1.1       oster     795:
                    796:                /* configure the system */
                    797:
                    798:                raidPtrs[unit]->raidid = unit;
1.20      oster     799:
1.1       oster     800:                retcode = rf_Configure(raidPtrs[unit], k_cfg);
                    801:
1.20      oster     802:                /* allow this many simultaneous IO's to this RAID device */
                    803:                raidPtrs[unit]->openings = RAIDOUTSTANDING;
1.9       oster     804:
1.1       oster     805:                if (retcode == 0) {
1.9       oster     806:                        retcode = raidinit(dev, raidPtrs[unit], unit);
1.12      oster     807:                        rf_markalldirty( raidPtrs[unit] );
1.9       oster     808:                }
1.1       oster     809:                /* free the buffers.  No return code here. */
                    810:                if (k_cfg->layoutSpecificSize) {
1.9       oster     811:                        RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1.1       oster     812:                }
1.9       oster     813:                RF_Free(k_cfg, sizeof(RF_Config_t));
                    814:
                    815:                db3_printf(("rf_ioctl: retcode=%d RAIDFRAME_CONFIGURE\n",
                    816:                        retcode));
1.11      oster     817:
1.9       oster     818:                return (retcode);
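                /*
                 * Note that the argument to the RAIDFRAME_CONFIGURE ioctl is a
                 * pointer to a pointer, which is why the code above fetches
                 * *((RF_Config_t **) data) before the copyin().  A user-level
                 * caller would look roughly like this (a sketch only, not
                 * raidctl(8)'s actual code):
                 *
                 *	RF_Config_t cfg;
                 *	RF_Config_t *cfg_ptr = &cfg;
                 *
                 *	...fill in cfg from the configuration file...
                 *	if (ioctl(fd, RAIDFRAME_CONFIGURE, &cfg_ptr) == -1)
                 *		err(1, "RAIDFRAME_CONFIGURE");
                 */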
                    819:
                    820:                /* shutdown the system */
1.1       oster     821:        case RAIDFRAME_SHUTDOWN:
1.9       oster     822:
                    823:                if ((error = raidlock(rs)) != 0)
                    824:                        return (error);
1.1       oster     825:
                    826:                /*
                    827:                 * If somebody has a partition mounted, we shouldn't
                    828:                 * shutdown.
                    829:                 */
                    830:
                    831:                part = DISKPART(dev);
                    832:                pmask = (1 << part);
1.9       oster     833:                if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
                    834:                    ((rs->sc_dkdev.dk_bopenmask & pmask) &&
                    835:                        (rs->sc_dkdev.dk_copenmask & pmask))) {
                    836:                        raidunlock(rs);
                    837:                        return (EBUSY);
                    838:                }
1.11      oster     839:
1.1       oster     840:                if (rf_debugKernelAccess) {
                    841:                        printf("call shutdown\n");
                    842:                }
1.11      oster     843:
1.1       oster     844:                retcode = rf_Shutdown(raidPtrs[unit]);
                    845:
1.3       hubertf   846:                db1_printf(("Done main shutdown\n"));
1.1       oster     847:
                    848:                pool_destroy(&rs->sc_cbufpool);
1.3       hubertf   849:                db1_printf(("Done freeing component buffer freelist\n"));
1.1       oster     850:
                    851:                /* It's no longer initialized... */
                    852:                rs->sc_flags &= ~RAIDF_INITED;
1.16      oster     853:
1.9       oster     854:                /* Detach the disk. */
                    855:                disk_detach(&rs->sc_dkdev);
1.1       oster     856:
                    857:                raidunlock(rs);
                    858:
1.9       oster     859:                return (retcode);
1.11      oster     860:        case RAIDFRAME_GET_COMPONENT_LABEL:
                    861:                c_label_ptr = (RF_ComponentLabel_t **) data;
                    862:                /* need to read the component label for the disk indicated
                    863:                   by row,column in component_label
                    864:                   XXX need to sanity check these values!!!
                    865:                   */
                    866:
                     867:                /* For practice, let's get it directly from disk, rather
                    868:                   than from the in-core copy */
                    869:                RF_Malloc( component_label, sizeof( RF_ComponentLabel_t ),
                    870:                           (RF_ComponentLabel_t *));
                    871:                if (component_label == NULL)
                    872:                        return (ENOMEM);
                    873:
                    874:                bzero((char *) component_label, sizeof(RF_ComponentLabel_t));
                    875:
                    876:                retcode = copyin( *c_label_ptr, component_label,
                    877:                                  sizeof(RF_ComponentLabel_t));
                    878:
                     879:                if (retcode) {
                                                RF_Free( component_label, sizeof(RF_ComponentLabel_t));
                     880:                        return(retcode);
                     881:                }
                    882:
                    883:                row = component_label->row;
                    884:                column = component_label->column;
1.26      oster     885:
                     886:                if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
                     887:                    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
                                                RF_Free( component_label, sizeof(RF_ComponentLabel_t));
                     888:                        return(EINVAL);
 1.11      oster     889:                }
                    890:
                    891:                raidread_component_label(
                    892:                               raidPtrs[unit]->Disks[row][column].dev,
                    893:                              raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
                    894:                              component_label );
                    895:
                    896:                retcode = copyout((caddr_t) component_label,
                    897:                                  (caddr_t) *c_label_ptr,
                    898:                                  sizeof(RF_ComponentLabel_t));
                    899:                RF_Free( component_label, sizeof(RF_ComponentLabel_t));
                    900:                return (retcode);
                    901:
                    902:        case RAIDFRAME_SET_COMPONENT_LABEL:
                    903:                component_label = (RF_ComponentLabel_t *) data;
                    904:
                    905:                /* XXX check the label for valid stuff... */
                    906:                /* Note that some things *should not* get modified --
                    907:                   the user should be re-initing the labels instead of
                    908:                   trying to patch things.
                    909:                   */
                    910:
                    911:                printf("Got component label:\n");
                    912:                printf("Version: %d\n",component_label->version);
                    913:                printf("Serial Number: %d\n",component_label->serial_number);
                    914:                printf("Mod counter: %d\n",component_label->mod_counter);
                    915:                printf("Row: %d\n", component_label->row);
                    916:                printf("Column: %d\n", component_label->column);
                    917:                printf("Num Rows: %d\n", component_label->num_rows);
                    918:                printf("Num Columns: %d\n", component_label->num_columns);
                    919:                printf("Clean: %d\n", component_label->clean);
                    920:                printf("Status: %d\n", component_label->status);
                    921:
                    922:                row = component_label->row;
                    923:                column = component_label->column;
1.12      oster     924:
1.26      oster     925:                if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
                    926:                    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
1.12      oster     927:                        return(EINVAL);
1.11      oster     928:                }
1.12      oster     929:
                    930:                /* XXX this isn't allowed to do anything for now :-) */
                    931: #if 0
1.11      oster     932:                raidwrite_component_label(
                    933:                             raidPtrs[unit]->Disks[row][column].dev,
                    934:                            raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
                    935:                            component_label );
1.12      oster     936: #endif
                    937:                return (0);
1.11      oster     938:
                    939:        case RAIDFRAME_INIT_LABELS:
                    940:                component_label = (RF_ComponentLabel_t *) data;
                    941:                /*
                    942:                   we only want the serial number from
                    943:                   the above.  We get all the rest of the information
                    944:                   from the config that was used to create this RAID
                    945:                   set.
                    946:                   */
1.12      oster     947:
                    948:                raidPtrs[unit]->serial_number = component_label->serial_number;
                    949:                /* current version number */
                    950:                ci_label.version = RF_COMPONENT_LABEL_VERSION;
1.11      oster     951:                ci_label.serial_number = component_label->serial_number;
1.12      oster     952:                ci_label.mod_counter = raidPtrs[unit]->mod_counter;
1.11      oster     953:                ci_label.num_rows = raidPtrs[unit]->numRow;
                    954:                ci_label.num_columns = raidPtrs[unit]->numCol;
                    955:                ci_label.clean = RF_RAID_DIRTY; /* not clean */
                    956:                ci_label.status = rf_ds_optimal; /* "It's good!" */
                    957:
                    958:                for(row=0;row<raidPtrs[unit]->numRow;row++) {
                    959:                        ci_label.row = row;
                    960:                        for(column=0;column<raidPtrs[unit]->numCol;column++) {
                    961:                                ci_label.column = column;
                    962:                                raidwrite_component_label(
                    963:                                  raidPtrs[unit]->Disks[row][column].dev,
                    964:                                  raidPtrs[unit]->raid_cinfo[row][column].ci_vp,
                    965:                                  &ci_label );
                    966:                        }
                    967:                }
                    968:
                    969:                return (retcode);
1.9       oster     970:
1.1       oster     971:                /* initialize all parity */
                    972:        case RAIDFRAME_REWRITEPARITY:
                    973:
1.17      oster     974:                if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
                    975:                        /* Parity for RAID 0 is trivially correct */
                    976:                        raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
                    977:                        return(0);
                    978:                }
                    979:
1.1       oster     980:                /* borrow the thread of the requesting process */
1.27      oster     981:
1.22      oster     982:                s = splbio();
1.1       oster     983:                retcode = rf_RewriteParity(raidPtrs[unit]);
1.22      oster     984:                splx(s);
1.9       oster     985:                /* return I/O Error if the parity rewrite fails */
1.1       oster     986:
1.11      oster     987:                if (retcode) {
1.9       oster     988:                        retcode = EIO;
1.11      oster     989:                } else {
1.12      oster     990:                        /* set the clean bit!  If we shutdown correctly,
                    991:                         the clean bit on each component label will get
                    992:                         set */
                    993:                        raidPtrs[unit]->parity_good = RF_RAID_CLEAN;
1.11      oster     994:                }
1.9       oster     995:                return (retcode);
                    996:
1.11      oster     997:
                    998:        case RAIDFRAME_ADD_HOT_SPARE:
1.12      oster     999:                sparePtr = (RF_SingleComponent_t *) data;
                   1000:                memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
                   1001:                printf("Adding spare\n");
                   1002:                retcode = rf_add_hot_spare(raidPtrs[unit], &hot_spare);
1.11      oster    1003:                return(retcode);
                   1004:
                   1005:        case RAIDFRAME_REMOVE_HOT_SPARE:
                   1006:                return(retcode);
                   1007:
1.12      oster    1008:        case RAIDFRAME_REBUILD_IN_PLACE:
1.24      oster    1009:
                   1010:                if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
                   1011:                        /* Can't do this on a RAID 0!! */
                   1012:                        return(EINVAL);
                   1013:                }
                   1014:
1.12      oster    1015:                componentPtr = (RF_SingleComponent_t *) data;
                   1016:                memcpy( &component, componentPtr,
                   1017:                        sizeof(RF_SingleComponent_t));
                   1018:                row = component.row;
                   1019:                column = component.column;
                   1020:                printf("Rebuild: %d %d\n",row, column);
1.26      oster    1021:                if ((row < 0) || (row >= raidPtrs[unit]->numRow) ||
                   1022:                    (column < 0) || (column >= raidPtrs[unit]->numCol)) {
1.12      oster    1023:                        return(EINVAL);
                   1024:                }
                   1025:                printf("Attempting a rebuild in place\n");
1.21      oster    1026:                s = splbio();
1.12      oster    1027:                retcode = rf_ReconstructInPlace(raidPtrs[unit], row, column);
1.21      oster    1028:                splx(s);
1.12      oster    1029:                return(retcode);
                   1030:
1.1       oster    1031:        case RAIDFRAME_GET_INFO:
                   1032:                {
                   1033:                        RF_Raid_t *raid = raidPtrs[unit];
                   1034:                        RF_DeviceConfig_t *cfg, **ucfgp;
1.9       oster    1035:                        int     i, j, d;
                   1036:
1.1       oster    1037:                        if (!raid->valid)
1.9       oster    1038:                                return (ENODEV);
                   1039:                        ucfgp = (RF_DeviceConfig_t **) data;
                   1040:                        RF_Malloc(cfg, sizeof(RF_DeviceConfig_t),
1.11      oster    1041:                                  (RF_DeviceConfig_t *));
1.1       oster    1042:                        if (cfg == NULL)
1.9       oster    1043:                                return (ENOMEM);
                   1044:                        bzero((char *) cfg, sizeof(RF_DeviceConfig_t));
1.1       oster    1045:                        cfg->rows = raid->numRow;
                   1046:                        cfg->cols = raid->numCol;
                   1047:                        cfg->ndevs = raid->numRow * raid->numCol;
                    1048:                        if (cfg->ndevs >= RF_MAX_DISKS) {
                    1049:                                cfg->ndevs = 0;
                                                        RF_Free(cfg, sizeof(RF_DeviceConfig_t));
 1.9       oster    1050:                                return (ENOMEM);
 1.1       oster    1051:                        }
                   1052:                        cfg->nspares = raid->numSpare;
                    1053:                        if (cfg->nspares >= RF_MAX_DISKS) {
                    1054:                                cfg->nspares = 0;
                                                        RF_Free(cfg, sizeof(RF_DeviceConfig_t));
 1.9       oster    1055:                                return (ENOMEM);
 1.1       oster    1056:                        }
                   1057:                        cfg->maxqdepth = raid->maxQueueDepth;
                   1058:                        d = 0;
1.9       oster    1059:                        for (i = 0; i < cfg->rows; i++) {
                   1060:                                for (j = 0; j < cfg->cols; j++) {
1.1       oster    1061:                                        cfg->devs[d] = raid->Disks[i][j];
                   1062:                                        d++;
                   1063:                                }
                   1064:                        }
1.9       oster    1065:                        for (j = cfg->cols, i = 0; i < cfg->nspares; i++, j++) {
1.1       oster    1066:                                cfg->spares[i] = raid->Disks[0][j];
                   1067:                        }
1.9       oster    1068:                        retcode = copyout((caddr_t) cfg, (caddr_t) * ucfgp,
1.11      oster    1069:                                          sizeof(RF_DeviceConfig_t));
1.9       oster    1070:                        RF_Free(cfg, sizeof(RF_DeviceConfig_t));
                   1071:
                   1072:                        return (retcode);
1.1       oster    1073:                }
1.9       oster    1074:                break;
1.22      oster    1075:        case RAIDFRAME_CHECK_PARITY:
                   1076:                *(int *) data = raidPtrs[unit]->parity_good;
                   1077:                return (0);
1.1       oster    1078:        case RAIDFRAME_RESET_ACCTOTALS:
                   1079:                {
                   1080:                        RF_Raid_t *raid = raidPtrs[unit];
1.9       oster    1081:
1.1       oster    1082:                        bzero(&raid->acc_totals, sizeof(raid->acc_totals));
1.9       oster    1083:                        return (0);
1.1       oster    1084:                }
1.9       oster    1085:                break;
                   1086:
1.1       oster    1087:        case RAIDFRAME_GET_ACCTOTALS:
                   1088:                {
1.9       oster    1089:                        RF_AccTotals_t *totals = (RF_AccTotals_t *) data;
1.1       oster    1090:                        RF_Raid_t *raid = raidPtrs[unit];
1.9       oster    1091:
1.1       oster    1092:                        *totals = raid->acc_totals;
1.9       oster    1093:                        return (0);
1.1       oster    1094:                }
1.9       oster    1095:                break;
                   1096:
1.1       oster    1097:        case RAIDFRAME_KEEP_ACCTOTALS:
                   1098:                {
                   1099:                        RF_Raid_t *raid = raidPtrs[unit];
1.9       oster    1100:                        int    *keep = (int *) data;
                   1101:
1.1       oster    1102:                        raid->keep_acc_totals = *keep;
1.9       oster    1103:                        return (0);
1.1       oster    1104:                }
1.9       oster    1105:                break;
                   1106:
1.1       oster    1107:        case RAIDFRAME_GET_SIZE:
                   1108:                *(int *) data = raidPtrs[unit]->totalSectors;
1.9       oster    1109:                return (0);
1.1       oster    1110:
                   1111: #define RAIDFRAME_RECON 1
                   1112:                /* XXX The above should probably be set somewhere else!! GO */
                   1113: #if RAIDFRAME_RECON > 0
                   1114:
                   1115:                /* fail a disk & optionally start reconstruction */
                   1116:        case RAIDFRAME_FAIL_DISK:
1.24      oster    1117:
                   1118:                if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
                   1119:                        /* Can't do this on a RAID 0!! */
                   1120:                        return(EINVAL);
                   1121:                }
                   1122:
1.1       oster    1123:                rr = (struct rf_recon_req *) data;
1.9       oster    1124:
                   1125:                if (rr->row < 0 || rr->row >= raidPtrs[unit]->numRow
1.1       oster    1126:                    || rr->col < 0 || rr->col >= raidPtrs[unit]->numCol)
1.9       oster    1127:                        return (EINVAL);
1.1       oster    1128:
1.12      oster    1129:                printf("raid%d: Failing the disk: row: %d col: %d\n",
                   1130:                       unit, rr->row, rr->col);
1.9       oster    1131:
                   1132:                /* make a copy of the recon request so that we don't rely on
                   1133:                 * the user's buffer */
1.1       oster    1134:                RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
                   1135:                bcopy(rr, rrcopy, sizeof(*rr));
                   1136:                rrcopy->raidPtr = (void *) raidPtrs[unit];
                   1137:
                   1138:                LOCK_RECON_Q_MUTEX();
                   1139:                rrcopy->next = recon_queue;
                   1140:                recon_queue = rrcopy;
                   1141:                wakeup(&recon_queue);
                   1142:                UNLOCK_RECON_Q_MUTEX();
1.9       oster    1143:
                   1144:                return (0);
                   1145:
                   1146:                /* invoke a copyback operation after recon on whatever disk
                   1147:                 * needs it, if any */
                   1148:        case RAIDFRAME_COPYBACK:
1.24      oster    1149:
                   1150:                if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
                   1151:                        /* This makes no sense on a RAID 0!! */
                   1152:                        return(EINVAL);
                   1153:                }
                   1154:
1.1       oster    1155:                /* borrow the current thread to get this done */
1.27      oster    1156:
1.21      oster    1157:                s = splbio();
1.1       oster    1158:                rf_CopybackReconstructedData(raidPtrs[unit]);
1.21      oster    1159:                splx(s);
1.9       oster    1160:                return (0);
                   1161:
1.1       oster    1162:                /* return the percentage completion of reconstruction */
                   1163:        case RAIDFRAME_CHECKRECON:
1.24      oster    1164:                if (raidPtrs[unit]->Layout.map->faultsTolerated == 0) {
                   1165:                        /* This makes no sense on a RAID 0 */
                   1166:                        return(EINVAL);
                   1167:                }
                   1168:
1.1       oster    1169:                row = *(int *) data;
                   1170:                if (row < 0 || row >= raidPtrs[unit]->numRow)
1.9       oster    1171:                        return (EINVAL);
                   1172:                if (raidPtrs[unit]->status[row] != rf_rs_reconstructing)
1.1       oster    1173:                        *(int *) data = 100;
1.9       oster    1174:                else
1.1       oster    1175:                        *(int *) data = raidPtrs[unit]->reconControl[row]->percentComplete;
1.9       oster    1176:                return (0);
                   1177:
                   1178:                /* the sparetable daemon calls this to wait for the kernel to
                   1179:                 * need a spare table. this ioctl does not return until a
                   1180:                 * spare table is needed. XXX -- calling mpsleep here in the
                   1181:                 * ioctl code is almost certainly wrong and evil. -- XXX XXX
                   1182:                 * -- I should either compute the spare table in the kernel,
                   1183:                 * or have a different -- XXX XXX -- interface (a different
                   1184:                 * character device) for delivering the table          -- XXX */
1.1       oster    1185: #if 0
                   1186:        case RAIDFRAME_SPARET_WAIT:
                   1187:                RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1.9       oster    1188:                while (!rf_sparet_wait_queue)
                   1189:                        mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1.1       oster    1190:                waitreq = rf_sparet_wait_queue;
                   1191:                rf_sparet_wait_queue = rf_sparet_wait_queue->next;
                   1192:                RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1.9       oster    1193:
                   1194:                *((RF_SparetWait_t *) data) = *waitreq; /* structure assignment */
                   1195:
1.1       oster    1196:                RF_Free(waitreq, sizeof(*waitreq));
1.9       oster    1197:                return (0);
                   1198:
                   1199:
                   1200:                /* wakes up a process waiting on SPARET_WAIT and puts an error
                    1201:                 * code in it that will cause the daemon to exit */
1.1       oster    1202:        case RAIDFRAME_ABORT_SPARET_WAIT:
                   1203:                RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
                   1204:                waitreq->fcol = -1;
                   1205:                RF_LOCK_MUTEX(rf_sparet_wait_mutex);
                   1206:                waitreq->next = rf_sparet_wait_queue;
                   1207:                rf_sparet_wait_queue = waitreq;
                   1208:                RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
                   1209:                wakeup(&rf_sparet_wait_queue);
1.9       oster    1210:                return (0);
1.1       oster    1211:
1.9       oster    1212:                /* used by the spare table daemon to deliver a spare table
                   1213:                 * into the kernel */
1.1       oster    1214:        case RAIDFRAME_SEND_SPARET:
1.9       oster    1215:
1.1       oster    1216:                /* install the spare table */
1.9       oster    1217:                retcode = rf_SetSpareTable(raidPtrs[unit], *(void **) data);
                   1218:
                   1219:                /* respond to the requestor.  the return status of the spare
                   1220:                 * table installation is passed in the "fcol" field */
1.1       oster    1221:                RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
                   1222:                waitreq->fcol = retcode;
                   1223:                RF_LOCK_MUTEX(rf_sparet_wait_mutex);
                   1224:                waitreq->next = rf_sparet_resp_queue;
                   1225:                rf_sparet_resp_queue = waitreq;
                   1226:                wakeup(&rf_sparet_resp_queue);
                   1227:                RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1.9       oster    1228:
                   1229:                return (retcode);
1.1       oster    1230: #endif
                   1231:
                   1232:
1.9       oster    1233: #endif                         /* RAIDFRAME_RECON > 0 */
                   1234:
                   1235:        default:
                   1236:                break;          /* fall through to the os-specific code below */
1.1       oster    1237:
                   1238:        }
1.9       oster    1239:
1.1       oster    1240:        if (!raidPtrs[unit]->valid)
1.9       oster    1241:                return (EINVAL);
                   1242:
1.1       oster    1243:        /*
                   1244:         * Add support for "regular" device ioctls here.
                   1245:         */
1.9       oster    1246:
1.1       oster    1247:        switch (cmd) {
                   1248:        case DIOCGDINFO:
1.9       oster    1249:                db1_printf(("DIOCGDINFO %d %d\n", (int) dev, (int) DISKPART(dev)));
                   1250:                *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1.1       oster    1251:                break;
                   1252:
                   1253:        case DIOCGPART:
1.9       oster    1254:                db1_printf(("DIOCGPART: %d %d\n", (int) dev, (int) DISKPART(dev)));
                   1255:                ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
                   1256:                ((struct partinfo *) data)->part =
1.1       oster    1257:                    &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
                   1258:                break;
                   1259:
                   1260:        case DIOCWDINFO:
                   1261:                db1_printf(("DIOCWDINFO\n"));
                   1262:        case DIOCSDINFO:
                   1263:                db1_printf(("DIOCSDINFO\n"));
                   1264:                if ((error = raidlock(rs)) != 0)
                   1265:                        return (error);
                   1266:
                   1267:                rs->sc_flags |= RAIDF_LABELLING;
                   1268:
                   1269:                error = setdisklabel(rs->sc_dkdev.dk_label,
1.9       oster    1270:                    (struct disklabel *) data, 0, rs->sc_dkdev.dk_cpulabel);
1.1       oster    1271:                if (error == 0) {
                   1272:                        if (cmd == DIOCWDINFO)
                   1273:                                error = writedisklabel(RAIDLABELDEV(dev),
                   1274:                                    raidstrategy, rs->sc_dkdev.dk_label,
                   1275:                                    rs->sc_dkdev.dk_cpulabel);
                   1276:                }
                   1277:                rs->sc_flags &= ~RAIDF_LABELLING;
                   1278:
                   1279:                raidunlock(rs);
                   1280:
                   1281:                if (error)
                   1282:                        return (error);
                   1283:                break;
                   1284:
                   1285:        case DIOCWLABEL:
                   1286:                db1_printf(("DIOCWLABEL\n"));
1.9       oster    1287:                if (*(int *) data != 0)
1.1       oster    1288:                        rs->sc_flags |= RAIDF_WLABEL;
                   1289:                else
                   1290:                        rs->sc_flags &= ~RAIDF_WLABEL;
                   1291:                break;
                   1292:
                   1293:        case DIOCGDEFLABEL:
                   1294:                db1_printf(("DIOCGDEFLABEL\n"));
                   1295:                raidgetdefaultlabel(raidPtrs[unit], rs,
1.9       oster    1296:                    (struct disklabel *) data);
1.1       oster    1297:                break;
                   1298:
                   1299:        default:
1.9       oster    1300:                retcode = ENOTTY;       /* XXXX ?? OR EINVAL ? */
1.1       oster    1301:        }
1.9       oster    1302:        return (retcode);
1.1       oster    1303:
                   1304: }
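/*
 * Editor's note: the sketch below is illustrative only and is not part of
 * the driver.  It shows, from userland, how a couple of the simpler
 * RAIDFRAME_* ioctls handled above are typically driven.  The include path
 * for the ioctl definitions and the /dev/rraid0d device name are
 * assumptions; both vary by port and release.
 */
#if 0	/* example only -- never compiled into the kernel */
#include <sys/types.h>
#include <sys/ioctl.h>

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#include <dev/raidframe/rf_raidframe.h>	/* assumed home of RAIDFRAME_* */

int
main(void)
{
	int fd, size, parity_good;

	/* open the raw partition of the RAID device (name is an assumption) */
	fd = open("/dev/rraid0d", O_RDWR);
	if (fd < 0)
		return (1);

	/* RAIDFRAME_GET_SIZE hands back raidPtr->totalSectors in an int */
	if (ioctl(fd, RAIDFRAME_GET_SIZE, &size) == 0)
		printf("total sectors: %d\n", size);

	/* RAIDFRAME_CHECK_PARITY hands back raidPtr->parity_good */
	if (ioctl(fd, RAIDFRAME_CHECK_PARITY, &parity_good) == 0)
		printf("parity_good: %d\n", parity_good);

	close(fd);
	return (0);
}
#endif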
                   1305:
                   1306:
1.9       oster    1307: /* raidinit -- complete the rest of the initialization for the
1.1       oster    1308:    RAIDframe device.  */
                   1309:
                   1310:
                   1311: static int
1.9       oster    1312: raidinit(dev, raidPtr, unit)
                   1313:        dev_t   dev;
1.1       oster    1314:        RF_Raid_t *raidPtr;
1.9       oster    1315:        int     unit;
1.1       oster    1316: {
1.9       oster    1317:        int     retcode;
                   1318:        /* int ix; */
                   1319:        /* struct raidbuf *raidbp; */
1.1       oster    1320:        struct raid_softc *rs;
                   1321:
                   1322:        retcode = 0;
                   1323:
                   1324:        rs = &raid_softc[unit];
                   1325:        pool_init(&rs->sc_cbufpool, sizeof(struct raidbuf), 0,
1.11      oster    1326:                  0, 0, "raidpl", 0, NULL, NULL, M_RAIDFRAME);
1.9       oster    1327:
1.1       oster    1328:
                   1329:        /* XXX should check return code first... */
                   1330:        rs->sc_flags |= RAIDF_INITED;
                   1331:
1.9       oster    1332:        sprintf(rs->sc_xname, "raid%d", unit);  /* XXX doesn't check bounds. */
1.1       oster    1333:
1.9       oster    1334:        rs->sc_dkdev.dk_name = rs->sc_xname;
1.11      oster    1335:
1.1       oster    1336:        /* disk_attach actually creates space for the CPU disklabel, among
1.9       oster    1337:         * other things, so it's critical to call this *BEFORE* we try putzing
                   1338:         * with disklabels. */
1.11      oster    1339:
1.1       oster    1340:        disk_attach(&rs->sc_dkdev);
                   1341:
                   1342:        /* XXX There may be a weird interaction here between this, and
1.9       oster    1343:         * protectedSectors, as used in RAIDframe.  */
1.11      oster    1344:
1.9       oster    1345:        rs->sc_size = raidPtr->totalSectors;
1.1       oster    1346:        rs->sc_dev = dev;
1.11      oster    1347:
1.9       oster    1348:        return (retcode);
1.1       oster    1349: }
                   1350:
                   1351: /*
                   1352:  * This kernel thread never exits.  It is created once, and persists
                   1353:  * until the system reboots.
                   1354:  */
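                          /* Requests are queued on recon_queue (and this thread woken up)
                           * by the RAIDFRAME_FAIL_DISK ioctl case in raidioctl() above. */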
1.11      oster    1355:
1.9       oster    1356: void
                   1357: rf_ReconKernelThread()
1.1       oster    1358: {
1.9       oster    1359:        struct rf_recon_req *req;
                   1360:        int     s;
1.1       oster    1361:
1.9       oster    1362:        /* XXX not sure what spl() level we should be at here... probably
                   1363:         * splbio() */
                   1364:        s = splbio();
1.1       oster    1365:
1.9       oster    1366:        while (1) {
                   1367:                /* grab the next reconstruction request from the queue */
                   1368:                LOCK_RECON_Q_MUTEX();
                   1369:                while (!recon_queue) {
                   1370:                        UNLOCK_RECON_Q_MUTEX();
1.15      oster    1371:                        tsleep(&recon_queue, PRIBIO,
1.11      oster    1372:                               "raidframe recon", 0);
1.9       oster    1373:                        LOCK_RECON_Q_MUTEX();
                   1374:                }
                   1375:                req = recon_queue;
                   1376:                recon_queue = recon_queue->next;
                   1377:                UNLOCK_RECON_Q_MUTEX();
                   1378:
                   1379:                /*
                   1380:                 * If flags specifies that we should start recon, this call
1.11      oster    1381:                 * will not return until reconstruction completes, fails,
                   1382:                 * or is aborted.
1.9       oster    1383:                 */
                   1384:                rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
                   1385:                    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
1.1       oster    1386:
1.9       oster    1387:                RF_Free(req, sizeof(*req));
                   1388:        }
1.1       oster    1389: }
                   1390: /* wake up the daemon & tell it to get us a spare table
                   1391:  * XXX
1.9       oster    1392:  * the entries in the queues should be tagged with the raidPtr
1.11      oster    1393:  * so that in the extremely rare case that two recons happen at once,
                    1394:  * we know for which device we're requesting a spare table
1.1       oster    1395:  * XXX
                   1396:  */
1.9       oster    1397: int
                   1398: rf_GetSpareTableFromDaemon(req)
                   1399:        RF_SparetWait_t *req;
                   1400: {
                   1401:        int     retcode;
                   1402:
                   1403:        RF_LOCK_MUTEX(rf_sparet_wait_mutex);
                   1404:        req->next = rf_sparet_wait_queue;
                   1405:        rf_sparet_wait_queue = req;
                   1406:        wakeup(&rf_sparet_wait_queue);
                   1407:
                   1408:        /* mpsleep unlocks the mutex */
                   1409:        while (!rf_sparet_resp_queue) {
1.15      oster    1410:                tsleep(&rf_sparet_resp_queue, PRIBIO,
1.9       oster    1411:                    "raidframe getsparetable", 0);
                   1412:        }
                   1413:        req = rf_sparet_resp_queue;
                   1414:        rf_sparet_resp_queue = req->next;
                   1415:        RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
                   1416:
                   1417:        retcode = req->fcol;
                   1418:        RF_Free(req, sizeof(*req));     /* this is not the same req as we
                   1419:                                         * alloc'd */
                   1420:        return (retcode);
1.1       oster    1421: }
1.11      oster    1422: /* a wrapper around rf_DoAccess that extracts appropriate info from the
                   1423:  * bp & passes it down.
1.1       oster    1424:  * any calls originating in the kernel must use non-blocking I/O
                   1425:  * do some extra sanity checking to return "appropriate" error values for
                   1426:  * certain conditions (to make some standard utilities work)
                   1427:  */
1.9       oster    1428: int
                   1429: rf_DoAccessKernel(raidPtr, bp, flags, cbFunc, cbArg)
                   1430:        RF_Raid_t *raidPtr;
                   1431:        struct buf *bp;
                   1432:        RF_RaidAccessFlags_t flags;
                   1433:        void    (*cbFunc) (struct buf *);
                   1434:        void   *cbArg;
1.1       oster    1435: {
                   1436:        RF_SectorCount_t num_blocks, pb, sum;
                   1437:        RF_RaidAddr_t raid_addr;
1.9       oster    1438:        int     retcode;
1.1       oster    1439:        struct partition *pp;
1.9       oster    1440:        daddr_t blocknum;
                   1441:        int     unit;
1.1       oster    1442:        struct raid_softc *rs;
1.9       oster    1443:        int     do_async;
1.1       oster    1444:
                   1445:        /* XXX The dev_t used here should be for /dev/[r]raid* !!! */
                   1446:
                   1447:        unit = raidPtr->raidid;
                   1448:        rs = &raid_softc[unit];
                   1449:
                   1450:        /* Ok, for the bp we have here, bp->b_blkno is relative to the
1.9       oster    1451:         * partition.. Need to make it absolute to the underlying device.. */
1.1       oster    1452:
                   1453:        blocknum = bp->b_blkno;
                   1454:        if (DISKPART(bp->b_dev) != RAW_PART) {
                   1455:                pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
                   1456:                blocknum += pp->p_offset;
1.9       oster    1457:                db1_printf(("updated: %d %d\n", DISKPART(bp->b_dev),
                   1458:                        pp->p_offset));
1.1       oster    1459:        } else {
                   1460:                db1_printf(("Is raw..\n"));
                   1461:        }
                   1462:        db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno, (int) blocknum));
                   1463:
1.9       oster    1464:        db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
                   1465:        db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1.1       oster    1466:
1.9       oster    1467:        /* *THIS* is where we adjust what block we're going to... but DO NOT
                   1468:         * TOUCH bp->b_blkno!!! */
1.1       oster    1469:        raid_addr = blocknum;
1.9       oster    1470:
1.1       oster    1471:        num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1.9       oster    1472:        pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1.1       oster    1473:        sum = raid_addr + num_blocks + pb;
                   1474:        if (1 || rf_debugKernelAccess) {
1.9       oster    1475:                db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
                   1476:                        (int) raid_addr, (int) sum, (int) num_blocks,
                   1477:                        (int) pb, (int) bp->b_resid));
1.1       oster    1478:        }
                   1479:        if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1.9       oster    1480:            || (sum < num_blocks) || (sum < pb)) {
1.1       oster    1481:                bp->b_error = ENOSPC;
                   1482:                bp->b_flags |= B_ERROR;
                   1483:                bp->b_resid = bp->b_bcount;
                   1484:                biodone(bp);
1.9       oster    1485:                return (bp->b_error);
1.1       oster    1486:        }
                   1487:        /*
                   1488:         * XXX rf_DoAccess() should do this, not just DoAccessKernel()
                   1489:         */
                   1490:
                   1491:        if (bp->b_bcount & raidPtr->sectorMask) {
                   1492:                bp->b_error = EINVAL;
                   1493:                bp->b_flags |= B_ERROR;
                   1494:                bp->b_resid = bp->b_bcount;
                   1495:                biodone(bp);
1.9       oster    1496:                return (bp->b_error);
1.1       oster    1497:        }
                   1498:        db1_printf(("Calling DoAccess..\n"));
                   1499:
1.20      oster    1500:
                    1501:        /* Put a throttle on the number of requests we handle simultaneously */
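                                 /* editor's note: openings throttles the number of accesses
                                  * in flight; the completion path is presumed to increment it
                                  * again and wakeup(&raidPtr->openings) so the tsleep() below
                                  * can continue */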
                   1502:
                   1503:        RF_LOCK_MUTEX(raidPtr->mutex);
                   1504:
                   1505:        while(raidPtr->openings <= 0) {
                   1506:                RF_UNLOCK_MUTEX(raidPtr->mutex);
                   1507:                (void)tsleep(&raidPtr->openings, PRIBIO, "rfdwait", 0);
                   1508:                RF_LOCK_MUTEX(raidPtr->mutex);
                   1509:        }
                   1510:        raidPtr->openings--;
                   1511:
                   1512:        RF_UNLOCK_MUTEX(raidPtr->mutex);
                   1513:
1.7       explorer 1514:        /*
1.20      oster    1515:         * Everything is async.
1.7       explorer 1516:         */
                   1517:        do_async = 1;
                   1518:
1.9       oster    1519:        /* don't ever condition on bp->b_flags & B_WRITE.  always condition on
                   1520:         * B_READ instead */
                   1521:        retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
                   1522:            RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
                   1523:            do_async, raid_addr, num_blocks,
                   1524:            bp->b_un.b_addr,
                   1525:            bp, NULL, NULL, RF_DAG_NONBLOCKING_IO | flags,
                   1526:            NULL, cbFunc, cbArg);
1.5       oster    1527: #if 0
1.9       oster    1528:        db1_printf(("After call to DoAccess: 0x%x 0x%x %d\n", bp,
                   1529:                bp->b_data, (int) bp->b_resid));
1.5       oster    1530: #endif
1.7       explorer 1531:
1.9       oster    1532:        return (retcode);
1.1       oster    1533: }
                   1534: /* invoke an I/O from kernel mode.  Disk queue should be locked upon entry */
                   1535:
1.9       oster    1536: int
                   1537: rf_DispatchKernelIO(queue, req)
                   1538:        RF_DiskQueue_t *queue;
                   1539:        RF_DiskQueueData_t *req;
1.1       oster    1540: {
1.9       oster    1541:        int     op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1.1       oster    1542:        struct buf *bp;
1.9       oster    1543:        struct raidbuf *raidbp = NULL;
1.1       oster    1544:        struct raid_softc *rs;
1.9       oster    1545:        int     unit;
                   1546:
1.1       oster    1547:        /* XXX along with the vnode, we also need the softc associated with
1.9       oster    1548:         * this device.. */
                   1549:
1.1       oster    1550:        req->queue = queue;
1.9       oster    1551:
1.1       oster    1552:        unit = queue->raidPtr->raidid;
                   1553:
1.9       oster    1554:        db1_printf(("DispatchKernelIO unit: %d\n", unit));
1.1       oster    1555:
1.9       oster    1556:        if (unit >= numraid) {
                   1557:                printf("Invalid unit number: %d %d\n", unit, numraid);
1.1       oster    1558:                panic("Invalid Unit number in rf_DispatchKernelIO\n");
                   1559:        }
                   1560:        rs = &raid_softc[unit];
                   1561:
                   1562:        /* XXX is this the right place? */
1.9       oster    1563:        disk_busy(&rs->sc_dkdev);
1.1       oster    1564:
                   1565:        bp = req->bp;
1.16      oster    1566: #if 1
1.9       oster    1567:        /* XXX when there is a physical disk failure, someone is passing us a
                   1568:         * buffer that contains old stuff!!  Attempt to deal with this problem
                   1569:         * without taking a performance hit... (not sure where the real bug
                   1570:         * is.  It's buried in RAIDframe somewhere) :-(  GO ) */
1.4       oster    1571:
                   1572:        if (bp->b_flags & B_ERROR) {
                   1573:                bp->b_flags &= ~B_ERROR;
                   1574:        }
1.9       oster    1575:        if (bp->b_error != 0) {
1.4       oster    1576:                bp->b_error = 0;
                   1577:        }
1.16      oster    1578: #endif
1.1       oster    1579:        raidbp = RAIDGETBUF(rs);
                   1580:
1.9       oster    1581:        raidbp->rf_flags = 0;   /* XXX not really used anywhere... */
1.1       oster    1582:
                   1583:        /*
                   1584:         * context for raidiodone
                   1585:         */
                   1586:        raidbp->rf_obp = bp;
                   1587:        raidbp->req = req;
                   1588:
                   1589:        switch (req->type) {
1.9       oster    1590:        case RF_IO_TYPE_NOP:    /* used primarily to unlock a locked queue */
                   1591:                /* Dprintf2("rf_DispatchKernelIO: NOP to r %d c %d\n",
                   1592:                 * queue->row, queue->col); */
1.1       oster    1593:                /* XXX need to do something extra here.. */
1.9       oster    1594:                /* I'm leaving this in, as I've never actually seen it used,
                   1595:                 * and I'd like folks to report it... GO */
 1.1       oster    1596:                printf("WAKEUP CALLED\n");
                   1597:                queue->numOutstanding++;
                   1598:
                   1599:                /* XXX need to glue the original buffer into this??  */
                   1600:
                   1601:                KernelWakeupFunc(&raidbp->rf_buf);
                   1602:                break;
1.9       oster    1603:
1.1       oster    1604:        case RF_IO_TYPE_READ:
                   1605:        case RF_IO_TYPE_WRITE:
1.9       oster    1606:
1.1       oster    1607:                if (req->tracerec) {
                   1608:                        RF_ETIMER_START(req->tracerec->timer);
                   1609:                }
1.9       oster    1610:                InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
                   1611:                    op | bp->b_flags, queue->rf_cinfo->ci_dev,
                   1612:                    req->sectorOffset, req->numSector,
                   1613:                    req->buf, KernelWakeupFunc, (void *) req,
                   1614:                    queue->raidPtr->logBytesPerSector, req->b_proc);
1.1       oster    1615:
                   1616:                if (rf_debugKernelAccess) {
1.9       oster    1617:                        db1_printf(("dispatch: bp->b_blkno = %ld\n",
                   1618:                                (long) bp->b_blkno));
1.1       oster    1619:                }
                   1620:                queue->numOutstanding++;
                   1621:                queue->last_deq_sector = req->sectorOffset;
1.9       oster    1622:                /* acc wouldn't have been let in if there were any pending
                   1623:                 * reqs at any other priority */
1.1       oster    1624:                queue->curPriority = req->priority;
1.9       oster    1625:                /* Dprintf3("rf_DispatchKernelIO: %c to row %d col %d\n",
                   1626:                 * req->type, queue->row, queue->col); */
1.1       oster    1627:
                   1628:                db1_printf(("Going for %c to unit %d row %d col %d\n",
1.9       oster    1629:                        req->type, unit, queue->row, queue->col));
1.1       oster    1630:                db1_printf(("sector %d count %d (%d bytes) %d\n",
1.9       oster    1631:                        (int) req->sectorOffset, (int) req->numSector,
                   1632:                        (int) (req->numSector <<
                   1633:                            queue->raidPtr->logBytesPerSector),
                   1634:                        (int) queue->raidPtr->logBytesPerSector));
1.1       oster    1635:                if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
                   1636:                        raidbp->rf_buf.b_vp->v_numoutput++;
                   1637:                }
1.9       oster    1638:                VOP_STRATEGY(&raidbp->rf_buf);
1.1       oster    1639:
                   1640:                break;
1.9       oster    1641:
1.1       oster    1642:        default:
                   1643:                panic("bad req->type in rf_DispatchKernelIO");
                   1644:        }
                   1645:        db1_printf(("Exiting from DispatchKernelIO\n"));
1.9       oster    1646:        return (0);
1.1       oster    1647: }
 1.9       oster    1648: /* this is the callback function associated with an I/O invoked from
1.1       oster    1649:    kernel code.
                   1650:  */
1.9       oster    1651: static void
                   1652: KernelWakeupFunc(vbp)
                   1653:        struct buf *vbp;
                   1654: {
                   1655:        RF_DiskQueueData_t *req = NULL;
                   1656:        RF_DiskQueue_t *queue;
                   1657:        struct raidbuf *raidbp = (struct raidbuf *) vbp;
                   1658:        struct buf *bp;
                   1659:        struct raid_softc *rs;
                   1660:        int     unit;
                   1661:        register int s;
                   1662:
                   1663:        s = splbio();           /* XXX */
                   1664:        db1_printf(("recovering the request queue:\n"));
                   1665:        req = raidbp->req;
1.1       oster    1666:
1.9       oster    1667:        bp = raidbp->rf_obp;
1.5       oster    1668: #if 0
1.9       oster    1669:        db1_printf(("bp=0x%x\n", bp));
1.5       oster    1670: #endif
1.1       oster    1671:
1.9       oster    1672:        queue = (RF_DiskQueue_t *) req->queue;
1.1       oster    1673:
1.9       oster    1674:        if (raidbp->rf_buf.b_flags & B_ERROR) {
1.1       oster    1675: #if 0
1.9       oster    1676:                printf("Setting bp->b_flags!!! %d\n", raidbp->rf_buf.b_error);
1.1       oster    1677: #endif
1.9       oster    1678:                bp->b_flags |= B_ERROR;
                   1679:                bp->b_error = raidbp->rf_buf.b_error ?
                   1680:                    raidbp->rf_buf.b_error : EIO;
                   1681:        }
1.5       oster    1682: #if 0
1.9       oster    1683:        db1_printf(("raidbp->rf_buf.b_bcount=%d\n", (int) raidbp->rf_buf.b_bcount));
                   1684:        db1_printf(("raidbp->rf_buf.b_bufsize=%d\n", (int) raidbp->rf_buf.b_bufsize));
                   1685:        db1_printf(("raidbp->rf_buf.b_resid=%d\n", (int) raidbp->rf_buf.b_resid));
                   1686:        db1_printf(("raidbp->rf_buf.b_data=0x%x\n", raidbp->rf_buf.b_data));
1.5       oster    1687: #endif
1.1       oster    1688:
1.9       oster    1689:        /* XXX methinks this could be wrong... */
1.1       oster    1690: #if 1
1.9       oster    1691:        bp->b_resid = raidbp->rf_buf.b_resid;
1.1       oster    1692: #endif
                   1693:
1.9       oster    1694:        if (req->tracerec) {
                   1695:                RF_ETIMER_STOP(req->tracerec->timer);
                   1696:                RF_ETIMER_EVAL(req->tracerec->timer);
                   1697:                RF_LOCK_MUTEX(rf_tracing_mutex);
                   1698:                req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
                   1699:                req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
                   1700:                req->tracerec->num_phys_ios++;
                   1701:                RF_UNLOCK_MUTEX(rf_tracing_mutex);
                   1702:        }
                   1703:        bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1.1       oster    1704:
1.9       oster    1705:        unit = queue->raidPtr->raidid;  /* *Much* simpler :-> */
1.1       oster    1706:
                   1707:
1.9       oster    1708:        /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
                   1709:         * ballistic, and mark the component as hosed... */
                   1710: #if 1
                   1711:        if (bp->b_flags & B_ERROR) {
                   1712:                /* Mark the disk as dead */
                   1713:                /* but only mark it once... */
                   1714:                if (queue->raidPtr->Disks[queue->row][queue->col].status ==
                   1715:                    rf_ds_optimal) {
                   1716:                        printf("raid%d: IO Error.  Marking %s as failed.\n",
                   1717:                            unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
                   1718:                        queue->raidPtr->Disks[queue->row][queue->col].status =
                   1719:                            rf_ds_failed;
                   1720:                        queue->raidPtr->status[queue->row] = rf_rs_degraded;
                   1721:                        queue->raidPtr->numFailures++;
1.11      oster    1722:                        /* XXX here we should bump the version number for each component, and write that data out */
1.9       oster    1723:                } else {        /* Disk is already dead... */
                   1724:                        /* printf("Disk already marked as dead!\n"); */
                   1725:                }
1.4       oster    1726:
1.9       oster    1727:        }
1.4       oster    1728: #endif
                   1729:
1.9       oster    1730:        rs = &raid_softc[unit];
                   1731:        RAIDPUTBUF(rs, raidbp);
                   1732:
1.4       oster    1733:
1.9       oster    1734:        if (bp->b_resid == 0) {
                   1735:                db1_printf(("Disk is no longer busy for this buffer... %d %ld %ld\n",
                   1736:                        unit, bp->b_resid, bp->b_bcount));
                   1737:                /* XXX is this the right place for a disk_unbusy()??!??!?!? */
                   1738:                disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid));
                   1739:        } else {
                   1740:                db1_printf(("b_resid is still %ld\n", bp->b_resid));
                   1741:        }
1.1       oster    1742:
1.9       oster    1743:        rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
                   1744:        (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
                   1745:        /* printf("Exiting KernelWakeupFunc\n"); */
1.1       oster    1746:
1.9       oster    1747:        splx(s);                /* XXX */
1.1       oster    1748: }
                   1749:
                   1750:
                   1751:
                   1752: /*
                   1753:  * initialize a buf structure for doing an I/O in the kernel.
                   1754:  */
1.9       oster    1755: static void
                   1756: InitBP(
                   1757:     struct buf * bp,
                   1758:     struct vnode * b_vp,
                   1759:     unsigned rw_flag,
                   1760:     dev_t dev,
                   1761:     RF_SectorNum_t startSect,
                   1762:     RF_SectorCount_t numSect,
                   1763:     caddr_t buf,
                   1764:     void (*cbFunc) (struct buf *),
                   1765:     void *cbArg,
                   1766:     int logBytesPerSector,
                   1767:     struct proc * b_proc)
                   1768: {
                   1769:        /* bp->b_flags       = B_PHYS | rw_flag; */
                   1770:        bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
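                                 /* B_CALL makes biodone() invoke bp->b_iodone (cbFunc,
                                  * set below) when the I/O completes */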
                   1771:        bp->b_bcount = numSect << logBytesPerSector;
                   1772:        bp->b_bufsize = bp->b_bcount;
                   1773:        bp->b_error = 0;
                   1774:        bp->b_dev = dev;
1.1       oster    1775:        db1_printf(("bp->b_dev is %d\n", dev));
1.9       oster    1776:        bp->b_un.b_addr = buf;
1.5       oster    1777: #if 0
1.9       oster    1778:        db1_printf(("bp->b_data=0x%x\n", bp->b_data));
1.5       oster    1779: #endif
1.1       oster    1780:
1.9       oster    1781:        bp->b_blkno = startSect;
                   1782:        bp->b_resid = bp->b_bcount;     /* XXX is this right!??!?!! */
                   1783:        db1_printf(("b_bcount is: %d\n", (int) bp->b_bcount));
1.1       oster    1784:        if (bp->b_bcount == 0) {
                   1785:                panic("bp->b_bcount is zero in InitBP!!\n");
                   1786:        }
1.9       oster    1787:        bp->b_proc = b_proc;
                   1788:        bp->b_iodone = cbFunc;
                   1789:        bp->b_vp = b_vp;
                   1790:
1.1       oster    1791: }
                   1792:
                   1793: static void
                   1794: raidgetdefaultlabel(raidPtr, rs, lp)
                   1795:        RF_Raid_t *raidPtr;
                   1796:        struct raid_softc *rs;
                   1797:        struct disklabel *lp;
                   1798: {
                   1799:        db1_printf(("Building a default label...\n"));
                   1800:        bzero(lp, sizeof(*lp));
                   1801:
                   1802:        /* fabricate a label... */
                   1803:        lp->d_secperunit = raidPtr->totalSectors;
                   1804:        lp->d_secsize = raidPtr->bytesPerSector;
                   1805:        lp->d_nsectors = 1024 * (1024 / raidPtr->bytesPerSector);
                   1806:        lp->d_ntracks = 1;
                   1807:        lp->d_ncylinders = raidPtr->totalSectors / lp->d_nsectors;
                   1808:        lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
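                                 /* with these values each fabricated cylinder covers
                                  * 1024 * 1024 bytes of the array (assuming the sector
                                  * size divides 1024 evenly) */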
                   1809:
                   1810:        strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1.9       oster    1811:        lp->d_type = DTYPE_RAID;
1.1       oster    1812:        strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
                   1813:        lp->d_rpm = 3600;
                   1814:        lp->d_interleave = 1;
                   1815:        lp->d_flags = 0;
                   1816:
                   1817:        lp->d_partitions[RAW_PART].p_offset = 0;
                   1818:        lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
                   1819:        lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
                   1820:        lp->d_npartitions = RAW_PART + 1;
                   1821:
                   1822:        lp->d_magic = DISKMAGIC;
                   1823:        lp->d_magic2 = DISKMAGIC;
                   1824:        lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
                   1825:
                   1826: }
                   1827: /*
                   1828:  * Read the disklabel from the raid device.  If one is not present, fake one
                   1829:  * up.
                   1830:  */
                   1831: static void
                   1832: raidgetdisklabel(dev)
1.9       oster    1833:        dev_t   dev;
1.1       oster    1834: {
1.9       oster    1835:        int     unit = raidunit(dev);
1.1       oster    1836:        struct raid_softc *rs = &raid_softc[unit];
1.9       oster    1837:        char   *errstring;
1.1       oster    1838:        struct disklabel *lp = rs->sc_dkdev.dk_label;
                   1839:        struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
                   1840:        RF_Raid_t *raidPtr;
                   1841:
                   1842:        db1_printf(("Getting the disklabel...\n"));
                   1843:
                   1844:        bzero(clp, sizeof(*clp));
                   1845:
                   1846:        raidPtr = raidPtrs[unit];
                   1847:
                   1848:        raidgetdefaultlabel(raidPtr, rs, lp);
                   1849:
                   1850:        /*
                   1851:         * Call the generic disklabel extraction routine.
                   1852:         */
                   1853:        errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
                   1854:            rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1.9       oster    1855:        if (errstring)
1.1       oster    1856:                raidmakedisklabel(rs);
                   1857:        else {
1.9       oster    1858:                int     i;
1.1       oster    1859:                struct partition *pp;
                   1860:
                   1861:                /*
                   1862:                 * Sanity check whether the found disklabel is valid.
                   1863:                 *
                    1864:                 * This is necessary since the total size of the raid device
                    1865:                 * may vary when the interleave is changed even though exactly
                    1866:                 * the same components are used, and an old disklabel may be
                    1867:                 * used if one is found.
                   1868:                 */
                   1869:                if (lp->d_secperunit != rs->sc_size)
                   1870:                        printf("WARNING: %s: "
                   1871:                            "total sector size in disklabel (%d) != "
1.18      oster    1872:                            "the size of raid (%ld)\n", rs->sc_xname,
                   1873:                            lp->d_secperunit, (long) rs->sc_size);
1.1       oster    1874:                for (i = 0; i < lp->d_npartitions; i++) {
                   1875:                        pp = &lp->d_partitions[i];
                   1876:                        if (pp->p_offset + pp->p_size > rs->sc_size)
                   1877:                                printf("WARNING: %s: end of partition `%c' "
1.18      oster    1878:                                    "exceeds the size of raid (%ld)\n",
                   1879:                                    rs->sc_xname, 'a' + i, (long) rs->sc_size);
1.1       oster    1880:                }
                   1881:        }
                   1882:
                   1883: }
                   1884: /*
                   1885:  * Take care of things one might want to take care of in the event
                   1886:  * that a disklabel isn't present.
                   1887:  */
                   1888: static void
                   1889: raidmakedisklabel(rs)
                   1890:        struct raid_softc *rs;
                   1891: {
                   1892:        struct disklabel *lp = rs->sc_dkdev.dk_label;
                   1893:        db1_printf(("Making a label..\n"));
                   1894:
                   1895:        /*
                   1896:         * For historical reasons, if there's no disklabel present
                   1897:         * the raw partition must be marked FS_BSDFFS.
                   1898:         */
                   1899:
                   1900:        lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
                   1901:
                   1902:        strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
                   1903:
                   1904:        lp->d_checksum = dkcksum(lp);
                   1905: }
                   1906: /*
                   1907:  * Lookup the provided name in the filesystem.  If the file exists,
                   1908:  * is a valid block device, and isn't being used by anyone else,
                   1909:  * set *vpp to the file's vnode.
1.9       oster    1910:  * You'll find the original of this in ccd.c
1.1       oster    1911:  */
                   1912: int
                   1913: raidlookup(path, p, vpp)
1.9       oster    1914:        char   *path;
1.1       oster    1915:        struct proc *p;
                   1916:        struct vnode **vpp;     /* result */
                   1917: {
                   1918:        struct nameidata nd;
                   1919:        struct vnode *vp;
                   1920:        struct vattr va;
1.9       oster    1921:        int     error;
1.1       oster    1922:
                   1923:        NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1.9       oster    1924:        if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1.1       oster    1925: #ifdef DEBUG
1.9       oster    1926:                printf("RAIDframe: vn_open returned %d\n", error);
1.1       oster    1927: #endif
                   1928:                return (error);
                   1929:        }
                   1930:        vp = nd.ni_vp;
                   1931:        if (vp->v_usecount > 1) {
                   1932:                VOP_UNLOCK(vp, 0);
1.9       oster    1933:                (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1.1       oster    1934:                return (EBUSY);
                   1935:        }
                   1936:        if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
                   1937:                VOP_UNLOCK(vp, 0);
1.9       oster    1938:                (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1.1       oster    1939:                return (error);
                   1940:        }
                   1941:        /* XXX: eventually we should handle VREG, too. */
                   1942:        if (va.va_type != VBLK) {
                   1943:                VOP_UNLOCK(vp, 0);
1.9       oster    1944:                (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1.1       oster    1945:                return (ENOTBLK);
                   1946:        }
                   1947:        VOP_UNLOCK(vp, 0);
                   1948:        *vpp = vp;
                   1949:        return (0);
                   1950: }
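
/*
 * A minimal usage sketch for raidlookup() (hypothetical caller, not code
 * taken from this file): the vnode comes back opened FREAD|FWRITE but
 * unlocked, so the caller owns the matching vn_close().
 *
 *	struct vnode *vp;
 *	int error;
 *
 *	if ((error = raidlookup("/dev/sd0e", p, &vp)) != 0)
 *		return (error);
 *	... use vp as a RAID component ...
 *	(void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
 */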
                   1951: /*
                   1952:  * Wait interruptibly for an exclusive lock.
                   1953:  *
                   1954:  * XXX
                   1955:  * Several drivers do this; it should be abstracted and made MP-safe.
                   1956:  * (Hmm... where have we seen this warning before :->  GO )
                   1957:  */
                   1958: static int
                   1959: raidlock(rs)
                   1960:        struct raid_softc *rs;
                   1961: {
1.9       oster    1962:        int     error;
1.1       oster    1963:
                   1964:        while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
                   1965:                rs->sc_flags |= RAIDF_WANTED;
1.9       oster    1966:                if ((error =
                   1967:                        tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1.1       oster    1968:                        return (error);
                   1969:        }
                   1970:        rs->sc_flags |= RAIDF_LOCKED;
                   1971:        return (0);
                   1972: }
                   1973: /*
                   1974:  * Unlock and wake up any waiters.
                   1975:  */
                   1976: static void
                   1977: raidunlock(rs)
                   1978:        struct raid_softc *rs;
                   1979: {
                   1980:
                   1981:        rs->sc_flags &= ~RAIDF_LOCKED;
                   1982:        if ((rs->sc_flags & RAIDF_WANTED) != 0) {
                   1983:                rs->sc_flags &= ~RAIDF_WANTED;
                   1984:                wakeup(rs);
                   1985:        }
1.11      oster    1986: }
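
/*
 * A minimal sketch of how the lock pair above is typically used by the
 * driver entry points (hypothetical caller, assuming rs has already been
 * looked up from the unit number):
 *
 *	if ((error = raidlock(rs)) != 0)
 *		return (error);
 *	... examine or modify the softc ...
 *	raidunlock(rs);
 */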
                   1987:
                   1988:
                   1989: #define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
                   1990: #define RF_COMPONENT_INFO_SIZE     1024 /* bytes */
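
/*
 * With the usual 512-byte DEV_BSIZE (an assumption; this file does not
 * define it), the component label sits at sector 16384 / 512 = 32 of each
 * component and spans 1024 / 512 = 2 sectors, which is expected to fall
 * inside the region reserved by rf_protected_sectors (see the XXX note in
 * raidread_component_label() below).
 */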
                   1991:
                   1992: int
1.12      oster    1993: raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
                   1994: {
                   1995:        RF_ComponentLabel_t component_label;
                   1996:        raidread_component_label(dev, b_vp, &component_label);
                   1997:        component_label.mod_counter = mod_counter;
                   1998:        component_label.clean = RF_RAID_CLEAN;
                   1999:        raidwrite_component_label(dev, b_vp, &component_label);
                   2000:        return(0);
                   2001: }
                   2002:
                   2003:
                   2004: int
                   2005: raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
1.11      oster    2006: {
1.12      oster    2007:        RF_ComponentLabel_t component_label;
                   2008:        raidread_component_label(dev, b_vp, &component_label);
                   2009:        component_label.mod_counter = mod_counter;
                   2010:        component_label.clean = RF_RAID_DIRTY;
                   2011:        raidwrite_component_label(dev, b_vp, &component_label);
1.11      oster    2012:        return(0);
                   2013: }
                   2014:
                   2015: /* ARGSUSED */
                   2016: int
1.29.8.1! wrstuden 2017: raidread_component_label(dev, b_vp, component_label, bshift, bsize)
1.11      oster    2018:        dev_t dev;
                   2019:        struct vnode *b_vp;
                   2020:        RF_ComponentLabel_t *component_label;
1.29.8.1! wrstuden 2021:        int bshift;
        !          2022:        int bsize;
1.11      oster    2023: {
                   2024:        struct buf *bp;
                   2025:        int error;
                   2026:
                   2027:        /* XXX should probably ensure that we don't try to do this if
                   2028:           someone has changed rf_protected_sectors. */
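	/*
	 * A sketch of such a check (assumption: rf_protected_sectors is
	 * counted in sectors of (1 << bshift) bytes; that is not spelled
	 * out here):
	 *
	 *	if ((rf_protected_sectors << bshift) <
	 *	    RF_COMPONENT_INFO_OFFSET + RF_COMPONENT_INFO_SIZE)
	 *		return (EINVAL);
	 */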
                   2029:
                   2030:        /* get a block of the appropriate size... */
                   2031:        bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
1.29.8.1! wrstuden 2032:        if (bshift < 0) {
        !          2033:                error = EINVAL;
        !          2034:                goto out;
        !          2035:        }
1.11      oster    2036:        bp->b_dev = dev;
1.29.8.1! wrstuden 2037:        bp->b_bshift = bshift;
        !          2038:        bp->b_bsize = blocksize(bshift);
1.11      oster    2039:
                   2040:        /* get our ducks in a row for the read */
1.29.8.1! wrstuden 2041:        bp->b_blkno = btodb(RF_COMPONENT_INFO_OFFSET, bshift);
        !          2042:        bp->b_resid = btodb(RF_COMPONENT_INFO_SIZE, bshift);
1.11      oster    2043:        bp->b_bcount = RF_COMPONENT_INFO_SIZE;
                   2044:        bp->b_flags = B_BUSY | B_READ;
                   2045:
                   2046:        (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
                   2047:
                   2048:        error = biowait(bp);
                   2049:
1.29.8.1! wrstuden 2050: out:
1.11      oster    2051:        if (!error) {
                   2052:                memcpy(component_label, bp->b_un.b_addr,
                   2053:                       sizeof(RF_ComponentLabel_t));
1.12      oster    2054: #if 0
1.11      oster    2055:                printf("raidread_component_label: got component label:\n");
                   2056:                printf("Version: %d\n",component_label->version);
                   2057:                printf("Serial Number: %d\n",component_label->serial_number);
                   2058:                printf("Mod counter: %d\n",component_label->mod_counter);
                   2059:                printf("Row: %d\n", component_label->row);
                   2060:                printf("Column: %d\n", component_label->column);
                   2061:                printf("Num Rows: %d\n", component_label->num_rows);
                   2062:                printf("Num Columns: %d\n", component_label->num_columns);
                   2063:                printf("Clean: %d\n", component_label->clean);
                   2064:                printf("Status: %d\n", component_label->status);
                   2065: #endif
                   2066:         } else {
                   2067:                printf("Failed to read RAID component label!\n");
                   2068:        }
                   2069:
                   2070:         bp->b_flags = B_INVAL | B_AGE;
                   2071:        brelse(bp);
                   2072:        return(error);
                   2073: }
                   2074: /* ARGSUSED */
                   2075: int
1.29.8.1! wrstuden 2076: raidwrite_component_label(dev, b_vp, component_label, bshift, bsize)
1.11      oster    2077:        dev_t dev;
                   2078:        struct vnode *b_vp;
                   2079:        RF_ComponentLabel_t *component_label;
1.29.8.1! wrstuden 2080:        int bshift;
        !          2081:        int bsize;
1.11      oster    2082: {
                   2083:        struct buf *bp;
                   2084:        int error;
                   2085:
                   2086:        /* get a block of the appropriate size... */
                   2087:        bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
1.29.8.1! wrstuden 2088:        if (bshift < 0) {
        !          2089:                error = EINVAL;
        !          2090:                goto out;
        !          2091:        }
1.11      oster    2092:        bp->b_dev = dev;
1.29.8.1! wrstuden 2093:        bp->b_bshift = bshift;
        !          2094:        bp->b_bsize = blocksize(bshift);
1.11      oster    2095:
                   2096:        /* get our ducks in a row for the write */
1.29.8.1! wrstuden 2097:        bp->b_blkno = btodb(RF_COMPONENT_INFO_OFFSET, bshift);
        !          2098:        bp->b_resid = btodb(RF_COMPONENT_INFO_SIZE, bshift);
1.11      oster    2099:        bp->b_bcount = RF_COMPONENT_INFO_SIZE;
                   2100:        bp->b_flags = B_BUSY | B_WRITE;
                   2101:
                    2102:        memset(bp->b_un.b_addr, 0, RF_COMPONENT_INFO_SIZE);
                    2103:
                    2104:        memcpy(bp->b_un.b_addr, component_label, sizeof(RF_ComponentLabel_t));
                   2105:
                   2106:        (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
                   2107:        error = biowait(bp);
1.29.8.1! wrstuden 2108:
        !          2109: out:
1.11      oster    2110:         bp->b_flags = B_INVAL | B_AGE;
                   2111:        brelse(bp);
                   2112:        if (error) {
                   2113:                printf("Failed to write RAID component info!\n");
                   2114:        }
                   2115:
                   2116:        return(error);
1.1       oster    2117: }
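
/*
 * How the clean/dirty marks above fit together (a summary of the two
 * functions below, not new behaviour): rf_markalldirty() bumps
 * raidPtr->mod_counter and marks every non-failed component dirty;
 * rf_update_component_labels() rewrites each optimal component's label
 * (and those of any used spares) and, if parity_good == RF_RAID_CLEAN,
 * marks the components clean again.  The increasing mod_counter lets
 * later code tell which labels were written most recently.
 */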
1.12      oster    2118:
                   2119: void
                   2120: rf_markalldirty( raidPtr )
                   2121:        RF_Raid_t *raidPtr;
                   2122: {
                   2123:        RF_ComponentLabel_t c_label;
                   2124:        int r,c;
                   2125:
                   2126:        raidPtr->mod_counter++;
                   2127:        for (r = 0; r < raidPtr->numRow; r++) {
                   2128:                for (c = 0; c < raidPtr->numCol; c++) {
                   2129:                        if (raidPtr->Disks[r][c].status != rf_ds_failed) {
                   2130:                                raidread_component_label(
                   2131:                                        raidPtr->Disks[r][c].dev,
                   2132:                                        raidPtr->raid_cinfo[r][c].ci_vp,
                   2133:                                        &c_label);
                   2134:                                if (c_label.status == rf_ds_spared) {
                   2135:                                        /* XXX do something special...
                   2136:                                         but whatever you do, don't
                   2137:                                         try to access it!! */
                   2138:                                } else {
                   2139: #if 0
                   2140:                                c_label.status =
                   2141:                                        raidPtr->Disks[r][c].status;
                   2142:                                raidwrite_component_label(
                   2143:                                        raidPtr->Disks[r][c].dev,
                   2144:                                        raidPtr->raid_cinfo[r][c].ci_vp,
                   2145:                                        &c_label);
                   2146: #endif
                   2147:                                raidmarkdirty(
                   2148:                                       raidPtr->Disks[r][c].dev,
                   2149:                                       raidPtr->raid_cinfo[r][c].ci_vp,
                   2150:                                       raidPtr->mod_counter);
                   2151:                                }
                   2152:                        }
                   2153:                }
                   2154:        }
1.13      oster    2155:        /* printf("Component labels marked dirty.\n"); */
1.12      oster    2156: #if 0
                   2157:        for( c = 0; c < raidPtr->numSpare ; c++) {
                   2158:                sparecol = raidPtr->numCol + c;
                   2159:                if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
                   2160:                        /*
                   2161:
                   2162:                           XXX this is where we get fancy and map this spare
                    2163:                           into its correct spot in the array.
                   2164:
                   2165:                         */
                   2166:                        /*
                   2167:
                   2168:                           we claim this disk is "optimal" if it's
                   2169:                           rf_ds_used_spare, as that means it should be
                   2170:                           directly substitutable for the disk it replaced.
                   2171:                           We note that too...
                   2172:
                   2173:                         */
                   2174:
                   2175:                        for(i=0;i<raidPtr->numRow;i++) {
                   2176:                                for(j=0;j<raidPtr->numCol;j++) {
                   2177:                                        if ((raidPtr->Disks[i][j].spareRow ==
                   2178:                                             r) &&
                   2179:                                            (raidPtr->Disks[i][j].spareCol ==
                   2180:                                             sparecol)) {
                   2181:                                                srow = r;
                   2182:                                                scol = sparecol;
                   2183:                                                break;
                   2184:                                        }
                   2185:                                }
                   2186:                        }
                   2187:
                   2188:                        raidread_component_label(
                   2189:                                      raidPtr->Disks[r][sparecol].dev,
                   2190:                                      raidPtr->raid_cinfo[r][sparecol].ci_vp,
                   2191:                                      &c_label);
                   2192:                        /* make sure status is noted */
                   2193:                        c_label.version = RF_COMPONENT_LABEL_VERSION;
                   2194:                        c_label.mod_counter = raidPtr->mod_counter;
                   2195:                        c_label.serial_number = raidPtr->serial_number;
                   2196:                        c_label.row = srow;
                   2197:                        c_label.column = scol;
                   2198:                        c_label.num_rows = raidPtr->numRow;
                   2199:                        c_label.num_columns = raidPtr->numCol;
                   2200:                        c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
                   2201:                        c_label.status = rf_ds_optimal;
                   2202:                        raidwrite_component_label(
                   2203:                                      raidPtr->Disks[r][sparecol].dev,
                   2204:                                      raidPtr->raid_cinfo[r][sparecol].ci_vp,
                   2205:                                      &c_label);
                   2206:                        raidmarkclean( raidPtr->Disks[r][sparecol].dev,
                   2207:                                      raidPtr->raid_cinfo[r][sparecol].ci_vp);
                   2208:                }
                   2209:        }
                   2210:
                   2211: #endif
                   2212: }
                   2213:
1.13      oster    2214:
                   2215: void
                   2216: rf_update_component_labels( raidPtr )
                   2217:        RF_Raid_t *raidPtr;
                   2218: {
                   2219:        RF_ComponentLabel_t c_label;
                   2220:        int sparecol;
                   2221:        int r,c;
                   2222:        int i,j;
                   2223:        int srow, scol;
                   2224:
                   2225:        srow = -1;
                   2226:        scol = -1;
                   2227:
                   2228:        /* XXX should do extra checks to make sure things really are clean,
                   2229:           rather than blindly setting the clean bit... */
                   2230:
                   2231:        raidPtr->mod_counter++;
                   2232:
                   2233:        for (r = 0; r < raidPtr->numRow; r++) {
                   2234:                for (c = 0; c < raidPtr->numCol; c++) {
                   2235:                        if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
                   2236:                                raidread_component_label(
                   2237:                                        raidPtr->Disks[r][c].dev,
                   2238:                                        raidPtr->raid_cinfo[r][c].ci_vp,
                   2239:                                        &c_label);
                   2240:                                /* make sure status is noted */
                   2241:                                c_label.status = rf_ds_optimal;
                   2242:                                raidwrite_component_label(
                   2243:                                        raidPtr->Disks[r][c].dev,
                   2244:                                        raidPtr->raid_cinfo[r][c].ci_vp,
                   2245:                                        &c_label);
                   2246:                                if (raidPtr->parity_good == RF_RAID_CLEAN) {
                   2247:                                        raidmarkclean(
                   2248:                                              raidPtr->Disks[r][c].dev,
                   2249:                                              raidPtr->raid_cinfo[r][c].ci_vp,
                   2250:                                              raidPtr->mod_counter);
                   2251:                                }
                   2252:                        }
                   2253:                        /* else we don't touch it.. */
                   2254: #if 0
                   2255:                        else if (raidPtr->Disks[r][c].status !=
                   2256:                                   rf_ds_failed) {
                   2257:                                raidread_component_label(
                   2258:                                        raidPtr->Disks[r][c].dev,
                   2259:                                        raidPtr->raid_cinfo[r][c].ci_vp,
                   2260:                                        &c_label);
                   2261:                                /* make sure status is noted */
                   2262:                                c_label.status =
                   2263:                                        raidPtr->Disks[r][c].status;
                   2264:                                raidwrite_component_label(
                   2265:                                        raidPtr->Disks[r][c].dev,
                   2266:                                        raidPtr->raid_cinfo[r][c].ci_vp,
                   2267:                                        &c_label);
                   2268:                                if (raidPtr->parity_good == RF_RAID_CLEAN) {
                   2269:                                        raidmarkclean(
                   2270:                                              raidPtr->Disks[r][c].dev,
                   2271:                                              raidPtr->raid_cinfo[r][c].ci_vp,
                   2272:                                              raidPtr->mod_counter);
                   2273:                                }
                   2274:                        }
                   2275: #endif
                   2276:                }
                   2277:        }
                   2278:
                   2279:        for( c = 0; c < raidPtr->numSpare ; c++) {
                   2280:                sparecol = raidPtr->numCol + c;
                   2281:                if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
                   2282:                        /*
                   2283:
                   2284:                           we claim this disk is "optimal" if it's
                   2285:                           rf_ds_used_spare, as that means it should be
                   2286:                           directly substitutable for the disk it replaced.
                   2287:                           We note that too...
                   2288:
                   2289:                         */
                   2290:
                   2291:                        for(i=0;i<raidPtr->numRow;i++) {
                   2292:                                for(j=0;j<raidPtr->numCol;j++) {
                   2293:                                        if ((raidPtr->Disks[i][j].spareRow ==
                   2294:                                             0) &&
                   2295:                                            (raidPtr->Disks[i][j].spareCol ==
                   2296:                                             sparecol)) {
                   2297:                                                srow = i;
                   2298:                                                scol = j;
                   2299:                                                break;
                   2300:                                        }
                   2301:                                }
                   2302:                        }
                   2303:
                   2304:                        raidread_component_label(
                   2305:                                      raidPtr->Disks[0][sparecol].dev,
                   2306:                                      raidPtr->raid_cinfo[0][sparecol].ci_vp,
                   2307:                                      &c_label);
                   2308:                        /* make sure status is noted */
                   2309:                        c_label.version = RF_COMPONENT_LABEL_VERSION;
                   2310:                        c_label.mod_counter = raidPtr->mod_counter;
                   2311:                        c_label.serial_number = raidPtr->serial_number;
                   2312:                        c_label.row = srow;
                   2313:                        c_label.column = scol;
                   2314:                        c_label.num_rows = raidPtr->numRow;
                   2315:                        c_label.num_columns = raidPtr->numCol;
                   2316:                        c_label.clean = RF_RAID_DIRTY; /* changed in a bit*/
                   2317:                        c_label.status = rf_ds_optimal;
                   2318:                        raidwrite_component_label(
                   2319:                                      raidPtr->Disks[0][sparecol].dev,
                   2320:                                      raidPtr->raid_cinfo[0][sparecol].ci_vp,
                   2321:                                      &c_label);
                   2322:                        if (raidPtr->parity_good == RF_RAID_CLEAN) {
                   2323:                                raidmarkclean( raidPtr->Disks[0][sparecol].dev,
                   2324:                                      raidPtr->raid_cinfo[0][sparecol].ci_vp,
                   2325:                                               raidPtr->mod_counter);
                   2326:                        }
                   2327:                }
                   2328:        }
                   2329:        /*      printf("Component labels updated\n"); */
                   2330: }
