Annotation of src/sys/dev/raidframe/rf_netbsdkintf.c, Revision 1.117.6.3
1.117.6.3! gehenna 1: /* $NetBSD: rf_netbsdkintf.c,v 1.124 2002/07/13 17:47:44 oster Exp $ */
1.1 oster 2: /*-
3: * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4: * All rights reserved.
5: *
6: * This code is derived from software contributed to The NetBSD Foundation
7: * by Greg Oster; Jason R. Thorpe.
8: *
9: * Redistribution and use in source and binary forms, with or without
10: * modification, are permitted provided that the following conditions
11: * are met:
12: * 1. Redistributions of source code must retain the above copyright
13: * notice, this list of conditions and the following disclaimer.
14: * 2. Redistributions in binary form must reproduce the above copyright
15: * notice, this list of conditions and the following disclaimer in the
16: * documentation and/or other materials provided with the distribution.
17: * 3. All advertising materials mentioning features or use of this software
18: * must display the following acknowledgement:
19: * This product includes software developed by the NetBSD
20: * Foundation, Inc. and its contributors.
21: * 4. Neither the name of The NetBSD Foundation nor the names of its
22: * contributors may be used to endorse or promote products derived
23: * from this software without specific prior written permission.
24: *
25: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35: * POSSIBILITY OF SUCH DAMAGE.
36: */
37:
38: /*
39: * Copyright (c) 1988 University of Utah.
40: * Copyright (c) 1990, 1993
41: * The Regents of the University of California. All rights reserved.
42: *
43: * This code is derived from software contributed to Berkeley by
44: * the Systems Programming Group of the University of Utah Computer
45: * Science Department.
46: *
47: * Redistribution and use in source and binary forms, with or without
48: * modification, are permitted provided that the following conditions
49: * are met:
50: * 1. Redistributions of source code must retain the above copyright
51: * notice, this list of conditions and the following disclaimer.
52: * 2. Redistributions in binary form must reproduce the above copyright
53: * notice, this list of conditions and the following disclaimer in the
54: * documentation and/or other materials provided with the distribution.
55: * 3. All advertising materials mentioning features or use of this software
56: * must display the following acknowledgement:
57: * This product includes software developed by the University of
58: * California, Berkeley and its contributors.
59: * 4. Neither the name of the University nor the names of its contributors
60: * may be used to endorse or promote products derived from this software
61: * without specific prior written permission.
62: *
63: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73: * SUCH DAMAGE.
74: *
75: * from: Utah $Hdr: cd.c 1.6 90/11/28$
76: *
77: * @(#)cd.c 8.2 (Berkeley) 11/16/93
78: */
79:
80:
81:
82:
83: /*
84: * Copyright (c) 1995 Carnegie-Mellon University.
85: * All rights reserved.
86: *
87: * Authors: Mark Holland, Jim Zelenka
88: *
89: * Permission to use, copy, modify and distribute this software and
90: * its documentation is hereby granted, provided that both the copyright
91: * notice and this permission notice appear in all copies of the
92: * software, derivative works or modified versions, and any portions
93: * thereof, and that both notices appear in supporting documentation.
94: *
95: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
96: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
97: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
98: *
99: * Carnegie Mellon requests users of this software to return to
100: *
101: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
102: * School of Computer Science
103: * Carnegie Mellon University
104: * Pittsburgh PA 15213-3890
105: *
106: * any improvements or extensions that they make and grant Carnegie the
107: * rights to redistribute these changes.
108: */
109:
110: /***********************************************************
111: *
112: * rf_kintf.c -- the kernel interface routines for RAIDframe
113: *
114: ***********************************************************/
1.112 lukem 115:
116: #include <sys/cdefs.h>
1.117.6.3! gehenna 117: __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.124 2002/07/13 17:47:44 oster Exp $");
1.1 oster 118:
1.113 lukem 119: #include <sys/param.h>
1.1 oster 120: #include <sys/errno.h>
121: #include <sys/pool.h>
122: #include <sys/queue.h>
123: #include <sys/disk.h>
124: #include <sys/device.h>
125: #include <sys/stat.h>
126: #include <sys/ioctl.h>
127: #include <sys/fcntl.h>
128: #include <sys/systm.h>
129: #include <sys/namei.h>
130: #include <sys/vnode.h>
131: #include <sys/disklabel.h>
132: #include <sys/conf.h>
133: #include <sys/lock.h>
134: #include <sys/buf.h>
135: #include <sys/user.h>
1.65 oster 136: #include <sys/reboot.h>
1.8 oster 137:
1.110 oster 138: #include <dev/raidframe/raidframevar.h>
139: #include <dev/raidframe/raidframeio.h>
1.8 oster 140: #include "raid.h"
1.62 oster 141: #include "opt_raid_autoconfig.h"
1.1 oster 142: #include "rf_raid.h"
1.44 oster 143: #include "rf_copyback.h"
1.1 oster 144: #include "rf_dag.h"
145: #include "rf_dagflags.h"
1.99 oster 146: #include "rf_desc.h"
1.1 oster 147: #include "rf_diskqueue.h"
148: #include "rf_acctrace.h"
149: #include "rf_etimer.h"
150: #include "rf_general.h"
151: #include "rf_debugMem.h"
152: #include "rf_kintf.h"
153: #include "rf_options.h"
154: #include "rf_driver.h"
155: #include "rf_parityscan.h"
156: #include "rf_debugprint.h"
157: #include "rf_threadstuff.h"
158:
/* Debug verbosity for this file; db1_printf() only prints when this is > 0. */
int rf_kdebug_level = 0;

/*
 * db1_printf((fmt, args...)) -- conditional debug printf.
 *
 * The argument is a complete, parenthesized printf argument list (hence the
 * doubled parentheses at every call site).  Both variants expand to exactly
 * one statement that needs a trailing semicolon, so the macro can be used
 * as the body of an unbraced if/else without capturing the else branch or
 * leaving a stray empty statement behind.
 */
#ifdef DEBUG
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else /* DEBUG */
#define db1_printf(a) do { } while (0)
#endif /* DEBUG */
1.1 oster 166:
/*
 * Array of per-unit RAIDframe descriptors, indexed by unit number.  The
 * array and its elements are allocated in raidattach().
 */
1.9 oster 167: static RF_Raid_t **raidPtrs; /* global raid device descriptors */
1.1 oster 168:
/*
 * Mutex initialized in raidattach().
 * NOTE(review): presumably protects the two spare-table queues declared
 * below -- confirm against the code that uses them.
 */
1.11 oster 169: RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
1.1 oster 170:
/* Both queue heads are reset to NULL in raidattach(). */
1.10 oster 171: static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
172: * spare table */
173: static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
174: * installation process */
175:
1.1 oster 176: /* prototypes */
/* File-local helpers; being static, they are defined in this file. */
1.10 oster 177: static void KernelWakeupFunc(struct buf * bp);
178: static void InitBP(struct buf * bp, struct vnode *, unsigned rw_flag,
179: dev_t dev, RF_SectorNum_t startSect,
180: RF_SectorCount_t numSect, caddr_t buf,
181: void (*cbFunc) (struct buf *), void *cbArg,
182: int logBytesPerSector, struct proc * b_proc);
1.104 oster 183: static void raidinit(RF_Raid_t *);
1.1 oster 184:
/* Attach routine; the argument is the number of units to set up. */
1.104 oster 185: void raidattach(int);
1.117.6.1 gehenna 186:
/* Declarations of the device entry points placed in the switch tables below. */
187: dev_type_open(raidopen);
188: dev_type_close(raidclose);
189: dev_type_read(raidread);
190: dev_type_write(raidwrite);
191: dev_type_ioctl(raidioctl);
192: dev_type_strategy(raidstrategy);
193: dev_type_dump(raiddump);
194: dev_type_size(raidsize);
195:
/* Block-device switch entry for the raid driver (type flag D_DISK). */
196: const struct bdevsw raid_bdevsw = {
197: raidopen, raidclose, raidstrategy, raidioctl,
198: raiddump, raidsize, D_DISK
199: };
200:
/*
 * Character-device switch entry for the raid driver (type flag D_DISK).
 * The slots after raidioctl are filled with the no* placeholders
 * (nostop, notty, nopoll, nommap).
 */
201: const struct cdevsw raid_cdevsw = {
202: raidopen, raidclose, raidread, raidwrite, raidioctl,
203: nostop, notty, nopoll, nommap, D_DISK
204: };
1.1 oster 205:
206: /*
207: * Pilfered from ccd.c
208: */
209:
/*
 * Wrapper around the struct buf used for component I/O.  Objects of this
 * size are what raidframe_cbufpool is initialized to hand out (see
 * raidattach()).
 * NOTE(review): rf_buf presumably has to stay the first member so that a
 * struct buf pointer can be converted back to the enclosing raidbuf --
 * confirm at the use sites.
 */
1.10 oster 210: struct raidbuf {
211: struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
212: struct buf *rf_obp; /* ptr. to original I/O buf */
213: int rf_flags; /* misc. flags */
1.11 oster 214: RF_DiskQueueData_t *req;/* the request that this was part of.. */
1.10 oster 215: };
1.1 oster 216:
1.116 thorpej 217: /* component buffer pool */
/* Pool of struct raidbuf objects; initialized in raidattach(). */
218: struct pool raidframe_cbufpool;
1.1 oster 219:
/*
 * Get/put wrappers around the pool above.  The rs argument is accepted but
 * not used by either macro.  RAIDGETBUF asks with PR_NOWAIT.
 * NOTE(review): a PR_NOWAIT request can presumably come back NULL; the
 * callers are not visible here -- confirm that they check for it.
 */
1.116 thorpej 220: #define RAIDGETBUF(rs) pool_get(&raidframe_cbufpool, PR_NOWAIT)
221: #define RAIDPUTBUF(rs, cbp) pool_put(&raidframe_cbufpool, cbp)
1.1 oster 222:
1.9 oster 223: /* XXX Not sure if the following should be replacing the raidPtrs above,
1.53 oster 224: or if it should be used in conjunction with that...
1.59 oster 225: */
1.1 oster 226:
/*
 * Per-unit driver state.  raidattach() allocates one of these per unit in
 * the raid_softc array and zero-fills it; the device entry points index
 * that array by unit number.
 */
1.10 oster 227: struct raid_softc {
228: int sc_flags; /* flags */
229: int sc_cflags; /* configuration flags */
1.11 oster 230: size_t sc_size; /* size of the raid device */
1.10 oster 231: char sc_xname[20]; /* XXX external name */
232: struct disk sc_dkdev; /* generic disk device info */
1.47 thorpej 233: struct buf_queue buf_queue; /* used for the device queue */
1.10 oster 234: };
1.1 oster 235: /* sc_flags */
236: #define RAIDF_INITED 0x01 /* unit has been initialized */
237: #define RAIDF_WLABEL 0x02 /* label area is writable */
238: #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
239: #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
240: #define RAIDF_LOCKED 0x80 /* unit is locked */
241:
/* Extract the unit number from a dev_t. */
242: #define raidunit(x) DISKUNIT(x)
/*
 * Number of units the entry points accept (unit >= numraid is rejected).
 * Set by raidattach().
 */
1.48 oster 243: int numraid = 0;
1.1 oster 244:
1.20 oster 245: /*
246: * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
247: * Be aware that large numbers can allow the driver to consume a lot of
1.28 oster 248: * kernel memory, especially on writes, and in degraded mode reads.
249: *
250: * For example: with a stripe width of 64 blocks (32k) and 5 disks,
251: * a single 64K write will typically require 64K for the old data,
252: * 64K for the old parity, and 64K for the new parity, for a total
253: * of 192K (if the parity buffer is not re-used immediately).
1.110 oster 254: * Even if it is used immediately, that's still 128K, which when multiplied
1.28 oster 255: * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
256: *
257: * Now in degraded mode, for example, a 64K read on the above setup may
258: * require data reconstruction, which will require *all* of the 4 remaining
259: * disks to participate -- 4 * 32K/disk == 128K again.
1.20 oster 260: */
261:
/*
 * Default number of outstanding I/Os; the #ifndef guard lets a build-time
 * definition take precedence.  The value is stored in raidPtr->openings
 * when a set is configured through RAIDFRAME_CONFIGURE.
 */
262: #ifndef RAIDOUTSTANDING
1.28 oster 263: #define RAIDOUTSTANDING 6
1.20 oster 264: #endif
265:
/*
 * Build the dev_t of the raw partition (RAW_PART) of the unit that dev
 * belongs to, keeping dev's major number.
 */
1.1 oster 266: #define RAIDLABELDEV(dev) \
267: (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
268:
269: /* declared here, and made public, for the benefit of KVM stuff.. */
/* Per-unit softc array; allocated and zeroed in raidattach(). */
1.10 oster 270: struct raid_softc *raid_softc;
1.9 oster 271:
/* Disklabel helpers (file-local). */
1.104 oster 272: static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
273: struct disklabel *);
274: static void raidgetdisklabel(dev_t);
275: static void raidmakedisklabel(struct raid_softc *);
1.1 oster 276:
/*
 * Per-unit lock helpers (file-local).  raidlock() returns non-zero on
 * failure; the callers in this file bail out with that value.
 */
1.104 oster 277: static int raidlock(struct raid_softc *);
278: static void raidunlock(struct raid_softc *);
1.1 oster 279:
1.104 oster 280: static void rf_markalldirty(RF_Raid_t *);
281: void rf_mountroot_hook(struct device *);
1.48 oster 282:
/*
 * One struct device per unit, filled in by raidattach(); an entry becomes
 * booted_device in rf_buildroothack() when exactly one rootable set was
 * configured.
 */
283: struct device *raidrootdev;
1.1 oster 284:
1.104 oster 285: void rf_ReconThread(struct rf_recon_req *);
1.37 oster 286: /* XXX what I want is: */
1.104 oster 287: /*void rf_ReconThread(RF_Raid_t *raidPtr); */
288: void rf_RewriteParityThread(RF_Raid_t *raidPtr);
289: void rf_CopybackThread(RF_Raid_t *raidPtr);
290: void rf_ReconstructInPlaceThread(struct rf_recon_req *);
/* Scheduled from raidattach() with the list of autoconfig sets as argument. */
291: void rf_buildroothack(void *);
292:
/* Autoconfiguration support. */
293: RF_AutoConfig_t *rf_find_raid_components(void);
294: RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
295: static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
296: static int rf_reasonable_label(RF_ComponentLabel_t *);
297: void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
298: int rf_set_autoconfig(RF_Raid_t *, int);
299: int rf_set_rootpartition(RF_Raid_t *, int);
300: void rf_release_all_vps(RF_ConfigSet_t *);
301: void rf_cleanup_config_set(RF_ConfigSet_t *);
302: int rf_have_enough_components(RF_ConfigSet_t *);
303: int rf_auto_config_set(RF_ConfigSet_t *, int *);
1.48 oster 304:
/* raidattach() forces this to 1 when RAID_AUTOCONFIG is defined. */
305: static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
1.62 oster 306: allow autoconfig to take place.
307: Note that this is overridden by having
308: RAID_AUTOCONFIG as an option in the
309: kernel config file. */
1.37 oster 310:
1.10 oster 311: void
312: raidattach(num)
1.9 oster 313: int num;
1.1 oster 314: {
1.14 oster 315: int raidID;
316: int i, rc;
1.48 oster 317: RF_AutoConfig_t *ac_list; /* autoconfig list */
318: RF_ConfigSet_t *config_sets;
1.1 oster 319:
320: #ifdef DEBUG
1.9 oster 321: printf("raidattach: Asked for %d units\n", num);
1.1 oster 322: #endif
323:
324: if (num <= 0) {
325: #ifdef DIAGNOSTIC
326: panic("raidattach: count <= 0");
327: #endif
328: return;
329: }
1.9 oster 330: /* This is where all the initialization stuff gets done. */
1.1 oster 331:
1.50 oster 332: numraid = num;
333:
1.1 oster 334: /* Make some space for requested number of units... */
335:
336: RF_Calloc(raidPtrs, num, sizeof(RF_Raid_t *), (RF_Raid_t **));
337: if (raidPtrs == NULL) {
338: panic("raidPtrs is NULL!!\n");
339: }
1.116 thorpej 340:
341: /* Initialize the component buffer pool. */
342: pool_init(&raidframe_cbufpool, sizeof(struct raidbuf), 0,
1.117 thorpej 343: 0, 0, "raidpl", NULL);
1.116 thorpej 344:
1.14 oster 345: rc = rf_mutex_init(&rf_sparet_wait_mutex);
346: if (rc) {
347: RF_PANIC();
348: }
349:
350: rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
351:
1.58 oster 352: for (i = 0; i < num; i++)
1.14 oster 353: raidPtrs[i] = NULL;
354: rc = rf_BootRaidframe();
355: if (rc == 0)
356: printf("Kernelized RAIDframe activated\n");
357: else
1.1 oster 358: panic("Serious error booting RAID!!\n");
1.14 oster 359:
1.9 oster 360: /* put together some datastructures like the CCD device does.. This
361: * lets us lock the device and what-not when it gets opened. */
1.1 oster 362:
363: raid_softc = (struct raid_softc *)
1.48 oster 364: malloc(num * sizeof(struct raid_softc),
365: M_RAIDFRAME, M_NOWAIT);
1.1 oster 366: if (raid_softc == NULL) {
367: printf("WARNING: no memory for RAIDframe driver\n");
368: return;
369: }
1.50 oster 370:
1.108 thorpej 371: memset(raid_softc, 0, num * sizeof(struct raid_softc));
1.34 oster 372:
1.48 oster 373: raidrootdev = (struct device *)malloc(num * sizeof(struct device),
374: M_RAIDFRAME, M_NOWAIT);
375: if (raidrootdev == NULL) {
376: panic("No memory for RAIDframe driver!!?!?!\n");
377: }
378:
1.9 oster 379: for (raidID = 0; raidID < num; raidID++) {
1.47 thorpej 380: BUFQ_INIT(&raid_softc[raidID].buf_queue);
1.48 oster 381:
382: raidrootdev[raidID].dv_class = DV_DISK;
383: raidrootdev[raidID].dv_cfdata = NULL;
384: raidrootdev[raidID].dv_unit = raidID;
385: raidrootdev[raidID].dv_parent = NULL;
386: raidrootdev[raidID].dv_flags = 0;
387: sprintf(raidrootdev[raidID].dv_xname,"raid%d",raidID);
388:
1.9 oster 389: RF_Calloc(raidPtrs[raidID], 1, sizeof(RF_Raid_t),
1.11 oster 390: (RF_Raid_t *));
1.9 oster 391: if (raidPtrs[raidID] == NULL) {
1.39 oster 392: printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
393: numraid = raidID;
394: return;
1.1 oster 395: }
396: }
1.48 oster 397:
1.114 lukem 398: #ifdef RAID_AUTOCONFIG
1.62 oster 399: raidautoconfig = 1;
400: #endif
401:
1.48 oster 402: if (raidautoconfig) {
403: /* 1. locate all RAID components on the system */
404:
405: #if DEBUG
406: printf("Searching for raid components...\n");
407: #endif
408: ac_list = rf_find_raid_components();
409:
410: /* 2. sort them into their respective sets */
411:
412: config_sets = rf_create_auto_sets(ac_list);
413:
414: /* 3. evaluate each set and configure the valid ones
415: This gets done in rf_buildroothack() */
416:
417: /* schedule the creation of the thread to do the
418: "/ on RAID" stuff */
419:
420: kthread_create(rf_buildroothack,config_sets);
421:
422: #if 0
423: mountroothook_establish(rf_mountroot_hook, &raidrootdev[0]);
424: #endif
425: }
426:
427: }
428:
429: void
430: rf_buildroothack(arg)
431: void *arg;
432: {
433: RF_ConfigSet_t *config_sets = arg;
434: RF_ConfigSet_t *cset;
435: RF_ConfigSet_t *next_cset;
1.51 oster 436: int retcode;
1.48 oster 437: int raidID;
1.51 oster 438: int rootID;
439: int num_root;
1.48 oster 440:
1.101 oster 441: rootID = 0;
1.51 oster 442: num_root = 0;
1.48 oster 443: cset = config_sets;
444: while(cset != NULL ) {
445: next_cset = cset->next;
1.51 oster 446: if (rf_have_enough_components(cset) &&
447: cset->ac->clabel->autoconfigure==1) {
448: retcode = rf_auto_config_set(cset,&raidID);
449: if (!retcode) {
450: if (cset->rootable) {
451: rootID = raidID;
452: num_root++;
453: }
454: } else {
455: /* The autoconfig didn't work :( */
456: #if DEBUG
457: printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
458: #endif
459: rf_release_all_vps(cset);
1.48 oster 460: }
461: } else {
462: /* we're not autoconfiguring this set...
463: release the associated resources */
1.49 oster 464: rf_release_all_vps(cset);
1.48 oster 465: }
466: /* cleanup */
1.49 oster 467: rf_cleanup_config_set(cset);
1.48 oster 468: cset = next_cset;
469: }
1.61 oster 470:
1.117.6.3! gehenna 471: /* we found something bootable... */
! 472:
! 473: if (num_root == 1) {
! 474: booted_device = &raidrootdev[rootID];
! 475: } else if (num_root > 1) {
! 476: /* we can't guess.. require the user to answer... */
! 477: boothowto |= RB_ASKNAME;
1.51 oster 478: }
1.1 oster 479: }
480:
481:
482: int
483: raidsize(dev)
1.9 oster 484: dev_t dev;
1.1 oster 485: {
486: struct raid_softc *rs;
487: struct disklabel *lp;
1.9 oster 488: int part, unit, omask, size;
1.1 oster 489:
490: unit = raidunit(dev);
491: if (unit >= numraid)
492: return (-1);
493: rs = &raid_softc[unit];
494:
495: if ((rs->sc_flags & RAIDF_INITED) == 0)
496: return (-1);
497:
498: part = DISKPART(dev);
499: omask = rs->sc_dkdev.dk_openmask & (1 << part);
500: lp = rs->sc_dkdev.dk_label;
501:
502: if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
503: return (-1);
504:
505: if (lp->d_partitions[part].p_fstype != FS_SWAP)
506: size = -1;
507: else
508: size = lp->d_partitions[part].p_size *
509: (lp->d_secsize / DEV_BSIZE);
510:
511: if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
512: return (-1);
513:
514: return (size);
515:
516: }
517:
518: int
519: raiddump(dev, blkno, va, size)
1.9 oster 520: dev_t dev;
1.1 oster 521: daddr_t blkno;
522: caddr_t va;
1.9 oster 523: size_t size;
1.1 oster 524: {
525: /* Not implemented. */
526: return ENXIO;
527: }
528: /* ARGSUSED */
529: int
530: raidopen(dev, flags, fmt, p)
1.9 oster 531: dev_t dev;
532: int flags, fmt;
1.1 oster 533: struct proc *p;
534: {
1.9 oster 535: int unit = raidunit(dev);
1.1 oster 536: struct raid_softc *rs;
537: struct disklabel *lp;
1.9 oster 538: int part, pmask;
539: int error = 0;
540:
1.1 oster 541: if (unit >= numraid)
542: return (ENXIO);
543: rs = &raid_softc[unit];
544:
545: if ((error = raidlock(rs)) != 0)
1.9 oster 546: return (error);
1.1 oster 547: lp = rs->sc_dkdev.dk_label;
548:
549: part = DISKPART(dev);
550: pmask = (1 << part);
551:
552: db1_printf(("Opening raid device number: %d partition: %d\n",
1.14 oster 553: unit, part));
1.1 oster 554:
555:
556: if ((rs->sc_flags & RAIDF_INITED) &&
557: (rs->sc_dkdev.dk_openmask == 0))
1.9 oster 558: raidgetdisklabel(dev);
1.1 oster 559:
560: /* make sure that this partition exists */
561:
562: if (part != RAW_PART) {
563: db1_printf(("Not a raw partition..\n"));
564: if (((rs->sc_flags & RAIDF_INITED) == 0) ||
565: ((part >= lp->d_npartitions) ||
1.9 oster 566: (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
1.1 oster 567: error = ENXIO;
568: raidunlock(rs);
569: db1_printf(("Bailing out...\n"));
1.9 oster 570: return (error);
1.1 oster 571: }
572: }
573: /* Prevent this unit from being unconfigured while open. */
574: switch (fmt) {
575: case S_IFCHR:
576: rs->sc_dkdev.dk_copenmask |= pmask;
577: break;
578:
579: case S_IFBLK:
580: rs->sc_dkdev.dk_bopenmask |= pmask;
581: break;
582: }
1.13 oster 583:
584: if ((rs->sc_dkdev.dk_openmask == 0) &&
585: ((rs->sc_flags & RAIDF_INITED) != 0)) {
586: /* First one... mark things as dirty... Note that we *MUST*
587: have done a configure before this. I DO NOT WANT TO BE
588: SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
589: THAT THEY BELONG TOGETHER!!!!! */
590: /* XXX should check to see if we're only open for reading
591: here... If so, we needn't do this, but then need some
592: other way of keeping track of what's happened.. */
593:
594: rf_markalldirty( raidPtrs[unit] );
595: }
596:
597:
1.1 oster 598: rs->sc_dkdev.dk_openmask =
599: rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
600:
601: raidunlock(rs);
602:
1.9 oster 603: return (error);
1.1 oster 604:
605:
606: }
607: /* ARGSUSED */
608: int
609: raidclose(dev, flags, fmt, p)
1.9 oster 610: dev_t dev;
611: int flags, fmt;
1.1 oster 612: struct proc *p;
613: {
1.9 oster 614: int unit = raidunit(dev);
1.1 oster 615: struct raid_softc *rs;
1.9 oster 616: int error = 0;
617: int part;
1.1 oster 618:
619: if (unit >= numraid)
620: return (ENXIO);
621: rs = &raid_softc[unit];
622:
623: if ((error = raidlock(rs)) != 0)
624: return (error);
625:
626: part = DISKPART(dev);
627:
628: /* ...that much closer to allowing unconfiguration... */
629: switch (fmt) {
630: case S_IFCHR:
631: rs->sc_dkdev.dk_copenmask &= ~(1 << part);
632: break;
633:
634: case S_IFBLK:
635: rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
636: break;
637: }
638: rs->sc_dkdev.dk_openmask =
639: rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
1.13 oster 640:
641: if ((rs->sc_dkdev.dk_openmask == 0) &&
642: ((rs->sc_flags & RAIDF_INITED) != 0)) {
643: /* Last one... device is not unconfigured yet.
644: Device shutdown has taken care of setting the
645: clean bits if RAIDF_INITED is not set
646: mark things as clean... */
1.64 oster 647: #if 0
1.54 oster 648: printf("Last one on raid%d. Updating status.\n",unit);
649: #endif
1.91 oster 650: rf_update_component_labels(raidPtrs[unit],
651: RF_FINAL_COMPONENT_UPDATE);
1.107 oster 652: if (doing_shutdown) {
653: /* last one, and we're going down, so
654: lights out for this RAID set too. */
655: error = rf_Shutdown(raidPtrs[unit]);
656:
657: /* It's no longer initialized... */
658: rs->sc_flags &= ~RAIDF_INITED;
659:
660: /* Detach the disk. */
661: disk_detach(&rs->sc_dkdev);
662: }
1.13 oster 663: }
1.1 oster 664:
665: raidunlock(rs);
666: return (0);
667:
668: }
669:
670: void
671: raidstrategy(bp)
1.74 augustss 672: struct buf *bp;
1.1 oster 673: {
1.74 augustss 674: int s;
1.1 oster 675:
676: unsigned int raidID = raidunit(bp->b_dev);
677: RF_Raid_t *raidPtr;
678: struct raid_softc *rs = &raid_softc[raidID];
679: struct disklabel *lp;
1.9 oster 680: int wlabel;
1.1 oster 681:
1.30 oster 682: if ((rs->sc_flags & RAIDF_INITED) ==0) {
683: bp->b_error = ENXIO;
1.100 chs 684: bp->b_flags |= B_ERROR;
1.30 oster 685: bp->b_resid = bp->b_bcount;
686: biodone(bp);
1.1 oster 687: return;
1.30 oster 688: }
1.1 oster 689: if (raidID >= numraid || !raidPtrs[raidID]) {
690: bp->b_error = ENODEV;
691: bp->b_flags |= B_ERROR;
692: bp->b_resid = bp->b_bcount;
693: biodone(bp);
694: return;
695: }
696: raidPtr = raidPtrs[raidID];
697: if (!raidPtr->valid) {
698: bp->b_error = ENODEV;
699: bp->b_flags |= B_ERROR;
700: bp->b_resid = bp->b_bcount;
701: biodone(bp);
702: return;
703: }
704: if (bp->b_bcount == 0) {
705: db1_printf(("b_bcount is zero..\n"));
706: biodone(bp);
707: return;
708: }
709: lp = rs->sc_dkdev.dk_label;
710:
711: /*
712: * Do bounds checking and adjust transfer. If there's an
713: * error, the bounds check will flag that for us.
714: */
715:
1.9 oster 716: wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
1.1 oster 717: if (DISKPART(bp->b_dev) != RAW_PART)
718: if (bounds_check_with_label(bp, lp, wlabel) <= 0) {
719: db1_printf(("Bounds check failed!!:%d %d\n",
1.9 oster 720: (int) bp->b_blkno, (int) wlabel));
1.1 oster 721: biodone(bp);
722: return;
723: }
1.34 oster 724: s = splbio();
1.1 oster 725:
726: bp->b_resid = 0;
1.34 oster 727:
728: /* stuff it onto our queue */
1.47 thorpej 729: BUFQ_INSERT_TAIL(&rs->buf_queue, bp);
1.34 oster 730:
731: raidstart(raidPtrs[raidID]);
732:
1.1 oster 733: splx(s);
734: }
735: /* ARGSUSED */
736: int
737: raidread(dev, uio, flags)
1.9 oster 738: dev_t dev;
1.1 oster 739: struct uio *uio;
1.9 oster 740: int flags;
1.1 oster 741: {
1.9 oster 742: int unit = raidunit(dev);
1.1 oster 743: struct raid_softc *rs;
1.9 oster 744: int part;
1.1 oster 745:
746: if (unit >= numraid)
747: return (ENXIO);
748: rs = &raid_softc[unit];
749:
750: if ((rs->sc_flags & RAIDF_INITED) == 0)
751: return (ENXIO);
752: part = DISKPART(dev);
753:
1.9 oster 754: db1_printf(("raidread: unit: %d partition: %d\n", unit, part));
1.1 oster 755:
756: return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
757:
758: }
759: /* ARGSUSED */
760: int
761: raidwrite(dev, uio, flags)
1.9 oster 762: dev_t dev;
1.1 oster 763: struct uio *uio;
1.9 oster 764: int flags;
1.1 oster 765: {
1.9 oster 766: int unit = raidunit(dev);
1.1 oster 767: struct raid_softc *rs;
768:
769: if (unit >= numraid)
770: return (ENXIO);
771: rs = &raid_softc[unit];
772:
773: if ((rs->sc_flags & RAIDF_INITED) == 0)
774: return (ENXIO);
775: db1_printf(("raidwrite\n"));
776: return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
777:
778: }
779:
780: int
781: raidioctl(dev, cmd, data, flag, p)
1.9 oster 782: dev_t dev;
783: u_long cmd;
1.1 oster 784: caddr_t data;
1.9 oster 785: int flag;
1.1 oster 786: struct proc *p;
787: {
1.9 oster 788: int unit = raidunit(dev);
789: int error = 0;
790: int part, pmask;
1.1 oster 791: struct raid_softc *rs;
792: RF_Config_t *k_cfg, *u_cfg;
1.42 oster 793: RF_Raid_t *raidPtr;
1.48 oster 794: RF_RaidDisk_t *diskPtr;
1.41 oster 795: RF_AccTotals_t *totals;
796: RF_DeviceConfig_t *d_cfg, **ucfgp;
1.1 oster 797: u_char *specific_buf;
1.11 oster 798: int retcode = 0;
799: int row;
800: int column;
1.117.6.3! gehenna 801: int raidid;
1.1 oster 802: struct rf_recon_req *rrcopy, *rr;
1.48 oster 803: RF_ComponentLabel_t *clabel;
1.11 oster 804: RF_ComponentLabel_t ci_label;
1.48 oster 805: RF_ComponentLabel_t **clabel_ptr;
1.12 oster 806: RF_SingleComponent_t *sparePtr,*componentPtr;
807: RF_SingleComponent_t hot_spare;
808: RF_SingleComponent_t component;
1.83 oster 809: RF_ProgressInfo_t progressInfo, **progressInfoPtr;
1.41 oster 810: int i, j, d;
1.102 fvdl 811: #ifdef __HAVE_OLD_DISKLABEL
812: struct disklabel newlabel;
813: #endif
1.1 oster 814:
815: if (unit >= numraid)
816: return (ENXIO);
817: rs = &raid_softc[unit];
1.42 oster 818: raidPtr = raidPtrs[unit];
1.1 oster 819:
1.9 oster 820: db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
821: (int) DISKPART(dev), (int) unit, (int) cmd));
1.1 oster 822:
823: /* Must be open for writes for these commands... */
824: switch (cmd) {
825: case DIOCSDINFO:
826: case DIOCWDINFO:
1.102 fvdl 827: #ifdef __HAVE_OLD_DISKLABEL
828: case ODIOCWDINFO:
829: case ODIOCSDINFO:
830: #endif
1.1 oster 831: case DIOCWLABEL:
832: if ((flag & FWRITE) == 0)
833: return (EBADF);
834: }
835:
836: /* Must be initialized for these... */
837: switch (cmd) {
838: case DIOCGDINFO:
839: case DIOCSDINFO:
840: case DIOCWDINFO:
1.102 fvdl 841: #ifdef __HAVE_OLD_DISKLABEL
842: case ODIOCGDINFO:
843: case ODIOCWDINFO:
844: case ODIOCSDINFO:
845: case ODIOCGDEFLABEL:
846: #endif
1.1 oster 847: case DIOCGPART:
848: case DIOCWLABEL:
849: case DIOCGDEFLABEL:
850: case RAIDFRAME_SHUTDOWN:
851: case RAIDFRAME_REWRITEPARITY:
852: case RAIDFRAME_GET_INFO:
853: case RAIDFRAME_RESET_ACCTOTALS:
854: case RAIDFRAME_GET_ACCTOTALS:
855: case RAIDFRAME_KEEP_ACCTOTALS:
856: case RAIDFRAME_GET_SIZE:
857: case RAIDFRAME_FAIL_DISK:
858: case RAIDFRAME_COPYBACK:
1.37 oster 859: case RAIDFRAME_CHECK_RECON_STATUS:
1.83 oster 860: case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1.11 oster 861: case RAIDFRAME_GET_COMPONENT_LABEL:
862: case RAIDFRAME_SET_COMPONENT_LABEL:
863: case RAIDFRAME_ADD_HOT_SPARE:
864: case RAIDFRAME_REMOVE_HOT_SPARE:
865: case RAIDFRAME_INIT_LABELS:
1.12 oster 866: case RAIDFRAME_REBUILD_IN_PLACE:
1.23 oster 867: case RAIDFRAME_CHECK_PARITY:
1.37 oster 868: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1.83 oster 869: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1.37 oster 870: case RAIDFRAME_CHECK_COPYBACK_STATUS:
1.83 oster 871: case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1.48 oster 872: case RAIDFRAME_SET_AUTOCONFIG:
873: case RAIDFRAME_SET_ROOT:
1.73 oster 874: case RAIDFRAME_DELETE_COMPONENT:
875: case RAIDFRAME_INCORPORATE_HOT_SPARE:
1.1 oster 876: if ((rs->sc_flags & RAIDF_INITED) == 0)
877: return (ENXIO);
878: }
1.9 oster 879:
1.1 oster 880: switch (cmd) {
881:
882: /* configure the system */
883: case RAIDFRAME_CONFIGURE:
1.48 oster 884:
885: if (raidPtr->valid) {
886: /* There is a valid RAID set running on this unit! */
887: printf("raid%d: Device already configured!\n",unit);
1.66 oster 888: return(EINVAL);
1.48 oster 889: }
890:
1.1 oster 891: /* copy-in the configuration information */
892: /* data points to a pointer to the configuration structure */
1.43 oster 893:
1.9 oster 894: u_cfg = *((RF_Config_t **) data);
895: RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1.1 oster 896: if (k_cfg == NULL) {
1.9 oster 897: return (ENOMEM);
1.1 oster 898: }
1.9 oster 899: retcode = copyin((caddr_t) u_cfg, (caddr_t) k_cfg,
900: sizeof(RF_Config_t));
1.1 oster 901: if (retcode) {
1.33 oster 902: RF_Free(k_cfg, sizeof(RF_Config_t));
1.46 oster 903: db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1.9 oster 904: retcode));
905: return (retcode);
1.1 oster 906: }
1.9 oster 907: /* allocate a buffer for the layout-specific data, and copy it
908: * in */
1.1 oster 909: if (k_cfg->layoutSpecificSize) {
1.9 oster 910: if (k_cfg->layoutSpecificSize > 10000) {
1.1 oster 911: /* sanity check */
1.33 oster 912: RF_Free(k_cfg, sizeof(RF_Config_t));
1.9 oster 913: return (EINVAL);
1.1 oster 914: }
1.9 oster 915: RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
916: (u_char *));
1.1 oster 917: if (specific_buf == NULL) {
1.9 oster 918: RF_Free(k_cfg, sizeof(RF_Config_t));
919: return (ENOMEM);
1.1 oster 920: }
1.9 oster 921: retcode = copyin(k_cfg->layoutSpecific,
922: (caddr_t) specific_buf,
923: k_cfg->layoutSpecificSize);
1.1 oster 924: if (retcode) {
1.33 oster 925: RF_Free(k_cfg, sizeof(RF_Config_t));
1.42 oster 926: RF_Free(specific_buf,
927: k_cfg->layoutSpecificSize);
1.46 oster 928: db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1.9 oster 929: retcode));
930: return (retcode);
1.1 oster 931: }
1.9 oster 932: } else
933: specific_buf = NULL;
1.1 oster 934: k_cfg->layoutSpecific = specific_buf;
1.9 oster 935:
936: /* should do some kind of sanity check on the configuration.
937: * Store the sum of all the bytes in the last byte? */
1.1 oster 938:
939: /* configure the system */
940:
1.48 oster 941: /*
942: * Clear the entire RAID descriptor, just to make sure
943: * there is no stale data left in the case of a
944: * reconfiguration
945: */
1.108 thorpej 946: memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
1.42 oster 947: raidPtr->raidid = unit;
1.20 oster 948:
1.48 oster 949: retcode = rf_Configure(raidPtr, k_cfg, NULL);
1.1 oster 950:
1.40 oster 951: if (retcode == 0) {
1.37 oster 952:
1.40 oster 953: /* allow this many simultaneous IO's to
954: this RAID device */
1.42 oster 955: raidPtr->openings = RAIDOUTSTANDING;
1.48 oster 956:
1.59 oster 957: raidinit(raidPtr);
958: rf_markalldirty(raidPtr);
1.9 oster 959: }
1.1 oster 960: /* free the buffers. No return code here. */
961: if (k_cfg->layoutSpecificSize) {
1.9 oster 962: RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1.1 oster 963: }
1.9 oster 964: RF_Free(k_cfg, sizeof(RF_Config_t));
965:
966: return (retcode);
967:
968: /* shutdown the system */
1.1 oster 969: case RAIDFRAME_SHUTDOWN:
1.9 oster 970:
971: if ((error = raidlock(rs)) != 0)
972: return (error);
1.1 oster 973:
974: /*
975: * If somebody has a partition mounted, we shouldn't
976: * shutdown.
977: */
978:
979: part = DISKPART(dev);
980: pmask = (1 << part);
1.9 oster 981: if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
982: ((rs->sc_dkdev.dk_bopenmask & pmask) &&
983: (rs->sc_dkdev.dk_copenmask & pmask))) {
984: raidunlock(rs);
985: return (EBUSY);
986: }
1.11 oster 987:
1.42 oster 988: retcode = rf_Shutdown(raidPtr);
1.1 oster 989:
990: /* It's no longer initialized... */
991: rs->sc_flags &= ~RAIDF_INITED;
1.16 oster 992:
1.9 oster 993: /* Detach the disk. */
994: disk_detach(&rs->sc_dkdev);
1.1 oster 995:
996: raidunlock(rs);
997:
1.9 oster 998: return (retcode);
1.11 oster 999: case RAIDFRAME_GET_COMPONENT_LABEL:
1.48 oster 1000: clabel_ptr = (RF_ComponentLabel_t **) data;
1.11 oster 1001: /* need to read the component label for the disk indicated
1.48 oster 1002: by row,column in clabel */
1.11 oster 1003:
1004: /* For practice, let's get it directly fromdisk, rather
1005: than from the in-core copy */
1.48 oster 1006: RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
1.11 oster 1007: (RF_ComponentLabel_t *));
1.48 oster 1008: if (clabel == NULL)
1.11 oster 1009: return (ENOMEM);
1010:
1.108 thorpej 1011: memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1.11 oster 1012:
1.48 oster 1013: retcode = copyin( *clabel_ptr, clabel,
1.11 oster 1014: sizeof(RF_ComponentLabel_t));
1015:
1016: if (retcode) {
1.48 oster 1017: RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1.11 oster 1018: return(retcode);
1019: }
1020:
1.48 oster 1021: row = clabel->row;
1022: column = clabel->column;
1.26 oster 1023:
1.42 oster 1024: if ((row < 0) || (row >= raidPtr->numRow) ||
1.90 oster 1025: (column < 0) || (column >= raidPtr->numCol +
1026: raidPtr->numSpare)) {
1.48 oster 1027: RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1.26 oster 1028: return(EINVAL);
1.11 oster 1029: }
1030:
1.48 oster 1031: raidread_component_label(raidPtr->Disks[row][column].dev,
1032: raidPtr->raid_cinfo[row][column].ci_vp,
1033: clabel );
1.11 oster 1034:
1.48 oster 1035: retcode = copyout((caddr_t) clabel,
1036: (caddr_t) *clabel_ptr,
1.11 oster 1037: sizeof(RF_ComponentLabel_t));
1.48 oster 1038: RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1.11 oster 1039: return (retcode);
1040:
1041: case RAIDFRAME_SET_COMPONENT_LABEL:
1.48 oster 1042: clabel = (RF_ComponentLabel_t *) data;
1.11 oster 1043:
1044: /* XXX check the label for valid stuff... */
1045: /* Note that some things *should not* get modified --
1046: the user should be re-initing the labels instead of
1047: trying to patch things.
1048: */
1049:
1.117.6.3! gehenna 1050: raidid = raidPtr->raidid;
! 1051: printf("raid%d: Got component label:\n", raidid);
! 1052: printf("raid%d: Version: %d\n", raidid, clabel->version);
! 1053: printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
! 1054: printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
! 1055: printf("raid%d: Row: %d\n", raidid, clabel->row);
! 1056: printf("raid%d: Column: %d\n", raidid, clabel->column);
! 1057: printf("raid%d: Num Rows: %d\n", raidid, clabel->num_rows);
! 1058: printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
! 1059: printf("raid%d: Clean: %d\n", raidid, clabel->clean);
! 1060: printf("raid%d: Status: %d\n", raidid, clabel->status);
1.11 oster 1061:
1.48 oster 1062: row = clabel->row;
1063: column = clabel->column;
1.12 oster 1064:
1.42 oster 1065: if ((row < 0) || (row >= raidPtr->numRow) ||
1066: (column < 0) || (column >= raidPtr->numCol)) {
1.12 oster 1067: return(EINVAL);
1.11 oster 1068: }
1.12 oster 1069:
1070: /* XXX this isn't allowed to do anything for now :-) */
1.48 oster 1071:
1072: /* XXX and before it is, we need to fill in the rest
1073: of the fields!?!?!?! */
1.12 oster 1074: #if 0
1.11 oster 1075: raidwrite_component_label(
1.42 oster 1076: raidPtr->Disks[row][column].dev,
1077: raidPtr->raid_cinfo[row][column].ci_vp,
1.48 oster 1078: clabel );
1.12 oster 1079: #endif
1080: return (0);
1.11 oster 1081:
1082: case RAIDFRAME_INIT_LABELS:
1.48 oster 1083: clabel = (RF_ComponentLabel_t *) data;
1.11 oster 1084: /*
1085: we only want the serial number from
1086: the above. We get all the rest of the information
1087: from the config that was used to create this RAID
1088: set.
1089: */
1.12 oster 1090:
1.48 oster 1091: raidPtr->serial_number = clabel->serial_number;
1.51 oster 1092:
1093: raid_init_component_label(raidPtr, &ci_label);
1094: ci_label.serial_number = clabel->serial_number;
1.11 oster 1095:
1.42 oster 1096: for(row=0;row<raidPtr->numRow;row++) {
1.11 oster 1097: ci_label.row = row;
1.42 oster 1098: for(column=0;column<raidPtr->numCol;column++) {
1.48 oster 1099: diskPtr = &raidPtr->Disks[row][column];
1.98 oster 1100: if (!RF_DEAD_DISK(diskPtr->status)) {
1.94 oster 1101: ci_label.partitionSize = diskPtr->partitionSize;
1102: ci_label.column = column;
1103: raidwrite_component_label(
1104: raidPtr->Disks[row][column].dev,
1105: raidPtr->raid_cinfo[row][column].ci_vp,
1106: &ci_label );
1107: }
1.11 oster 1108: }
1109: }
1110:
1111: return (retcode);
1.48 oster 1112: case RAIDFRAME_SET_AUTOCONFIG:
1.78 minoura 1113: d = rf_set_autoconfig(raidPtr, *(int *) data);
1.117.6.3! gehenna 1114: printf("raid%d: New autoconfig value is: %d\n",
! 1115: raidPtr->raidid, d);
1.78 minoura 1116: *(int *) data = d;
1.48 oster 1117: return (retcode);
1118:
1119: case RAIDFRAME_SET_ROOT:
1.78 minoura 1120: d = rf_set_rootpartition(raidPtr, *(int *) data);
1.117.6.3! gehenna 1121: printf("raid%d: New rootpartition value is: %d\n",
! 1122: raidPtr->raidid, d);
1.78 minoura 1123: *(int *) data = d;
1.48 oster 1124: return (retcode);
1.9 oster 1125:
1.1 oster 1126: /* initialize all parity */
1127: case RAIDFRAME_REWRITEPARITY:
1128:
1.42 oster 1129: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.17 oster 1130: /* Parity for RAID 0 is trivially correct */
1.42 oster 1131: raidPtr->parity_good = RF_RAID_CLEAN;
1.17 oster 1132: return(0);
1133: }
1.37 oster 1134:
1.42 oster 1135: if (raidPtr->parity_rewrite_in_progress == 1) {
1.37 oster 1136: /* Re-write is already in progress! */
1137: return(EINVAL);
1138: }
1.27 oster 1139:
1.42 oster 1140: retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1.37 oster 1141: rf_RewriteParityThread,
1.42 oster 1142: raidPtr,"raid_parity");
1.9 oster 1143: return (retcode);
1144:
1.11 oster 1145:
1146: case RAIDFRAME_ADD_HOT_SPARE:
1.12 oster 1147: sparePtr = (RF_SingleComponent_t *) data;
1148: memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1.42 oster 1149: retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1.11 oster 1150: return(retcode);
1151:
1152: case RAIDFRAME_REMOVE_HOT_SPARE:
1.73 oster 1153: return(retcode);
1154:
1155: case RAIDFRAME_DELETE_COMPONENT:
1156: componentPtr = (RF_SingleComponent_t *)data;
1157: memcpy( &component, componentPtr,
1158: sizeof(RF_SingleComponent_t));
1159: retcode = rf_delete_component(raidPtr, &component);
1160: return(retcode);
1161:
1162: case RAIDFRAME_INCORPORATE_HOT_SPARE:
1163: componentPtr = (RF_SingleComponent_t *)data;
1164: memcpy( &component, componentPtr,
1165: sizeof(RF_SingleComponent_t));
1166: retcode = rf_incorporate_hot_spare(raidPtr, &component);
1.11 oster 1167: return(retcode);
1168:
1.12 oster 1169: case RAIDFRAME_REBUILD_IN_PLACE:
1.24 oster 1170:
1.42 oster 1171: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.24 oster 1172: /* Can't do this on a RAID 0!! */
1173: return(EINVAL);
1174: }
1175:
1.42 oster 1176: if (raidPtr->recon_in_progress == 1) {
1.37 oster 1177: /* a reconstruct is already in progress! */
1178: return(EINVAL);
1179: }
1180:
1.12 oster 1181: componentPtr = (RF_SingleComponent_t *) data;
1182: memcpy( &component, componentPtr,
1183: sizeof(RF_SingleComponent_t));
1184: row = component.row;
1185: column = component.column;
1.117.6.3! gehenna 1186: printf("raid%d: Rebuild: %d %d\n", raidPtr->raidid,
! 1187: row, column);
1.42 oster 1188: if ((row < 0) || (row >= raidPtr->numRow) ||
1189: (column < 0) || (column >= raidPtr->numCol)) {
1.12 oster 1190: return(EINVAL);
1191: }
1.37 oster 1192:
1193: RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1.38 oster 1194: if (rrcopy == NULL)
1195: return(ENOMEM);
1.37 oster 1196:
1.42 oster 1197: rrcopy->raidPtr = (void *) raidPtr;
1.37 oster 1198: rrcopy->row = row;
1199: rrcopy->col = column;
1200:
1.42 oster 1201: retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1.37 oster 1202: rf_ReconstructInPlaceThread,
1203: rrcopy,"raid_reconip");
1.12 oster 1204: return(retcode);
1205:
1.1 oster 1206: case RAIDFRAME_GET_INFO:
1.42 oster 1207: if (!raidPtr->valid)
1.41 oster 1208: return (ENODEV);
1209: ucfgp = (RF_DeviceConfig_t **) data;
1210: RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1211: (RF_DeviceConfig_t *));
1212: if (d_cfg == NULL)
1213: return (ENOMEM);
1.108 thorpej 1214: memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1.42 oster 1215: d_cfg->rows = raidPtr->numRow;
1216: d_cfg->cols = raidPtr->numCol;
1217: d_cfg->ndevs = raidPtr->numRow * raidPtr->numCol;
1.41 oster 1218: if (d_cfg->ndevs >= RF_MAX_DISKS) {
1219: RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1220: return (ENOMEM);
1221: }
1.42 oster 1222: d_cfg->nspares = raidPtr->numSpare;
1.41 oster 1223: if (d_cfg->nspares >= RF_MAX_DISKS) {
1224: RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1225: return (ENOMEM);
1226: }
1.42 oster 1227: d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1.41 oster 1228: d = 0;
1229: for (i = 0; i < d_cfg->rows; i++) {
1230: for (j = 0; j < d_cfg->cols; j++) {
1.42 oster 1231: d_cfg->devs[d] = raidPtr->Disks[i][j];
1.41 oster 1232: d++;
1.1 oster 1233: }
1.41 oster 1234: }
1235: for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1.42 oster 1236: d_cfg->spares[i] = raidPtr->Disks[0][j];
1.41 oster 1237: }
1238: retcode = copyout((caddr_t) d_cfg, (caddr_t) * ucfgp,
1239: sizeof(RF_DeviceConfig_t));
1240: RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1241:
1242: return (retcode);
1.9 oster 1243:
1.22 oster 1244: case RAIDFRAME_CHECK_PARITY:
1.42 oster 1245: *(int *) data = raidPtr->parity_good;
1.22 oster 1246: return (0);
1.41 oster 1247:
1.1 oster 1248: case RAIDFRAME_RESET_ACCTOTALS:
1.108 thorpej 1249: memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1.41 oster 1250: return (0);
1.9 oster 1251:
1.1 oster 1252: case RAIDFRAME_GET_ACCTOTALS:
1.41 oster 1253: totals = (RF_AccTotals_t *) data;
1.42 oster 1254: *totals = raidPtr->acc_totals;
1.41 oster 1255: return (0);
1.9 oster 1256:
1.1 oster 1257: case RAIDFRAME_KEEP_ACCTOTALS:
1.42 oster 1258: raidPtr->keep_acc_totals = *(int *)data;
1.41 oster 1259: return (0);
1.9 oster 1260:
1.1 oster 1261: case RAIDFRAME_GET_SIZE:
1.42 oster 1262: *(int *) data = raidPtr->totalSectors;
1.9 oster 1263: return (0);
1.1 oster 1264:
1265: /* fail a disk & optionally start reconstruction */
1266: case RAIDFRAME_FAIL_DISK:
1.24 oster 1267:
1.42 oster 1268: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.24 oster 1269: /* Can't do this on a RAID 0!! */
1270: return(EINVAL);
1271: }
1272:
1.1 oster 1273: rr = (struct rf_recon_req *) data;
1.9 oster 1274:
1.42 oster 1275: if (rr->row < 0 || rr->row >= raidPtr->numRow
1276: || rr->col < 0 || rr->col >= raidPtr->numCol)
1.9 oster 1277: return (EINVAL);
1.1 oster 1278:
1.12 oster 1279: printf("raid%d: Failing the disk: row: %d col: %d\n",
1280: unit, rr->row, rr->col);
1.9 oster 1281:
1282: /* make a copy of the recon request so that we don't rely on
1283: * the user's buffer */
1.1 oster 1284: RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1.38 oster 1285: if (rrcopy == NULL)
1286: return(ENOMEM);
1.117.6.2 gehenna 1287: memcpy(rrcopy, rr, sizeof(*rr));
1.42 oster 1288: rrcopy->raidPtr = (void *) raidPtr;
1.1 oster 1289:
1.42 oster 1290: retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1.37 oster 1291: rf_ReconThread,
1292: rrcopy,"raid_recon");
1.9 oster 1293: return (0);
1294:
1295: /* invoke a copyback operation after recon on whatever disk
1296: * needs it, if any */
1297: case RAIDFRAME_COPYBACK:
1.24 oster 1298:
1.42 oster 1299: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.24 oster 1300: /* This makes no sense on a RAID 0!! */
1301: return(EINVAL);
1302: }
1303:
1.42 oster 1304: if (raidPtr->copyback_in_progress == 1) {
1.37 oster 1305: /* Copyback is already in progress! */
1306: return(EINVAL);
1307: }
1.27 oster 1308:
1.42 oster 1309: retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1.37 oster 1310: rf_CopybackThread,
1.42 oster 1311: raidPtr,"raid_copyback");
1.37 oster 1312: return (retcode);
1.9 oster 1313:
1.1 oster 1314: /* return the percentage completion of reconstruction */
1.37 oster 1315: case RAIDFRAME_CHECK_RECON_STATUS:
1.42 oster 1316: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.71 oster 1317: /* This makes no sense on a RAID 0, so tell the
1318: user it's done. */
1319: *(int *) data = 100;
1320: return(0);
1.24 oster 1321: }
1.37 oster 1322: row = 0; /* XXX we only consider a single row... */
1.42 oster 1323: if (raidPtr->status[row] != rf_rs_reconstructing)
1.1 oster 1324: *(int *) data = 100;
1.9 oster 1325: else
1.42 oster 1326: *(int *) data = raidPtr->reconControl[row]->percentComplete;
1.9 oster 1327: return (0);
1.83 oster 1328: case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1329: progressInfoPtr = (RF_ProgressInfo_t **) data;
1330: row = 0; /* XXX we only consider a single row... */
1331: if (raidPtr->status[row] != rf_rs_reconstructing) {
1332: progressInfo.remaining = 0;
1333: progressInfo.completed = 100;
1334: progressInfo.total = 100;
1335: } else {
1336: progressInfo.total =
1337: raidPtr->reconControl[row]->numRUsTotal;
1338: progressInfo.completed =
1339: raidPtr->reconControl[row]->numRUsComplete;
1340: progressInfo.remaining = progressInfo.total -
1341: progressInfo.completed;
1342: }
1343: retcode = copyout((caddr_t) &progressInfo,
1344: (caddr_t) *progressInfoPtr,
1345: sizeof(RF_ProgressInfo_t));
1346: return (retcode);
1.9 oster 1347:
1.37 oster 1348: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1.42 oster 1349: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.80 oster 1350: /* This makes no sense on a RAID 0, so tell the
1351: user it's done. */
1352: *(int *) data = 100;
1353: return(0);
1.37 oster 1354: }
1.42 oster 1355: if (raidPtr->parity_rewrite_in_progress == 1) {
1.83 oster 1356: *(int *) data = 100 *
1357: raidPtr->parity_rewrite_stripes_done /
1358: raidPtr->Layout.numStripe;
1.37 oster 1359: } else {
1360: *(int *) data = 100;
1361: }
1362: return (0);
1363:
1.83 oster 1364: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1365: progressInfoPtr = (RF_ProgressInfo_t **) data;
1366: if (raidPtr->parity_rewrite_in_progress == 1) {
1367: progressInfo.total = raidPtr->Layout.numStripe;
1368: progressInfo.completed =
1369: raidPtr->parity_rewrite_stripes_done;
1370: progressInfo.remaining = progressInfo.total -
1371: progressInfo.completed;
1372: } else {
1373: progressInfo.remaining = 0;
1374: progressInfo.completed = 100;
1375: progressInfo.total = 100;
1376: }
1377: retcode = copyout((caddr_t) &progressInfo,
1378: (caddr_t) *progressInfoPtr,
1379: sizeof(RF_ProgressInfo_t));
1380: return (retcode);
1381:
1.37 oster 1382: case RAIDFRAME_CHECK_COPYBACK_STATUS:
1.42 oster 1383: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.37 oster 1384: /* This makes no sense on a RAID 0 */
1.83 oster 1385: *(int *) data = 100;
1386: return(0);
1.37 oster 1387: }
1.42 oster 1388: if (raidPtr->copyback_in_progress == 1) {
1389: *(int *) data = 100 * raidPtr->copyback_stripes_done /
1390: raidPtr->Layout.numStripe;
1.37 oster 1391: } else {
1392: *(int *) data = 100;
1393: }
1394: return (0);
1395:
1.83 oster 1396: case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1.93 oster 1397: progressInfoPtr = (RF_ProgressInfo_t **) data;
1.83 oster 1398: if (raidPtr->copyback_in_progress == 1) {
1399: progressInfo.total = raidPtr->Layout.numStripe;
1400: progressInfo.completed =
1.93 oster 1401: raidPtr->copyback_stripes_done;
1.83 oster 1402: progressInfo.remaining = progressInfo.total -
1403: progressInfo.completed;
1404: } else {
1405: progressInfo.remaining = 0;
1406: progressInfo.completed = 100;
1407: progressInfo.total = 100;
1408: }
1409: retcode = copyout((caddr_t) &progressInfo,
1410: (caddr_t) *progressInfoPtr,
1411: sizeof(RF_ProgressInfo_t));
1412: return (retcode);
1.37 oster 1413:
1.9 oster 1414: /* the sparetable daemon calls this to wait for the kernel to
1415: * need a spare table. this ioctl does not return until a
1416: * spare table is needed. XXX -- calling mpsleep here in the
1417: * ioctl code is almost certainly wrong and evil. -- XXX XXX
1418: * -- I should either compute the spare table in the kernel,
1419: * or have a different -- XXX XXX -- interface (a different
1.42 oster 1420: * character device) for delivering the table -- XXX */
1.1 oster 1421: #if 0
1422: case RAIDFRAME_SPARET_WAIT:
1423: RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1.9 oster 1424: while (!rf_sparet_wait_queue)
1425: mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1.1 oster 1426: waitreq = rf_sparet_wait_queue;
1427: rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1428: RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1.9 oster 1429:
1.42 oster 1430: /* structure assignment */
1431: *((RF_SparetWait_t *) data) = *waitreq;
1.9 oster 1432:
1.1 oster 1433: RF_Free(waitreq, sizeof(*waitreq));
1.9 oster 1434: return (0);
1435:
1436: /* wakes up a process waiting on SPARET_WAIT and puts an error
1437: * code in it that will cause the dameon to exit */
1.1 oster 1438: case RAIDFRAME_ABORT_SPARET_WAIT:
1439: RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1440: waitreq->fcol = -1;
1441: RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1442: waitreq->next = rf_sparet_wait_queue;
1443: rf_sparet_wait_queue = waitreq;
1444: RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1445: wakeup(&rf_sparet_wait_queue);
1.9 oster 1446: return (0);
1.1 oster 1447:
1.9 oster 1448: /* used by the spare table daemon to deliver a spare table
1449: * into the kernel */
1.1 oster 1450: case RAIDFRAME_SEND_SPARET:
1.9 oster 1451:
1.1 oster 1452: /* install the spare table */
1.42 oster 1453: retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1.9 oster 1454:
1455: /* respond to the requestor. the return status of the spare
1456: * table installation is passed in the "fcol" field */
1.1 oster 1457: RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1458: waitreq->fcol = retcode;
1459: RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1460: waitreq->next = rf_sparet_resp_queue;
1461: rf_sparet_resp_queue = waitreq;
1462: wakeup(&rf_sparet_resp_queue);
1463: RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1.9 oster 1464:
1465: return (retcode);
1.1 oster 1466: #endif
1467:
1.9 oster 1468: default:
1.36 oster 1469: break; /* fall through to the os-specific code below */
1.1 oster 1470:
1471: }
1.9 oster 1472:
1.42 oster 1473: if (!raidPtr->valid)
1.9 oster 1474: return (EINVAL);
1475:
1.1 oster 1476: /*
1477: * Add support for "regular" device ioctls here.
1478: */
1.9 oster 1479:
1.1 oster 1480: switch (cmd) {
1481: case DIOCGDINFO:
1.9 oster 1482: *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1.1 oster 1483: break;
1.102 fvdl 1484: #ifdef __HAVE_OLD_DISKLABEL
1485: case ODIOCGDINFO:
1486: newlabel = *(rs->sc_dkdev.dk_label);
1487: if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1.103 fvdl 1488: return ENOTTY;
1.102 fvdl 1489: memcpy(data, &newlabel, sizeof (struct olddisklabel));
1490: break;
1491: #endif
1.1 oster 1492:
1493: case DIOCGPART:
1.9 oster 1494: ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1495: ((struct partinfo *) data)->part =
1.1 oster 1496: &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1497: break;
1498:
1499: case DIOCWDINFO:
1500: case DIOCSDINFO:
1.102 fvdl 1501: #ifdef __HAVE_OLD_DISKLABEL
1502: case ODIOCWDINFO:
1503: case ODIOCSDINFO:
1504: #endif
1505: {
1506: struct disklabel *lp;
1507: #ifdef __HAVE_OLD_DISKLABEL
1508: if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1509: memset(&newlabel, 0, sizeof newlabel);
1510: memcpy(&newlabel, data, sizeof (struct olddisklabel));
1511: lp = &newlabel;
1512: } else
1513: #endif
1514: lp = (struct disklabel *)data;
1515:
1.1 oster 1516: if ((error = raidlock(rs)) != 0)
1517: return (error);
1518:
1519: rs->sc_flags |= RAIDF_LABELLING;
1520:
1521: error = setdisklabel(rs->sc_dkdev.dk_label,
1.102 fvdl 1522: lp, 0, rs->sc_dkdev.dk_cpulabel);
1.1 oster 1523: if (error == 0) {
1.102 fvdl 1524: if (cmd == DIOCWDINFO
1525: #ifdef __HAVE_OLD_DISKLABEL
1526: || cmd == ODIOCWDINFO
1527: #endif
1528: )
1.1 oster 1529: error = writedisklabel(RAIDLABELDEV(dev),
1530: raidstrategy, rs->sc_dkdev.dk_label,
1531: rs->sc_dkdev.dk_cpulabel);
1532: }
1533: rs->sc_flags &= ~RAIDF_LABELLING;
1534:
1535: raidunlock(rs);
1536:
1537: if (error)
1538: return (error);
1539: break;
1.102 fvdl 1540: }
1.1 oster 1541:
1542: case DIOCWLABEL:
1.9 oster 1543: if (*(int *) data != 0)
1.1 oster 1544: rs->sc_flags |= RAIDF_WLABEL;
1545: else
1546: rs->sc_flags &= ~RAIDF_WLABEL;
1547: break;
1548:
1549: case DIOCGDEFLABEL:
1.102 fvdl 1550: raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1.1 oster 1551: break;
1.102 fvdl 1552:
1553: #ifdef __HAVE_OLD_DISKLABEL
1554: case ODIOCGDEFLABEL:
1555: raidgetdefaultlabel(raidPtr, rs, &newlabel);
1556: if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1.103 fvdl 1557: return ENOTTY;
1.102 fvdl 1558: memcpy(data, &newlabel, sizeof (struct olddisklabel));
1559: break;
1560: #endif
1.1 oster 1561:
1562: default:
1.39 oster 1563: retcode = ENOTTY;
1.1 oster 1564: }
1.9 oster 1565: return (retcode);
1.1 oster 1566:
1567: }
1568:
1569:
1.9 oster 1570: /* raidinit -- complete the rest of the initialization for the
1.1 oster 1571: RAIDframe device. */
1572:
1573:
1.59 oster 1574: static void
1575: raidinit(raidPtr)
1.1 oster 1576: RF_Raid_t *raidPtr;
1577: {
1578: struct raid_softc *rs;
1.59 oster 1579: int unit;
1.1 oster 1580:
1.59 oster 1581: unit = raidPtr->raidid;
1.1 oster 1582:
1583: rs = &raid_softc[unit];
1584:
1585: /* XXX should check return code first... */
1586: rs->sc_flags |= RAIDF_INITED;
1587:
1.9 oster 1588: sprintf(rs->sc_xname, "raid%d", unit); /* XXX doesn't check bounds. */
1.1 oster 1589:
1.9 oster 1590: rs->sc_dkdev.dk_name = rs->sc_xname;
1.11 oster 1591:
1.1 oster 1592: /* disk_attach actually creates space for the CPU disklabel, among
1.9 oster 1593: * other things, so it's critical to call this *BEFORE* we try putzing
1594: * with disklabels. */
1.11 oster 1595:
1.1 oster 1596: disk_attach(&rs->sc_dkdev);
1597:
1598: /* XXX There may be a weird interaction here between this, and
1.9 oster 1599: * protectedSectors, as used in RAIDframe. */
1.11 oster 1600:
1.9 oster 1601: rs->sc_size = raidPtr->totalSectors;
1.11 oster 1602:
1.1 oster 1603: }
1604:
1605: /* wake up the daemon & tell it to get us a spare table
1606: * XXX
1.9 oster 1607: * the entries in the queues should be tagged with the raidPtr
1.11 oster 1608: * so that in the extremely rare case that two recons happen at once,
1609: * we know for which device were requesting a spare table
1.1 oster 1610: * XXX
1.39 oster 1611: *
1612: * XXX This code is not currently used. GO
1.1 oster 1613: */
1.9 oster 1614: int
1615: rf_GetSpareTableFromDaemon(req)
1616: RF_SparetWait_t *req;
1617: {
1618: int retcode;
1619:
1620: RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1621: req->next = rf_sparet_wait_queue;
1622: rf_sparet_wait_queue = req;
1623: wakeup(&rf_sparet_wait_queue);
1624:
1625: /* mpsleep unlocks the mutex */
1626: while (!rf_sparet_resp_queue) {
1.15 oster 1627: tsleep(&rf_sparet_resp_queue, PRIBIO,
1.9 oster 1628: "raidframe getsparetable", 0);
1629: }
1630: req = rf_sparet_resp_queue;
1631: rf_sparet_resp_queue = req->next;
1632: RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1633:
1634: retcode = req->fcol;
1635: RF_Free(req, sizeof(*req)); /* this is not the same req as we
1636: * alloc'd */
1637: return (retcode);
1.1 oster 1638: }
1.39 oster 1639:
1.11 oster 1640: /* a wrapper around rf_DoAccess that extracts appropriate info from the
1641: * bp & passes it down.
1.1 oster 1642: * any calls originating in the kernel must use non-blocking I/O
1643: * do some extra sanity checking to return "appropriate" error values for
1644: * certain conditions (to make some standard utilities work)
1.34 oster 1645: *
1646: * Formerly known as: rf_DoAccessKernel
1.1 oster 1647: */
1.34 oster 1648: void
1649: raidstart(raidPtr)
1.9 oster 1650: RF_Raid_t *raidPtr;
1.1 oster 1651: {
1652: RF_SectorCount_t num_blocks, pb, sum;
1653: RF_RaidAddr_t raid_addr;
1.9 oster 1654: int retcode;
1.1 oster 1655: struct partition *pp;
1.9 oster 1656: daddr_t blocknum;
1657: int unit;
1.1 oster 1658: struct raid_softc *rs;
1.9 oster 1659: int do_async;
1.34 oster 1660: struct buf *bp;
1.1 oster 1661:
1662: unit = raidPtr->raidid;
1663: rs = &raid_softc[unit];
1.34 oster 1664:
1.56 oster 1665: /* quick check to see if anything has died recently */
1666: RF_LOCK_MUTEX(raidPtr->mutex);
1667: if (raidPtr->numNewFailures > 0) {
1.91 oster 1668: rf_update_component_labels(raidPtr,
1669: RF_NORMAL_COMPONENT_UPDATE);
1.56 oster 1670: raidPtr->numNewFailures--;
1671: }
1672:
1.34 oster 1673: /* Check to see if we're at the limit... */
1674: while (raidPtr->openings > 0) {
1675: RF_UNLOCK_MUTEX(raidPtr->mutex);
1676:
1677: /* get the next item, if any, from the queue */
1.47 thorpej 1678: if ((bp = BUFQ_FIRST(&rs->buf_queue)) == NULL) {
1.34 oster 1679: /* nothing more to do */
1680: return;
1681: }
1.47 thorpej 1682: BUFQ_REMOVE(&rs->buf_queue, bp);
1.34 oster 1683:
1684: /* Ok, for the bp we have here, bp->b_blkno is relative to the
1685: * partition.. Need to make it absolute to the underlying
1686: * device.. */
1.1 oster 1687:
1.34 oster 1688: blocknum = bp->b_blkno;
1689: if (DISKPART(bp->b_dev) != RAW_PART) {
1690: pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1691: blocknum += pp->p_offset;
1692: }
1.1 oster 1693:
1.34 oster 1694: db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1695: (int) blocknum));
1696:
1697: db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1698: db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1699:
1700: /* *THIS* is where we adjust what block we're going to...
1701: * but DO NOT TOUCH bp->b_blkno!!! */
1702: raid_addr = blocknum;
1703:
1704: num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1705: pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1706: sum = raid_addr + num_blocks + pb;
1707: if (1 || rf_debugKernelAccess) {
1708: db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1709: (int) raid_addr, (int) sum, (int) num_blocks,
1710: (int) pb, (int) bp->b_resid));
1711: }
1712: if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1713: || (sum < num_blocks) || (sum < pb)) {
1714: bp->b_error = ENOSPC;
1715: bp->b_flags |= B_ERROR;
1716: bp->b_resid = bp->b_bcount;
1717: biodone(bp);
1718: RF_LOCK_MUTEX(raidPtr->mutex);
1719: continue;
1720: }
1721: /*
1722: * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1723: */
1724:
1725: if (bp->b_bcount & raidPtr->sectorMask) {
1726: bp->b_error = EINVAL;
1727: bp->b_flags |= B_ERROR;
1728: bp->b_resid = bp->b_bcount;
1729: biodone(bp);
1730: RF_LOCK_MUTEX(raidPtr->mutex);
1731: continue;
1732:
1733: }
1734: db1_printf(("Calling DoAccess..\n"));
1735:
1.1 oster 1736:
1.34 oster 1737: RF_LOCK_MUTEX(raidPtr->mutex);
1738: raidPtr->openings--;
1739: RF_UNLOCK_MUTEX(raidPtr->mutex);
1.1 oster 1740:
1.34 oster 1741: /*
1742: * Everything is async.
1743: */
1744: do_async = 1;
1745:
1.99 oster 1746: disk_busy(&rs->sc_dkdev);
1747:
1.34 oster 1748: /* XXX we're still at splbio() here... do we *really*
1749: need to be? */
1.20 oster 1750:
1.99 oster 1751: /* don't ever condition on bp->b_flags & B_WRITE.
1752: * always condition on B_READ instead */
1.37 oster 1753:
1.34 oster 1754: retcode = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1755: RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1756: do_async, raid_addr, num_blocks,
1.109 oster 1757: bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1.20 oster 1758:
1759: RF_LOCK_MUTEX(raidPtr->mutex);
1760: }
1.34 oster 1761: RF_UNLOCK_MUTEX(raidPtr->mutex);
1762: }
1.20 oster 1763:
1764:
1.7 explorer 1765:
1766:
1.1 oster 1767: /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1768:
1.9 oster 1769: int
1770: rf_DispatchKernelIO(queue, req)
1771: RF_DiskQueue_t *queue;
1772: RF_DiskQueueData_t *req;
1.1 oster 1773: {
1.9 oster 1774: int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1.1 oster 1775: struct buf *bp;
1.9 oster 1776: struct raidbuf *raidbp = NULL;
1.1 oster 1777: struct raid_softc *rs;
1.9 oster 1778: int unit;
1.37 oster 1779: int s;
1.9 oster 1780:
1.37 oster 1781: s=0;
1782: /* s = splbio();*/ /* want to test this */
1.1 oster 1783: /* XXX along with the vnode, we also need the softc associated with
1.9 oster 1784: * this device.. */
1785:
1.1 oster 1786: req->queue = queue;
1.9 oster 1787:
1.1 oster 1788: unit = queue->raidPtr->raidid;
1789:
1.9 oster 1790: db1_printf(("DispatchKernelIO unit: %d\n", unit));
1.1 oster 1791:
1.9 oster 1792: if (unit >= numraid) {
1793: printf("Invalid unit number: %d %d\n", unit, numraid);
1.1 oster 1794: panic("Invalid Unit number in rf_DispatchKernelIO\n");
1795: }
1796: rs = &raid_softc[unit];
1797:
1798: bp = req->bp;
1.16 oster 1799: #if 1
1.9 oster 1800: /* XXX when there is a physical disk failure, someone is passing us a
1801: * buffer that contains old stuff!! Attempt to deal with this problem
1802: * without taking a performance hit... (not sure where the real bug
1803: * is. It's buried in RAIDframe somewhere) :-( GO ) */
1.4 oster 1804:
1805: if (bp->b_flags & B_ERROR) {
1806: bp->b_flags &= ~B_ERROR;
1807: }
1.9 oster 1808: if (bp->b_error != 0) {
1.4 oster 1809: bp->b_error = 0;
1810: }
1.16 oster 1811: #endif
1.1 oster 1812: raidbp = RAIDGETBUF(rs);
1813:
1.9 oster 1814: raidbp->rf_flags = 0; /* XXX not really used anywhere... */
1.1 oster 1815:
1816: /*
1817: * context for raidiodone
1818: */
1819: raidbp->rf_obp = bp;
1820: raidbp->req = req;
1821:
1.32 oster 1822: LIST_INIT(&raidbp->rf_buf.b_dep);
1823:
1.1 oster 1824: switch (req->type) {
1.9 oster 1825: case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1.1 oster 1826: /* XXX need to do something extra here.. */
1.9 oster 1827: /* I'm leaving this in, as I've never actually seen it used,
1828: * and I'd like folks to report it... GO */
1.1 oster 1829: printf(("WAKEUP CALLED\n"));
1830: queue->numOutstanding++;
1831:
1832: /* XXX need to glue the original buffer into this?? */
1833:
1834: KernelWakeupFunc(&raidbp->rf_buf);
1835: break;
1.9 oster 1836:
1.1 oster 1837: case RF_IO_TYPE_READ:
1838: case RF_IO_TYPE_WRITE:
1.9 oster 1839:
1.1 oster 1840: if (req->tracerec) {
1841: RF_ETIMER_START(req->tracerec->timer);
1842: }
1.9 oster 1843: InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1844: op | bp->b_flags, queue->rf_cinfo->ci_dev,
1845: req->sectorOffset, req->numSector,
1846: req->buf, KernelWakeupFunc, (void *) req,
1847: queue->raidPtr->logBytesPerSector, req->b_proc);
1.1 oster 1848:
1849: if (rf_debugKernelAccess) {
1.9 oster 1850: db1_printf(("dispatch: bp->b_blkno = %ld\n",
1851: (long) bp->b_blkno));
1.1 oster 1852: }
1853: queue->numOutstanding++;
1854: queue->last_deq_sector = req->sectorOffset;
1.9 oster 1855: /* acc wouldn't have been let in if there were any pending
1856: * reqs at any other priority */
1.1 oster 1857: queue->curPriority = req->priority;
1858:
1859: db1_printf(("Going for %c to unit %d row %d col %d\n",
1.9 oster 1860: req->type, unit, queue->row, queue->col));
1.1 oster 1861: db1_printf(("sector %d count %d (%d bytes) %d\n",
1.9 oster 1862: (int) req->sectorOffset, (int) req->numSector,
1863: (int) (req->numSector <<
1864: queue->raidPtr->logBytesPerSector),
1865: (int) queue->raidPtr->logBytesPerSector));
1.1 oster 1866: if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1867: raidbp->rf_buf.b_vp->v_numoutput++;
1868: }
1.9 oster 1869: VOP_STRATEGY(&raidbp->rf_buf);
1.1 oster 1870:
1871: break;
1.9 oster 1872:
1.1 oster 1873: default:
1874: panic("bad req->type in rf_DispatchKernelIO");
1875: }
1876: db1_printf(("Exiting from DispatchKernelIO\n"));
1.37 oster 1877: /* splx(s); */ /* want to test this */
1.9 oster 1878: return (0);
1.1 oster 1879: }
1.9 oster 1880: /* this is the callback function associated with a I/O invoked from
1.1 oster 1881: kernel code.
1882: */
1.9 oster 1883: static void
1884: KernelWakeupFunc(vbp)
1885: struct buf *vbp;
1886: {
1887: RF_DiskQueueData_t *req = NULL;
1888: RF_DiskQueue_t *queue;
1889: struct raidbuf *raidbp = (struct raidbuf *) vbp;
1890: struct buf *bp;
1891: struct raid_softc *rs;
1892: int unit;
1.74 augustss 1893: int s;
1.9 oster 1894:
1.36 oster 1895: s = splbio();
1.9 oster 1896: db1_printf(("recovering the request queue:\n"));
1897: req = raidbp->req;
1.1 oster 1898:
1.9 oster 1899: bp = raidbp->rf_obp;
1.1 oster 1900:
1.9 oster 1901: queue = (RF_DiskQueue_t *) req->queue;
1.1 oster 1902:
1.9 oster 1903: if (raidbp->rf_buf.b_flags & B_ERROR) {
1904: bp->b_flags |= B_ERROR;
1905: bp->b_error = raidbp->rf_buf.b_error ?
1906: raidbp->rf_buf.b_error : EIO;
1907: }
1.1 oster 1908:
1.9 oster 1909: /* XXX methinks this could be wrong... */
1.1 oster 1910: #if 1
1.9 oster 1911: bp->b_resid = raidbp->rf_buf.b_resid;
1.1 oster 1912: #endif
1913:
1.9 oster 1914: if (req->tracerec) {
1915: RF_ETIMER_STOP(req->tracerec->timer);
1916: RF_ETIMER_EVAL(req->tracerec->timer);
1917: RF_LOCK_MUTEX(rf_tracing_mutex);
1918: req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1919: req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1920: req->tracerec->num_phys_ios++;
1921: RF_UNLOCK_MUTEX(rf_tracing_mutex);
1922: }
1923: bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1.1 oster 1924:
1.9 oster 1925: unit = queue->raidPtr->raidid; /* *Much* simpler :-> */
1.1 oster 1926:
1927:
1.9 oster 1928: /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1929: * ballistic, and mark the component as hosed... */
1.36 oster 1930:
1.9 oster 1931: if (bp->b_flags & B_ERROR) {
1932: /* Mark the disk as dead */
1933: /* but only mark it once... */
1934: if (queue->raidPtr->Disks[queue->row][queue->col].status ==
1935: rf_ds_optimal) {
1936: printf("raid%d: IO Error. Marking %s as failed.\n",
1937: unit, queue->raidPtr->Disks[queue->row][queue->col].devname);
1938: queue->raidPtr->Disks[queue->row][queue->col].status =
1939: rf_ds_failed;
1940: queue->raidPtr->status[queue->row] = rf_rs_degraded;
1941: queue->raidPtr->numFailures++;
1.56 oster 1942: queue->raidPtr->numNewFailures++;
1.9 oster 1943: } else { /* Disk is already dead... */
1944: /* printf("Disk already marked as dead!\n"); */
1945: }
1.4 oster 1946:
1.9 oster 1947: }
1.4 oster 1948:
1.9 oster 1949: rs = &raid_softc[unit];
1950: RAIDPUTBUF(rs, raidbp);
1951:
1952: rf_DiskIOComplete(queue, req, (bp->b_flags & B_ERROR) ? 1 : 0);
1953: (req->CompleteFunc) (req->argument, (bp->b_flags & B_ERROR) ? 1 : 0);
1.1 oster 1954:
1.36 oster 1955: splx(s);
1.1 oster 1956: }
1957:
1958:
1959:
1960: /*
1961: * initialize a buf structure for doing an I/O in the kernel.
1962: */
1.9 oster 1963: static void
1.70 oster 1964: InitBP(bp, b_vp, rw_flag, dev, startSect, numSect, buf, cbFunc, cbArg,
1965: logBytesPerSector, b_proc)
1966: struct buf *bp;
1967: struct vnode *b_vp;
1968: unsigned rw_flag;
1969: dev_t dev;
1970: RF_SectorNum_t startSect;
1971: RF_SectorCount_t numSect;
1972: caddr_t buf;
1973: void (*cbFunc) (struct buf *);
1974: void *cbArg;
1975: int logBytesPerSector;
1976: struct proc *b_proc;
1.9 oster 1977: {
1978: /* bp->b_flags = B_PHYS | rw_flag; */
1979: bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
1980: bp->b_bcount = numSect << logBytesPerSector;
1981: bp->b_bufsize = bp->b_bcount;
1982: bp->b_error = 0;
1983: bp->b_dev = dev;
1.79 thorpej 1984: bp->b_data = buf;
1.9 oster 1985: bp->b_blkno = startSect;
1986: bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1.1 oster 1987: if (bp->b_bcount == 0) {
1988: panic("bp->b_bcount is zero in InitBP!!\n");
1989: }
1.9 oster 1990: bp->b_proc = b_proc;
1991: bp->b_iodone = cbFunc;
1992: bp->b_vp = b_vp;
1993:
1.1 oster 1994: }
1995:
1996: static void
1997: raidgetdefaultlabel(raidPtr, rs, lp)
1998: RF_Raid_t *raidPtr;
1999: struct raid_softc *rs;
2000: struct disklabel *lp;
2001: {
2002: db1_printf(("Building a default label...\n"));
1.108 thorpej 2003: memset(lp, 0, sizeof(*lp));
1.1 oster 2004:
2005: /* fabricate a label... */
2006: lp->d_secperunit = raidPtr->totalSectors;
2007: lp->d_secsize = raidPtr->bytesPerSector;
1.45 oster 2008: lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1.105 oster 2009: lp->d_ntracks = 4 * raidPtr->numCol;
1.45 oster 2010: lp->d_ncylinders = raidPtr->totalSectors /
2011: (lp->d_nsectors * lp->d_ntracks);
1.1 oster 2012: lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2013:
2014: strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1.9 oster 2015: lp->d_type = DTYPE_RAID;
1.1 oster 2016: strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2017: lp->d_rpm = 3600;
2018: lp->d_interleave = 1;
2019: lp->d_flags = 0;
2020:
2021: lp->d_partitions[RAW_PART].p_offset = 0;
2022: lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2023: lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2024: lp->d_npartitions = RAW_PART + 1;
2025:
2026: lp->d_magic = DISKMAGIC;
2027: lp->d_magic2 = DISKMAGIC;
2028: lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2029:
2030: }
2031: /*
2032: * Read the disklabel from the raid device. If one is not present, fake one
2033: * up.
2034: */
2035: static void
2036: raidgetdisklabel(dev)
1.9 oster 2037: dev_t dev;
1.1 oster 2038: {
1.9 oster 2039: int unit = raidunit(dev);
1.1 oster 2040: struct raid_softc *rs = &raid_softc[unit];
1.9 oster 2041: char *errstring;
1.1 oster 2042: struct disklabel *lp = rs->sc_dkdev.dk_label;
2043: struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2044: RF_Raid_t *raidPtr;
2045:
2046: db1_printf(("Getting the disklabel...\n"));
2047:
1.108 thorpej 2048: memset(clp, 0, sizeof(*clp));
1.1 oster 2049:
2050: raidPtr = raidPtrs[unit];
2051:
2052: raidgetdefaultlabel(raidPtr, rs, lp);
2053:
2054: /*
2055: * Call the generic disklabel extraction routine.
2056: */
2057: errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2058: rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1.9 oster 2059: if (errstring)
1.1 oster 2060: raidmakedisklabel(rs);
2061: else {
1.9 oster 2062: int i;
1.1 oster 2063: struct partition *pp;
2064:
2065: /*
2066: * Sanity check whether the found disklabel is valid.
2067: *
2068: * This is necessary since total size of the raid device
2069: * may vary when an interleave is changed even though exactly
2070: * same componets are used, and old disklabel may used
2071: * if that is found.
2072: */
2073: if (lp->d_secperunit != rs->sc_size)
1.117.6.3! gehenna 2074: printf("raid%d: WARNING: %s: "
1.1 oster 2075: "total sector size in disklabel (%d) != "
1.117.6.3! gehenna 2076: "the size of raid (%ld)\n", unit, rs->sc_xname,
1.18 oster 2077: lp->d_secperunit, (long) rs->sc_size);
1.1 oster 2078: for (i = 0; i < lp->d_npartitions; i++) {
2079: pp = &lp->d_partitions[i];
2080: if (pp->p_offset + pp->p_size > rs->sc_size)
1.117.6.3! gehenna 2081: printf("raid%d: WARNING: %s: end of partition `%c' "
! 2082: "exceeds the size of raid (%ld)\n",
! 2083: unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
1.1 oster 2084: }
2085: }
2086:
2087: }
2088: /*
2089: * Take care of things one might want to take care of in the event
2090: * that a disklabel isn't present.
2091: */
2092: static void
2093: raidmakedisklabel(rs)
2094: struct raid_softc *rs;
2095: {
2096: struct disklabel *lp = rs->sc_dkdev.dk_label;
2097: db1_printf(("Making a label..\n"));
2098:
2099: /*
2100: * For historical reasons, if there's no disklabel present
2101: * the raw partition must be marked FS_BSDFFS.
2102: */
2103:
2104: lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2105:
2106: strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2107:
2108: lp->d_checksum = dkcksum(lp);
2109: }
2110: /*
2111: * Lookup the provided name in the filesystem. If the file exists,
2112: * is a valid block device, and isn't being used by anyone else,
2113: * set *vpp to the file's vnode.
1.9 oster 2114: * You'll find the original of this in ccd.c
1.1 oster 2115: */
2116: int
2117: raidlookup(path, p, vpp)
1.9 oster 2118: char *path;
1.1 oster 2119: struct proc *p;
2120: struct vnode **vpp; /* result */
2121: {
2122: struct nameidata nd;
2123: struct vnode *vp;
2124: struct vattr va;
1.9 oster 2125: int error;
1.1 oster 2126:
2127: NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1.9 oster 2128: if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1.117.6.3! gehenna 2129: #if 0
1.9 oster 2130: printf("RAIDframe: vn_open returned %d\n", error);
1.1 oster 2131: #endif
2132: return (error);
2133: }
2134: vp = nd.ni_vp;
2135: if (vp->v_usecount > 1) {
2136: VOP_UNLOCK(vp, 0);
1.9 oster 2137: (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1.1 oster 2138: return (EBUSY);
2139: }
2140: if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
2141: VOP_UNLOCK(vp, 0);
1.9 oster 2142: (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1.1 oster 2143: return (error);
2144: }
2145: /* XXX: eventually we should handle VREG, too. */
2146: if (va.va_type != VBLK) {
2147: VOP_UNLOCK(vp, 0);
1.9 oster 2148: (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1.1 oster 2149: return (ENOTBLK);
2150: }
2151: VOP_UNLOCK(vp, 0);
2152: *vpp = vp;
2153: return (0);
2154: }
2155: /*
2156: * Wait interruptibly for an exclusive lock.
2157: *
2158: * XXX
2159: * Several drivers do this; it should be abstracted and made MP-safe.
2160: * (Hmm... where have we seen this warning before :-> GO )
2161: */
2162: static int
2163: raidlock(rs)
2164: struct raid_softc *rs;
2165: {
1.9 oster 2166: int error;
1.1 oster 2167:
2168: while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2169: rs->sc_flags |= RAIDF_WANTED;
1.9 oster 2170: if ((error =
2171: tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1.1 oster 2172: return (error);
2173: }
2174: rs->sc_flags |= RAIDF_LOCKED;
2175: return (0);
2176: }
2177: /*
2178: * Unlock and wake up any waiters.
2179: */
2180: static void
2181: raidunlock(rs)
2182: struct raid_softc *rs;
2183: {
2184:
2185: rs->sc_flags &= ~RAIDF_LOCKED;
2186: if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2187: rs->sc_flags &= ~RAIDF_WANTED;
2188: wakeup(rs);
2189: }
1.11 oster 2190: }
2191:
2192:
2193: #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2194: #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
2195:
2196: int
1.12 oster 2197: raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2198: {
1.48 oster 2199: RF_ComponentLabel_t clabel;
2200: raidread_component_label(dev, b_vp, &clabel);
2201: clabel.mod_counter = mod_counter;
2202: clabel.clean = RF_RAID_CLEAN;
2203: raidwrite_component_label(dev, b_vp, &clabel);
1.12 oster 2204: return(0);
2205: }
2206:
2207:
2208: int
2209: raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
1.11 oster 2210: {
1.48 oster 2211: RF_ComponentLabel_t clabel;
2212: raidread_component_label(dev, b_vp, &clabel);
2213: clabel.mod_counter = mod_counter;
2214: clabel.clean = RF_RAID_DIRTY;
2215: raidwrite_component_label(dev, b_vp, &clabel);
1.11 oster 2216: return(0);
2217: }
2218:
2219: /* ARGSUSED */
2220: int
1.48 oster 2221: raidread_component_label(dev, b_vp, clabel)
1.11 oster 2222: dev_t dev;
2223: struct vnode *b_vp;
1.48 oster 2224: RF_ComponentLabel_t *clabel;
1.11 oster 2225: {
2226: struct buf *bp;
1.117.6.1 gehenna 2227: const struct bdevsw *bdev;
1.11 oster 2228: int error;
2229:
2230: /* XXX should probably ensure that we don't try to do this if
2231: someone has changed rf_protected_sectors. */
2232:
1.98 oster 2233: if (b_vp == NULL) {
2234: /* For whatever reason, this component is not valid.
2235: Don't try to read a component label from it. */
2236: return(EINVAL);
2237: }
2238:
1.11 oster 2239: /* get a block of the appropriate size... */
2240: bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2241: bp->b_dev = dev;
2242:
2243: /* get our ducks in a row for the read */
2244: bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2245: bp->b_bcount = RF_COMPONENT_INFO_SIZE;
1.100 chs 2246: bp->b_flags |= B_READ;
1.11 oster 2247: bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2248:
1.117.6.1 gehenna 2249: bdev = bdevsw_lookup(bp->b_dev);
2250: if (bdev == NULL)
2251: return (ENXIO);
2252: (*bdev->d_strategy)(bp);
1.11 oster 2253:
2254: error = biowait(bp);
2255:
2256: if (!error) {
1.79 thorpej 2257: memcpy(clabel, bp->b_data,
1.11 oster 2258: sizeof(RF_ComponentLabel_t));
1.12 oster 2259: #if 0
1.67 oster 2260: rf_print_component_label( clabel );
1.11 oster 2261: #endif
2262: } else {
1.48 oster 2263: #if 0
1.11 oster 2264: printf("Failed to read RAID component label!\n");
1.48 oster 2265: #endif
1.11 oster 2266: }
2267:
2268: brelse(bp);
2269: return(error);
2270: }
2271: /* ARGSUSED */
2272: int
1.48 oster 2273: raidwrite_component_label(dev, b_vp, clabel)
1.11 oster 2274: dev_t dev;
2275: struct vnode *b_vp;
1.48 oster 2276: RF_ComponentLabel_t *clabel;
1.11 oster 2277: {
2278: struct buf *bp;
1.117.6.1 gehenna 2279: const struct bdevsw *bdev;
1.11 oster 2280: int error;
2281:
2282: /* get a block of the appropriate size... */
2283: bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2284: bp->b_dev = dev;
2285:
2286: /* get our ducks in a row for the write */
2287: bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2288: bp->b_bcount = RF_COMPONENT_INFO_SIZE;
1.100 chs 2289: bp->b_flags |= B_WRITE;
1.11 oster 2290: bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2291:
1.79 thorpej 2292: memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
1.11 oster 2293:
1.79 thorpej 2294: memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
1.11 oster 2295:
1.117.6.1 gehenna 2296: bdev = bdevsw_lookup(bp->b_dev);
2297: if (bdev == NULL)
2298: return (ENXIO);
2299: (*bdev->d_strategy)(bp);
1.11 oster 2300: error = biowait(bp);
2301: brelse(bp);
2302: if (error) {
1.48 oster 2303: #if 1
1.11 oster 2304: printf("Failed to write RAID component info!\n");
1.48 oster 2305: #endif
1.11 oster 2306: }
2307:
2308: return(error);
1.1 oster 2309: }
1.12 oster 2310:
2311: void
1.70 oster 2312: rf_markalldirty(raidPtr)
1.12 oster 2313: RF_Raid_t *raidPtr;
2314: {
1.48 oster 2315: RF_ComponentLabel_t clabel;
1.12 oster 2316: int r,c;
2317:
2318: raidPtr->mod_counter++;
2319: for (r = 0; r < raidPtr->numRow; r++) {
2320: for (c = 0; c < raidPtr->numCol; c++) {
1.98 oster 2321: /* we don't want to touch (at all) a disk that has
2322: failed */
2323: if (!RF_DEAD_DISK(raidPtr->Disks[r][c].status)) {
1.12 oster 2324: raidread_component_label(
2325: raidPtr->Disks[r][c].dev,
2326: raidPtr->raid_cinfo[r][c].ci_vp,
1.48 oster 2327: &clabel);
2328: if (clabel.status == rf_ds_spared) {
1.12 oster 2329: /* XXX do something special...
2330: but whatever you do, don't
2331: try to access it!! */
2332: } else {
2333: #if 0
1.48 oster 2334: clabel.status =
1.12 oster 2335: raidPtr->Disks[r][c].status;
2336: raidwrite_component_label(
2337: raidPtr->Disks[r][c].dev,
2338: raidPtr->raid_cinfo[r][c].ci_vp,
1.48 oster 2339: &clabel);
1.12 oster 2340: #endif
2341: raidmarkdirty(
2342: raidPtr->Disks[r][c].dev,
2343: raidPtr->raid_cinfo[r][c].ci_vp,
2344: raidPtr->mod_counter);
2345: }
2346: }
2347: }
2348: }
1.13 oster 2349: /* printf("Component labels marked dirty.\n"); */
1.12 oster 2350: #if 0
2351: for( c = 0; c < raidPtr->numSpare ; c++) {
2352: sparecol = raidPtr->numCol + c;
2353: if (raidPtr->Disks[r][sparecol].status == rf_ds_used_spare) {
2354: /*
2355:
2356: XXX this is where we get fancy and map this spare
2357: into it's correct spot in the array.
2358:
2359: */
2360: /*
2361:
2362: we claim this disk is "optimal" if it's
2363: rf_ds_used_spare, as that means it should be
2364: directly substitutable for the disk it replaced.
2365: We note that too...
2366:
2367: */
2368:
2369: for(i=0;i<raidPtr->numRow;i++) {
2370: for(j=0;j<raidPtr->numCol;j++) {
2371: if ((raidPtr->Disks[i][j].spareRow ==
2372: r) &&
2373: (raidPtr->Disks[i][j].spareCol ==
2374: sparecol)) {
2375: srow = r;
2376: scol = sparecol;
2377: break;
2378: }
2379: }
2380: }
2381:
2382: raidread_component_label(
2383: raidPtr->Disks[r][sparecol].dev,
2384: raidPtr->raid_cinfo[r][sparecol].ci_vp,
1.48 oster 2385: &clabel);
1.12 oster 2386: /* make sure status is noted */
1.48 oster 2387: clabel.version = RF_COMPONENT_LABEL_VERSION;
2388: clabel.mod_counter = raidPtr->mod_counter;
2389: clabel.serial_number = raidPtr->serial_number;
2390: clabel.row = srow;
2391: clabel.column = scol;
2392: clabel.num_rows = raidPtr->numRow;
2393: clabel.num_columns = raidPtr->numCol;
2394: clabel.clean = RF_RAID_DIRTY; /* changed in a bit*/
2395: clabel.status = rf_ds_optimal;
1.12 oster 2396: raidwrite_component_label(
2397: raidPtr->Disks[r][sparecol].dev,
2398: raidPtr->raid_cinfo[r][sparecol].ci_vp,
1.48 oster 2399: &clabel);
1.12 oster 2400: raidmarkclean( raidPtr->Disks[r][sparecol].dev,
2401: raidPtr->raid_cinfo[r][sparecol].ci_vp);
2402: }
2403: }
2404:
2405: #endif
2406: }
2407:
1.13 oster 2408:
2409: void
1.91 oster 2410: rf_update_component_labels(raidPtr, final)
1.13 oster 2411: RF_Raid_t *raidPtr;
1.91 oster 2412: int final;
1.13 oster 2413: {
1.48 oster 2414: RF_ComponentLabel_t clabel;
1.13 oster 2415: int sparecol;
2416: int r,c;
2417: int i,j;
2418: int srow, scol;
2419:
2420: srow = -1;
2421: scol = -1;
2422:
2423: /* XXX should do extra checks to make sure things really are clean,
2424: rather than blindly setting the clean bit... */
2425:
2426: raidPtr->mod_counter++;
2427:
2428: for (r = 0; r < raidPtr->numRow; r++) {
2429: for (c = 0; c < raidPtr->numCol; c++) {
2430: if (raidPtr->Disks[r][c].status == rf_ds_optimal) {
2431: raidread_component_label(
2432: raidPtr->Disks[r][c].dev,
2433: raidPtr->raid_cinfo[r][c].ci_vp,
1.48 oster 2434: &clabel);
1.13 oster 2435: /* make sure status is noted */
1.48 oster 2436: clabel.status = rf_ds_optimal;
1.57 oster 2437: /* bump the counter */
1.60 oster 2438: clabel.mod_counter = raidPtr->mod_counter;
1.57 oster 2439:
1.13 oster 2440: raidwrite_component_label(
2441: raidPtr->Disks[r][c].dev,
2442: raidPtr->raid_cinfo[r][c].ci_vp,
1.48 oster 2443: &clabel);
1.91 oster 2444: if (final == RF_FINAL_COMPONENT_UPDATE) {
2445: if (raidPtr->parity_good == RF_RAID_CLEAN) {
2446: raidmarkclean(
2447: raidPtr->Disks[r][c].dev,
2448: raidPtr->raid_cinfo[r][c].ci_vp,
2449: raidPtr->mod_counter);
2450: }
2451: }
1.13 oster 2452: }
2453: /* else we don't touch it.. */
1.63 oster 2454: }
2455: }
2456:
2457: for( c = 0; c < raidPtr->numSpare ; c++) {
2458: sparecol = raidPtr->numCol + c;
1.110 oster 2459: /* Need to ensure that the reconstruct actually completed! */
1.111 oster 2460: if (raidPtr->Disks[0][sparecol].status == rf_ds_used_spare) {
1.63 oster 2461: /*
2462:
2463: we claim this disk is "optimal" if it's
2464: rf_ds_used_spare, as that means it should be
2465: directly substitutable for the disk it replaced.
2466: We note that too...
2467:
2468: */
2469:
2470: for(i=0;i<raidPtr->numRow;i++) {
2471: for(j=0;j<raidPtr->numCol;j++) {
2472: if ((raidPtr->Disks[i][j].spareRow ==
2473: 0) &&
2474: (raidPtr->Disks[i][j].spareCol ==
2475: sparecol)) {
2476: srow = i;
2477: scol = j;
2478: break;
2479: }
2480: }
2481: }
2482:
2483: /* XXX shouldn't *really* need this... */
2484: raidread_component_label(
2485: raidPtr->Disks[0][sparecol].dev,
2486: raidPtr->raid_cinfo[0][sparecol].ci_vp,
2487: &clabel);
2488: /* make sure status is noted */
2489:
2490: raid_init_component_label(raidPtr, &clabel);
2491:
2492: clabel.mod_counter = raidPtr->mod_counter;
2493: clabel.row = srow;
2494: clabel.column = scol;
2495: clabel.status = rf_ds_optimal;
2496:
2497: raidwrite_component_label(
2498: raidPtr->Disks[0][sparecol].dev,
2499: raidPtr->raid_cinfo[0][sparecol].ci_vp,
2500: &clabel);
1.91 oster 2501: if (final == RF_FINAL_COMPONENT_UPDATE) {
1.13 oster 2502: if (raidPtr->parity_good == RF_RAID_CLEAN) {
1.91 oster 2503: raidmarkclean( raidPtr->Disks[0][sparecol].dev,
2504: raidPtr->raid_cinfo[0][sparecol].ci_vp,
2505: raidPtr->mod_counter);
1.13 oster 2506: }
2507: }
2508: }
2509: }
2510: /* printf("Component labels updated\n"); */
1.68 oster 2511: }
2512:
2513: void
1.70 oster 2514: rf_close_component(raidPtr, vp, auto_configured)
1.69 oster 2515: RF_Raid_t *raidPtr;
2516: struct vnode *vp;
2517: int auto_configured;
2518: {
2519: struct proc *p;
2520:
2521: p = raidPtr->engine_thread;
2522:
2523: if (vp != NULL) {
2524: if (auto_configured == 1) {
1.96 oster 2525: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.97 oster 2526: VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
1.69 oster 2527: vput(vp);
2528:
2529: } else {
2530: (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
2531: }
2532: } else {
1.117.6.3! gehenna 2533: #if 0
1.69 oster 2534: printf("vnode was NULL\n");
1.117.6.3! gehenna 2535: #endif
1.69 oster 2536: }
2537: }
2538:
2539:
2540: void
1.70 oster 2541: rf_UnconfigureVnodes(raidPtr)
1.68 oster 2542: RF_Raid_t *raidPtr;
2543: {
2544: int r,c;
2545: struct proc *p;
1.69 oster 2546: struct vnode *vp;
2547: int acd;
1.68 oster 2548:
2549:
2550: /* We take this opportunity to close the vnodes like we should.. */
2551:
2552: p = raidPtr->engine_thread;
2553:
2554: for (r = 0; r < raidPtr->numRow; r++) {
2555: for (c = 0; c < raidPtr->numCol; c++) {
1.117.6.3! gehenna 2556: #if 0
! 2557: printf("raid%d: Closing vnode for row: %d col: %d\n",
! 2558: raidPtr->raidid, r, c);
! 2559: #endif
1.69 oster 2560: vp = raidPtr->raid_cinfo[r][c].ci_vp;
2561: acd = raidPtr->Disks[r][c].auto_configured;
2562: rf_close_component(raidPtr, vp, acd);
2563: raidPtr->raid_cinfo[r][c].ci_vp = NULL;
2564: raidPtr->Disks[r][c].auto_configured = 0;
1.68 oster 2565: }
2566: }
2567: for (r = 0; r < raidPtr->numSpare; r++) {
1.117.6.3! gehenna 2568: #if 0
! 2569: printf("raid%d: Closing vnode for spare: %d\n",
! 2570: raidPtr->raidid, r);
! 2571: #endif
1.69 oster 2572: vp = raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp;
2573: acd = raidPtr->Disks[0][raidPtr->numCol + r].auto_configured;
2574: rf_close_component(raidPtr, vp, acd);
2575: raidPtr->raid_cinfo[0][raidPtr->numCol + r].ci_vp = NULL;
2576: raidPtr->Disks[0][raidPtr->numCol + r].auto_configured = 0;
1.68 oster 2577: }
1.37 oster 2578: }
1.63 oster 2579:
1.37 oster 2580:
2581: void
2582: rf_ReconThread(req)
2583: struct rf_recon_req *req;
2584: {
2585: int s;
2586: RF_Raid_t *raidPtr;
2587:
2588: s = splbio();
2589: raidPtr = (RF_Raid_t *) req->raidPtr;
2590: raidPtr->recon_in_progress = 1;
2591:
2592: rf_FailDisk((RF_Raid_t *) req->raidPtr, req->row, req->col,
2593: ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2594:
2595: /* XXX get rid of this! we don't need it at all.. */
2596: RF_Free(req, sizeof(*req));
2597:
2598: raidPtr->recon_in_progress = 0;
2599: splx(s);
2600:
2601: /* That's all... */
2602: kthread_exit(0); /* does not return */
2603: }
2604:
2605: void
2606: rf_RewriteParityThread(raidPtr)
2607: RF_Raid_t *raidPtr;
2608: {
2609: int retcode;
2610: int s;
2611:
2612: raidPtr->parity_rewrite_in_progress = 1;
2613: s = splbio();
2614: retcode = rf_RewriteParity(raidPtr);
2615: splx(s);
2616: if (retcode) {
2617: printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2618: } else {
2619: /* set the clean bit! If we shutdown correctly,
2620: the clean bit on each component label will get
2621: set */
2622: raidPtr->parity_good = RF_RAID_CLEAN;
2623: }
2624: raidPtr->parity_rewrite_in_progress = 0;
1.85 oster 2625:
2626: /* Anyone waiting for us to stop? If so, inform them... */
2627: if (raidPtr->waitShutdown) {
2628: wakeup(&raidPtr->parity_rewrite_in_progress);
2629: }
1.37 oster 2630:
2631: /* That's all... */
2632: kthread_exit(0); /* does not return */
2633: }
2634:
2635:
2636: void
2637: rf_CopybackThread(raidPtr)
2638: RF_Raid_t *raidPtr;
2639: {
2640: int s;
2641:
2642: raidPtr->copyback_in_progress = 1;
2643: s = splbio();
2644: rf_CopybackReconstructedData(raidPtr);
2645: splx(s);
2646: raidPtr->copyback_in_progress = 0;
2647:
2648: /* That's all... */
2649: kthread_exit(0); /* does not return */
2650: }
2651:
2652:
2653: void
2654: rf_ReconstructInPlaceThread(req)
2655: struct rf_recon_req *req;
2656: {
2657: int retcode;
2658: int s;
2659: RF_Raid_t *raidPtr;
2660:
2661: s = splbio();
2662: raidPtr = req->raidPtr;
2663: raidPtr->recon_in_progress = 1;
2664: retcode = rf_ReconstructInPlace(raidPtr, req->row, req->col);
2665: RF_Free(req, sizeof(*req));
2666: raidPtr->recon_in_progress = 0;
2667: splx(s);
2668:
2669: /* That's all... */
2670: kthread_exit(0); /* does not return */
1.48 oster 2671: }
2672:
/*
 * Mount-root hook for a raid device.  Intentionally does nothing.
 */
void
rf_mountroot_hook(dev)
	struct device *dev;
{
	/* nothing to do */
}
2679:
2680:
2681: RF_AutoConfig_t *
2682: rf_find_raid_components()
2683: {
2684: struct vnode *vp;
2685: struct disklabel label;
2686: struct device *dv;
2687: dev_t dev;
1.117.6.1 gehenna 2688: int bmajor;
1.48 oster 2689: int error;
2690: int i;
2691: int good_one;
2692: RF_ComponentLabel_t *clabel;
2693: RF_AutoConfig_t *ac_list;
2694: RF_AutoConfig_t *ac;
2695:
2696:
2697: /* initialize the AutoConfig list */
2698: ac_list = NULL;
2699:
2700: /* we begin by trolling through *all* the devices on the system */
2701:
2702: for (dv = alldevs.tqh_first; dv != NULL;
2703: dv = dv->dv_list.tqe_next) {
2704:
2705: /* we are only interested in disks... */
2706: if (dv->dv_class != DV_DISK)
2707: continue;
2708:
2709: /* we don't care about floppies... */
2710: if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fd")) {
1.117.6.2 gehenna 2711: continue;
2712: }
2713: /* hdfd is the Atari/Hades floppy driver */
2714: if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"hdfd")) {
1.117.6.3! gehenna 2715: continue;
! 2716: }
! 2717: /* fdisa is the Atari/Milan floppy driver */
! 2718: if (!strcmp(dv->dv_cfdata->cf_driver->cd_name,"fdisa")) {
1.48 oster 2719: continue;
2720: }
2721:
2722: /* need to find the device_name_to_block_device_major stuff */
1.117.6.1 gehenna 2723: bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
1.48 oster 2724:
2725: /* get a vnode for the raw partition of this disk */
2726:
1.117.6.1 gehenna 2727: dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
1.48 oster 2728: if (bdevvp(dev, &vp))
2729: panic("RAID can't alloc vnode");
2730:
2731: error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2732:
2733: if (error) {
2734: /* "Who cares." Continue looking
2735: for something that exists*/
2736: vput(vp);
2737: continue;
2738: }
2739:
2740: /* Ok, the disk exists. Go get the disklabel. */
2741: error = VOP_IOCTL(vp, DIOCGDINFO, (caddr_t)&label,
2742: FREAD, NOCRED, 0);
2743: if (error) {
2744: /*
2745: * XXX can't happen - open() would
2746: * have errored out (or faked up one)
2747: */
2748: printf("can't get label for dev %s%c (%d)!?!?\n",
2749: dv->dv_xname, 'a' + RAW_PART, error);
2750: }
2751:
2752: /* don't need this any more. We'll allocate it again
2753: a little later if we really do... */
1.96 oster 2754: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.97 oster 2755: VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
1.48 oster 2756: vput(vp);
2757:
2758: for (i=0; i < label.d_npartitions; i++) {
2759: /* We only support partitions marked as RAID */
2760: if (label.d_partitions[i].p_fstype != FS_RAID)
2761: continue;
2762:
1.117.6.1 gehenna 2763: dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
1.48 oster 2764: if (bdevvp(dev, &vp))
2765: panic("RAID can't alloc vnode");
2766:
2767: error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2768: if (error) {
2769: /* Whatever... */
2770: vput(vp);
2771: continue;
2772: }
2773:
2774: good_one = 0;
2775:
2776: clabel = (RF_ComponentLabel_t *)
2777: malloc(sizeof(RF_ComponentLabel_t),
2778: M_RAIDFRAME, M_NOWAIT);
2779: if (clabel == NULL) {
2780: /* XXX CLEANUP HERE */
2781: printf("RAID auto config: out of memory!\n");
2782: return(NULL); /* XXX probably should panic? */
2783: }
2784:
2785: if (!raidread_component_label(dev, vp, clabel)) {
2786: /* Got the label. Does it look reasonable? */
1.49 oster 2787: if (rf_reasonable_label(clabel) &&
1.54 oster 2788: (clabel->partitionSize <=
1.48 oster 2789: label.d_partitions[i].p_size)) {
2790: #if DEBUG
2791: printf("Component on: %s%c: %d\n",
2792: dv->dv_xname, 'a'+i,
2793: label.d_partitions[i].p_size);
1.67 oster 2794: rf_print_component_label(clabel);
1.48 oster 2795: #endif
2796: /* if it's reasonable, add it,
2797: else ignore it. */
2798: ac = (RF_AutoConfig_t *)
2799: malloc(sizeof(RF_AutoConfig_t),
2800: M_RAIDFRAME,
2801: M_NOWAIT);
2802: if (ac == NULL) {
2803: /* XXX should panic?? */
2804: return(NULL);
2805: }
2806:
2807: sprintf(ac->devname, "%s%c",
2808: dv->dv_xname, 'a'+i);
2809: ac->dev = dev;
2810: ac->vp = vp;
2811: ac->clabel = clabel;
2812: ac->next = ac_list;
2813: ac_list = ac;
2814: good_one = 1;
2815: }
2816: }
2817: if (!good_one) {
2818: /* cleanup */
2819: free(clabel, M_RAIDFRAME);
1.96 oster 2820: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.97 oster 2821: VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
1.48 oster 2822: vput(vp);
2823: }
2824: }
2825: }
1.106 oster 2826: return(ac_list);
1.48 oster 2827: }
2828:
2829: static int
1.49 oster 2830: rf_reasonable_label(clabel)
1.48 oster 2831: RF_ComponentLabel_t *clabel;
2832: {
2833:
2834: if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2835: (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2836: ((clabel->clean == RF_RAID_CLEAN) ||
2837: (clabel->clean == RF_RAID_DIRTY)) &&
2838: clabel->row >=0 &&
2839: clabel->column >= 0 &&
2840: clabel->num_rows > 0 &&
2841: clabel->num_columns > 0 &&
2842: clabel->row < clabel->num_rows &&
2843: clabel->column < clabel->num_columns &&
2844: clabel->blockSize > 0 &&
2845: clabel->numBlocks > 0) {
2846: /* label looks reasonable enough... */
2847: return(1);
2848: }
2849: return(0);
2850: }
2851:
2852:
2853: void
1.67 oster 2854: rf_print_component_label(clabel)
1.48 oster 2855: RF_ComponentLabel_t *clabel;
2856: {
2857: printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
2858: clabel->row, clabel->column,
2859: clabel->num_rows, clabel->num_columns);
2860: printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
2861: clabel->version, clabel->serial_number,
2862: clabel->mod_counter);
2863: printf(" Clean: %s Status: %d\n",
2864: clabel->clean ? "Yes" : "No", clabel->status );
2865: printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
2866: clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
2867: printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
2868: (char) clabel->parityConfig, clabel->blockSize,
2869: clabel->numBlocks);
2870: printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No" );
1.75 oster 2871: printf(" Contains root partition: %s\n",
2872: clabel->root_partition ? "Yes" : "No" );
1.48 oster 2873: printf(" Last configured as: raid%d\n", clabel->last_unit );
1.51 oster 2874: #if 0
2875: printf(" Config order: %d\n", clabel->config_order);
2876: #endif
1.48 oster 2877:
2878: }
2879:
2880: RF_ConfigSet_t *
2881: rf_create_auto_sets(ac_list)
2882: RF_AutoConfig_t *ac_list;
2883: {
2884: RF_AutoConfig_t *ac;
2885: RF_ConfigSet_t *config_sets;
2886: RF_ConfigSet_t *cset;
2887: RF_AutoConfig_t *ac_next;
2888:
2889:
2890: config_sets = NULL;
2891:
2892: /* Go through the AutoConfig list, and figure out which components
2893: belong to what sets. */
2894: ac = ac_list;
2895: while(ac!=NULL) {
2896: /* we're going to putz with ac->next, so save it here
2897: for use at the end of the loop */
2898: ac_next = ac->next;
2899:
2900: if (config_sets == NULL) {
2901: /* will need at least this one... */
2902: config_sets = (RF_ConfigSet_t *)
2903: malloc(sizeof(RF_ConfigSet_t),
2904: M_RAIDFRAME, M_NOWAIT);
2905: if (config_sets == NULL) {
2906: panic("rf_create_auto_sets: No memory!\n");
2907: }
2908: /* this one is easy :) */
2909: config_sets->ac = ac;
2910: config_sets->next = NULL;
1.51 oster 2911: config_sets->rootable = 0;
1.48 oster 2912: ac->next = NULL;
2913: } else {
2914: /* which set does this component fit into? */
2915: cset = config_sets;
2916: while(cset!=NULL) {
1.49 oster 2917: if (rf_does_it_fit(cset, ac)) {
1.86 oster 2918: /* looks like it matches... */
2919: ac->next = cset->ac;
2920: cset->ac = ac;
1.48 oster 2921: break;
2922: }
2923: cset = cset->next;
2924: }
2925: if (cset==NULL) {
2926: /* didn't find a match above... new set..*/
2927: cset = (RF_ConfigSet_t *)
2928: malloc(sizeof(RF_ConfigSet_t),
2929: M_RAIDFRAME, M_NOWAIT);
2930: if (cset == NULL) {
2931: panic("rf_create_auto_sets: No memory!\n");
2932: }
2933: cset->ac = ac;
2934: ac->next = NULL;
2935: cset->next = config_sets;
1.51 oster 2936: cset->rootable = 0;
1.48 oster 2937: config_sets = cset;
2938: }
2939: }
2940: ac = ac_next;
2941: }
2942:
2943:
2944: return(config_sets);
2945: }
2946:
2947: static int
1.49 oster 2948: rf_does_it_fit(cset, ac)
1.48 oster 2949: RF_ConfigSet_t *cset;
2950: RF_AutoConfig_t *ac;
2951: {
2952: RF_ComponentLabel_t *clabel1, *clabel2;
2953:
2954: /* If this one matches the *first* one in the set, that's good
2955: enough, since the other members of the set would have been
2956: through here too... */
1.60 oster 2957: /* note that we are not checking partitionSize here..
2958:
2959: Note that we are also not checking the mod_counters here.
2960: If everything else matches execpt the mod_counter, that's
2961: good enough for this test. We will deal with the mod_counters
2962: a little later in the autoconfiguration process.
2963:
2964: (clabel1->mod_counter == clabel2->mod_counter) &&
1.81 oster 2965:
2966: The reason we don't check for this is that failed disks
2967: will have lower modification counts. If those disks are
2968: not added to the set they used to belong to, then they will
2969: form their own set, which may result in 2 different sets,
2970: for example, competing to be configured at raid0, and
2971: perhaps competing to be the root filesystem set. If the
2972: wrong ones get configured, or both attempt to become /,
2973: weird behaviour and or serious lossage will occur. Thus we
2974: need to bring them into the fold here, and kick them out at
2975: a later point.
1.60 oster 2976:
2977: */
1.48 oster 2978:
2979: clabel1 = cset->ac->clabel;
2980: clabel2 = ac->clabel;
2981: if ((clabel1->version == clabel2->version) &&
2982: (clabel1->serial_number == clabel2->serial_number) &&
2983: (clabel1->num_rows == clabel2->num_rows) &&
2984: (clabel1->num_columns == clabel2->num_columns) &&
2985: (clabel1->sectPerSU == clabel2->sectPerSU) &&
2986: (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2987: (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2988: (clabel1->parityConfig == clabel2->parityConfig) &&
2989: (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2990: (clabel1->blockSize == clabel2->blockSize) &&
2991: (clabel1->numBlocks == clabel2->numBlocks) &&
2992: (clabel1->autoconfigure == clabel2->autoconfigure) &&
2993: (clabel1->root_partition == clabel2->root_partition) &&
2994: (clabel1->last_unit == clabel2->last_unit) &&
2995: (clabel1->config_order == clabel2->config_order)) {
2996: /* if it get's here, it almost *has* to be a match */
2997: } else {
2998: /* it's not consistent with somebody in the set..
2999: punt */
3000: return(0);
3001: }
3002: /* all was fine.. it must fit... */
3003: return(1);
3004: }
3005:
3006: int
1.51 oster 3007: rf_have_enough_components(cset)
3008: RF_ConfigSet_t *cset;
1.48 oster 3009: {
1.51 oster 3010: RF_AutoConfig_t *ac;
3011: RF_AutoConfig_t *auto_config;
3012: RF_ComponentLabel_t *clabel;
3013: int r,c;
3014: int num_rows;
3015: int num_cols;
3016: int num_missing;
1.86 oster 3017: int mod_counter;
1.87 oster 3018: int mod_counter_found;
1.88 oster 3019: int even_pair_failed;
3020: char parity_type;
3021:
1.51 oster 3022:
1.48 oster 3023: /* check to see that we have enough 'live' components
3024: of this set. If so, we can configure it if necessary */
3025:
1.51 oster 3026: num_rows = cset->ac->clabel->num_rows;
3027: num_cols = cset->ac->clabel->num_columns;
1.88 oster 3028: parity_type = cset->ac->clabel->parityConfig;
1.51 oster 3029:
3030: /* XXX Check for duplicate components!?!?!? */
3031:
1.86 oster 3032: /* Determine what the mod_counter is supposed to be for this set. */
3033:
1.87 oster 3034: mod_counter_found = 0;
1.101 oster 3035: mod_counter = 0;
1.86 oster 3036: ac = cset->ac;
3037: while(ac!=NULL) {
1.87 oster 3038: if (mod_counter_found==0) {
1.86 oster 3039: mod_counter = ac->clabel->mod_counter;
1.87 oster 3040: mod_counter_found = 1;
3041: } else {
3042: if (ac->clabel->mod_counter > mod_counter) {
3043: mod_counter = ac->clabel->mod_counter;
3044: }
1.86 oster 3045: }
3046: ac = ac->next;
3047: }
3048:
1.51 oster 3049: num_missing = 0;
3050: auto_config = cset->ac;
3051:
3052: for(r=0; r<num_rows; r++) {
1.88 oster 3053: even_pair_failed = 0;
1.51 oster 3054: for(c=0; c<num_cols; c++) {
3055: ac = auto_config;
3056: while(ac!=NULL) {
3057: if ((ac->clabel->row == r) &&
1.86 oster 3058: (ac->clabel->column == c) &&
3059: (ac->clabel->mod_counter == mod_counter)) {
1.51 oster 3060: /* it's this one... */
3061: #if DEBUG
3062: printf("Found: %s at %d,%d\n",
3063: ac->devname,r,c);
3064: #endif
3065: break;
3066: }
3067: ac=ac->next;
3068: }
3069: if (ac==NULL) {
3070: /* Didn't find one here! */
1.88 oster 3071: /* special case for RAID 1, especially
3072: where there are more than 2
3073: components (where RAIDframe treats
3074: things a little differently :( ) */
3075: if (parity_type == '1') {
3076: if (c%2 == 0) { /* even component */
3077: even_pair_failed = 1;
3078: } else { /* odd component. If
3079: we're failed, and
3080: so is the even
3081: component, it's
3082: "Good Night, Charlie" */
3083: if (even_pair_failed == 1) {
3084: return(0);
3085: }
3086: }
3087: } else {
3088: /* normal accounting */
3089: num_missing++;
3090: }
3091: }
3092: if ((parity_type == '1') && (c%2 == 1)) {
3093: /* Just did an even component, and we didn't
3094: bail.. reset the even_pair_failed flag,
3095: and go on to the next component.... */
3096: even_pair_failed = 0;
1.51 oster 3097: }
3098: }
3099: }
3100:
3101: clabel = cset->ac->clabel;
3102:
3103: if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3104: ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3105: ((clabel->parityConfig == '5') && (num_missing > 1))) {
3106: /* XXX this needs to be made *much* more general */
3107: /* Too many failures */
3108: return(0);
3109: }
3110: /* otherwise, all is well, and we've got enough to take a kick
3111: at autoconfiguring this set */
3112: return(1);
1.48 oster 3113: }
3114:
3115: void
1.49 oster 3116: rf_create_configuration(ac,config,raidPtr)
1.48 oster 3117: RF_AutoConfig_t *ac;
3118: RF_Config_t *config;
3119: RF_Raid_t *raidPtr;
3120: {
3121: RF_ComponentLabel_t *clabel;
1.77 oster 3122: int i;
1.48 oster 3123:
3124: clabel = ac->clabel;
3125:
3126: /* 1. Fill in the common stuff */
3127: config->numRow = clabel->num_rows;
3128: config->numCol = clabel->num_columns;
3129: config->numSpare = 0; /* XXX should this be set here? */
3130: config->sectPerSU = clabel->sectPerSU;
3131: config->SUsPerPU = clabel->SUsPerPU;
3132: config->SUsPerRU = clabel->SUsPerRU;
3133: config->parityConfig = clabel->parityConfig;
3134: /* XXX... */
3135: strcpy(config->diskQueueType,"fifo");
3136: config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3137: config->layoutSpecificSize = 0; /* XXX ?? */
3138:
3139: while(ac!=NULL) {
3140: /* row/col values will be in range due to the checks
3141: in reasonable_label() */
3142: strcpy(config->devnames[ac->clabel->row][ac->clabel->column],
3143: ac->devname);
3144: ac = ac->next;
3145: }
3146:
1.77 oster 3147: for(i=0;i<RF_MAXDBGV;i++) {
3148: config->debugVars[i][0] = NULL;
3149: }
1.48 oster 3150: }
3151:
3152: int
3153: rf_set_autoconfig(raidPtr, new_value)
3154: RF_Raid_t *raidPtr;
3155: int new_value;
3156: {
3157: RF_ComponentLabel_t clabel;
3158: struct vnode *vp;
3159: dev_t dev;
3160: int row, column;
3161:
1.54 oster 3162: raidPtr->autoconfigure = new_value;
1.48 oster 3163: for(row=0; row<raidPtr->numRow; row++) {
3164: for(column=0; column<raidPtr->numCol; column++) {
1.84 oster 3165: if (raidPtr->Disks[row][column].status ==
3166: rf_ds_optimal) {
3167: dev = raidPtr->Disks[row][column].dev;
3168: vp = raidPtr->raid_cinfo[row][column].ci_vp;
3169: raidread_component_label(dev, vp, &clabel);
3170: clabel.autoconfigure = new_value;
3171: raidwrite_component_label(dev, vp, &clabel);
3172: }
1.48 oster 3173: }
3174: }
3175: return(new_value);
3176: }
3177:
3178: int
3179: rf_set_rootpartition(raidPtr, new_value)
3180: RF_Raid_t *raidPtr;
3181: int new_value;
3182: {
3183: RF_ComponentLabel_t clabel;
3184: struct vnode *vp;
3185: dev_t dev;
3186: int row, column;
3187:
1.54 oster 3188: raidPtr->root_partition = new_value;
1.48 oster 3189: for(row=0; row<raidPtr->numRow; row++) {
3190: for(column=0; column<raidPtr->numCol; column++) {
1.84 oster 3191: if (raidPtr->Disks[row][column].status ==
3192: rf_ds_optimal) {
3193: dev = raidPtr->Disks[row][column].dev;
3194: vp = raidPtr->raid_cinfo[row][column].ci_vp;
3195: raidread_component_label(dev, vp, &clabel);
3196: clabel.root_partition = new_value;
3197: raidwrite_component_label(dev, vp, &clabel);
3198: }
1.48 oster 3199: }
3200: }
3201: return(new_value);
3202: }
3203:
3204: void
1.49 oster 3205: rf_release_all_vps(cset)
1.48 oster 3206: RF_ConfigSet_t *cset;
3207: {
3208: RF_AutoConfig_t *ac;
3209:
3210: ac = cset->ac;
3211: while(ac!=NULL) {
3212: /* Close the vp, and give it back */
3213: if (ac->vp) {
1.96 oster 3214: vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
1.48 oster 3215: VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3216: vput(ac->vp);
1.86 oster 3217: ac->vp = NULL;
1.48 oster 3218: }
3219: ac = ac->next;
3220: }
3221: }
3222:
3223:
3224: void
1.49 oster 3225: rf_cleanup_config_set(cset)
1.48 oster 3226: RF_ConfigSet_t *cset;
3227: {
3228: RF_AutoConfig_t *ac;
3229: RF_AutoConfig_t *next_ac;
3230:
3231: ac = cset->ac;
3232: while(ac!=NULL) {
3233: next_ac = ac->next;
3234: /* nuke the label */
3235: free(ac->clabel, M_RAIDFRAME);
3236: /* cleanup the config structure */
3237: free(ac, M_RAIDFRAME);
3238: /* "next.." */
3239: ac = next_ac;
3240: }
3241: /* and, finally, nuke the config set */
3242: free(cset, M_RAIDFRAME);
3243: }
3244:
3245:
3246: void
3247: raid_init_component_label(raidPtr, clabel)
3248: RF_Raid_t *raidPtr;
3249: RF_ComponentLabel_t *clabel;
3250: {
3251: /* current version number */
3252: clabel->version = RF_COMPONENT_LABEL_VERSION;
1.57 oster 3253: clabel->serial_number = raidPtr->serial_number;
1.48 oster 3254: clabel->mod_counter = raidPtr->mod_counter;
3255: clabel->num_rows = raidPtr->numRow;
3256: clabel->num_columns = raidPtr->numCol;
3257: clabel->clean = RF_RAID_DIRTY; /* not clean */
3258: clabel->status = rf_ds_optimal; /* "It's good!" */
3259:
3260: clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3261: clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3262: clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
1.54 oster 3263:
3264: clabel->blockSize = raidPtr->bytesPerSector;
3265: clabel->numBlocks = raidPtr->sectorsPerDisk;
3266:
1.48 oster 3267: /* XXX not portable */
3268: clabel->parityConfig = raidPtr->Layout.map->parityConfig;
1.54 oster 3269: clabel->maxOutstanding = raidPtr->maxOutstanding;
3270: clabel->autoconfigure = raidPtr->autoconfigure;
3271: clabel->root_partition = raidPtr->root_partition;
1.48 oster 3272: clabel->last_unit = raidPtr->raidid;
1.54 oster 3273: clabel->config_order = raidPtr->config_order;
1.51 oster 3274: }
3275:
3276: int
3277: rf_auto_config_set(cset,unit)
3278: RF_ConfigSet_t *cset;
3279: int *unit;
3280: {
3281: RF_Raid_t *raidPtr;
3282: RF_Config_t *config;
3283: int raidID;
3284: int retcode;
3285:
1.72 oster 3286: printf("RAID autoconfigure\n");
1.51 oster 3287:
3288: retcode = 0;
3289: *unit = -1;
3290:
3291: /* 1. Create a config structure */
3292:
3293: config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3294: M_RAIDFRAME,
3295: M_NOWAIT);
3296: if (config==NULL) {
3297: printf("Out of mem!?!?\n");
3298: /* XXX do something more intelligent here. */
3299: return(1);
3300: }
1.77 oster 3301:
3302: memset(config, 0, sizeof(RF_Config_t));
3303:
1.51 oster 3304: /* XXX raidID needs to be set correctly.. */
3305:
3306: /*
3307: 2. Figure out what RAID ID this one is supposed to live at
3308: See if we can get the same RAID dev that it was configured
3309: on last time..
3310: */
3311:
3312: raidID = cset->ac->clabel->last_unit;
1.52 oster 3313: if ((raidID < 0) || (raidID >= numraid)) {
1.51 oster 3314: /* let's not wander off into lala land. */
3315: raidID = numraid - 1;
3316: }
3317: if (raidPtrs[raidID]->valid != 0) {
3318:
3319: /*
3320: Nope... Go looking for an alternative...
3321: Start high so we don't immediately use raid0 if that's
3322: not taken.
3323: */
3324:
1.115 oster 3325: for(raidID = numraid - 1; raidID >= 0; raidID--) {
1.51 oster 3326: if (raidPtrs[raidID]->valid == 0) {
3327: /* can use this one! */
3328: break;
3329: }
3330: }
3331: }
3332:
3333: if (raidID < 0) {
3334: /* punt... */
3335: printf("Unable to auto configure this set!\n");
3336: printf("(Out of RAID devs!)\n");
3337: return(1);
3338: }
1.72 oster 3339: printf("Configuring raid%d:\n",raidID);
1.51 oster 3340: raidPtr = raidPtrs[raidID];
3341:
3342: /* XXX all this stuff should be done SOMEWHERE ELSE! */
3343: raidPtr->raidid = raidID;
3344: raidPtr->openings = RAIDOUTSTANDING;
3345:
3346: /* 3. Build the configuration structure */
3347: rf_create_configuration(cset->ac, config, raidPtr);
3348:
3349: /* 4. Do the configuration */
3350: retcode = rf_Configure(raidPtr, config, cset->ac);
3351:
3352: if (retcode == 0) {
1.61 oster 3353:
1.59 oster 3354: raidinit(raidPtrs[raidID]);
3355:
3356: rf_markalldirty(raidPtrs[raidID]);
1.54 oster 3357: raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
1.51 oster 3358: if (cset->ac->clabel->root_partition==1) {
3359: /* everything configured just fine. Make a note
3360: that this set is eligible to be root. */
3361: cset->rootable = 1;
1.54 oster 3362: /* XXX do this here? */
3363: raidPtrs[raidID]->root_partition = 1;
1.51 oster 3364: }
3365: }
3366:
3367: /* 5. Cleanup */
3368: free(config, M_RAIDFRAME);
3369:
3370: *unit = raidID;
3371: return(retcode);
1.99 oster 3372: }
3373:
3374: void
3375: rf_disk_unbusy(desc)
3376: RF_RaidAccessDesc_t *desc;
3377: {
3378: struct buf *bp;
3379:
3380: bp = (struct buf *)desc->bp;
3381: disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
3382: (bp->b_bcount - bp->b_resid));
1.13 oster 3383: }
CVSweb <webmaster@jp.NetBSD.org>