Annotation of src/sys/dev/raidframe/rf_netbsdkintf.c, Revision 1.190
1.190 ! oster 1: /* $NetBSD: rf_netbsdkintf.c,v 1.189 2005/09/24 22:51:55 oster Exp $ */
1.1 oster 2: /*-
3: * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
4: * All rights reserved.
5: *
6: * This code is derived from software contributed to The NetBSD Foundation
7: * by Greg Oster; Jason R. Thorpe.
8: *
9: * Redistribution and use in source and binary forms, with or without
10: * modification, are permitted provided that the following conditions
11: * are met:
12: * 1. Redistributions of source code must retain the above copyright
13: * notice, this list of conditions and the following disclaimer.
14: * 2. Redistributions in binary form must reproduce the above copyright
15: * notice, this list of conditions and the following disclaimer in the
16: * documentation and/or other materials provided with the distribution.
17: * 3. All advertising materials mentioning features or use of this software
18: * must display the following acknowledgement:
19: * This product includes software developed by the NetBSD
20: * Foundation, Inc. and its contributors.
21: * 4. Neither the name of The NetBSD Foundation nor the names of its
22: * contributors may be used to endorse or promote products derived
23: * from this software without specific prior written permission.
24: *
25: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35: * POSSIBILITY OF SUCH DAMAGE.
36: */
37:
38: /*
39: * Copyright (c) 1990, 1993
40: * The Regents of the University of California. All rights reserved.
41: *
42: * This code is derived from software contributed to Berkeley by
43: * the Systems Programming Group of the University of Utah Computer
44: * Science Department.
45: *
46: * Redistribution and use in source and binary forms, with or without
47: * modification, are permitted provided that the following conditions
48: * are met:
49: * 1. Redistributions of source code must retain the above copyright
50: * notice, this list of conditions and the following disclaimer.
51: * 2. Redistributions in binary form must reproduce the above copyright
52: * notice, this list of conditions and the following disclaimer in the
53: * documentation and/or other materials provided with the distribution.
1.162 agc 54: * 3. Neither the name of the University nor the names of its contributors
55: * may be used to endorse or promote products derived from this software
56: * without specific prior written permission.
57: *
58: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68: * SUCH DAMAGE.
69: *
70: * from: Utah $Hdr: cd.c 1.6 90/11/28$
71: *
72: * @(#)cd.c 8.2 (Berkeley) 11/16/93
73: */
74:
75: /*
76: * Copyright (c) 1988 University of Utah.
77: *
78: * This code is derived from software contributed to Berkeley by
79: * the Systems Programming Group of the University of Utah Computer
80: * Science Department.
81: *
82: * Redistribution and use in source and binary forms, with or without
83: * modification, are permitted provided that the following conditions
84: * are met:
85: * 1. Redistributions of source code must retain the above copyright
86: * notice, this list of conditions and the following disclaimer.
87: * 2. Redistributions in binary form must reproduce the above copyright
88: * notice, this list of conditions and the following disclaimer in the
89: * documentation and/or other materials provided with the distribution.
1.1 oster 90: * 3. All advertising materials mentioning features or use of this software
91: * must display the following acknowledgement:
92: * This product includes software developed by the University of
93: * California, Berkeley and its contributors.
94: * 4. Neither the name of the University nor the names of its contributors
95: * may be used to endorse or promote products derived from this software
96: * without specific prior written permission.
97: *
98: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108: * SUCH DAMAGE.
109: *
110: * from: Utah $Hdr: cd.c 1.6 90/11/28$
111: *
112: * @(#)cd.c 8.2 (Berkeley) 11/16/93
113: */
114:
115: /*
116: * Copyright (c) 1995 Carnegie-Mellon University.
117: * All rights reserved.
118: *
119: * Authors: Mark Holland, Jim Zelenka
120: *
121: * Permission to use, copy, modify and distribute this software and
122: * its documentation is hereby granted, provided that both the copyright
123: * notice and this permission notice appear in all copies of the
124: * software, derivative works or modified versions, and any portions
125: * thereof, and that both notices appear in supporting documentation.
126: *
127: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
128: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
129: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
130: *
131: * Carnegie Mellon requests users of this software to return to
132: *
133: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
134: * School of Computer Science
135: * Carnegie Mellon University
136: * Pittsburgh PA 15213-3890
137: *
138: * any improvements or extensions that they make and grant Carnegie the
139: * rights to redistribute these changes.
140: */
141:
142: /***********************************************************
143: *
144: * rf_kintf.c -- the kernel interface routines for RAIDframe
145: *
146: ***********************************************************/
1.112 lukem 147:
148: #include <sys/cdefs.h>
1.190 ! oster 149: __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.189 2005/09/24 22:51:55 oster Exp $");
1.1 oster 150:
1.113 lukem 151: #include <sys/param.h>
1.1 oster 152: #include <sys/errno.h>
153: #include <sys/pool.h>
1.152 thorpej 154: #include <sys/proc.h>
1.1 oster 155: #include <sys/queue.h>
156: #include <sys/disk.h>
157: #include <sys/device.h>
158: #include <sys/stat.h>
159: #include <sys/ioctl.h>
160: #include <sys/fcntl.h>
161: #include <sys/systm.h>
162: #include <sys/namei.h>
163: #include <sys/vnode.h>
164: #include <sys/disklabel.h>
165: #include <sys/conf.h>
166: #include <sys/lock.h>
167: #include <sys/buf.h>
1.182 yamt 168: #include <sys/bufq.h>
1.1 oster 169: #include <sys/user.h>
1.65 oster 170: #include <sys/reboot.h>
1.8 oster 171:
1.110 oster 172: #include <dev/raidframe/raidframevar.h>
173: #include <dev/raidframe/raidframeio.h>
1.8 oster 174: #include "raid.h"
1.62 oster 175: #include "opt_raid_autoconfig.h"
1.1 oster 176: #include "rf_raid.h"
1.44 oster 177: #include "rf_copyback.h"
1.1 oster 178: #include "rf_dag.h"
179: #include "rf_dagflags.h"
1.99 oster 180: #include "rf_desc.h"
1.1 oster 181: #include "rf_diskqueue.h"
182: #include "rf_etimer.h"
183: #include "rf_general.h"
184: #include "rf_kintf.h"
185: #include "rf_options.h"
186: #include "rf_driver.h"
187: #include "rf_parityscan.h"
188: #include "rf_threadstuff.h"
189:
/*
 * db1_printf((fmt, ...)) -- level-1 debug printf.
 *
 * The single macro argument is a complete, parenthesised printf()
 * argument list.  When DEBUG is defined the message is printed only if
 * rf_kdebug_level > 0; otherwise the macro expands to an empty statement.
 *
 * Both forms are wrapped in do { } while (0) so that `db1_printf((...));'
 * is always exactly one statement: it cannot capture a following `else',
 * and it is legal as the body of an unbraced if/else.
 */
#ifdef DEBUG
int     rf_kdebug_level = 0;
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
1.1 oster 196:
1.9 oster 197: static RF_Raid_t **raidPtrs; /* global raid device descriptors */
1.1 oster 198:
1.11 oster 199: RF_DECLARE_STATIC_MUTEX(rf_sparet_wait_mutex)
1.1 oster 200:
1.10 oster 201: static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
202: * spare table */
203: static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
204: * installation process */
1.153 thorpej 205:
206: MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
1.10 oster 207:
1.1 oster 208: /* prototypes */
1.187 christos 209: static void KernelWakeupFunc(struct buf *);
210: static void InitBP(struct buf *, struct vnode *, unsigned,
211: dev_t, RF_SectorNum_t, RF_SectorCount_t, caddr_t, void (*) (struct buf *),
212: void *, int, struct proc *);
1.104 oster 213: static void raidinit(RF_Raid_t *);
1.1 oster 214:
1.104 oster 215: void raidattach(int);
1.130 gehenna 216:
217: dev_type_open(raidopen);
218: dev_type_close(raidclose);
219: dev_type_read(raidread);
220: dev_type_write(raidwrite);
221: dev_type_ioctl(raidioctl);
222: dev_type_strategy(raidstrategy);
223: dev_type_dump(raiddump);
224: dev_type_size(raidsize);
225:
226: const struct bdevsw raid_bdevsw = {
227: raidopen, raidclose, raidstrategy, raidioctl,
228: raiddump, raidsize, D_DISK
229: };
230:
231: const struct cdevsw raid_cdevsw = {
232: raidopen, raidclose, raidread, raidwrite, raidioctl,
1.144 jdolecek 233: nostop, notty, nopoll, nommap, nokqfilter, D_DISK
1.130 gehenna 234: };
1.1 oster 235:
236: /*
237: * Pilfered from ccd.c
238: */
239:
1.10 oster 240: struct raidbuf {
241: struct buf rf_buf; /* new I/O buf. MUST BE FIRST!!! */
242: struct buf *rf_obp; /* ptr. to original I/O buf */
1.11 oster 243: RF_DiskQueueData_t *req;/* the request that this was part of.. */
1.10 oster 244: };
1.1 oster 245:
1.9 oster 246: /* XXX Not sure if the following should be replacing the raidPtrs above,
1.186 perry 247: or if it should be used in conjunction with that...
1.59 oster 248: */
1.1 oster 249:
1.10 oster 250: struct raid_softc {
251: int sc_flags; /* flags */
252: int sc_cflags; /* configuration flags */
1.11 oster 253: size_t sc_size; /* size of the raid device */
1.10 oster 254: char sc_xname[20]; /* XXX external name */
255: struct disk sc_dkdev; /* generic disk device info */
1.125 hannken 256: struct bufq_state buf_queue; /* used for the device queue */
1.10 oster 257: };
1.1 oster 258: /* sc_flags */
259: #define RAIDF_INITED 0x01 /* unit has been initialized */
260: #define RAIDF_WLABEL 0x02 /* label area is writable */
261: #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
262: #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
263: #define RAIDF_LOCKED 0x80 /* unit is locked */
264:
265: #define raidunit(x) DISKUNIT(x)
1.48 oster 266: int numraid = 0;
1.1 oster 267:
/*
 * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
 * Be aware that large numbers can allow the driver to consume a lot of
 * kernel memory, especially on writes, and in degraded mode reads.
 *
 * For example: with a stripe width of 64 blocks (32k) and 5 disks,
 * a single 64K write will typically require 64K for the old data,
 * 64K for the old parity, and 64K for the new parity, for a total
 * of 192K (if the parity buffer is not re-used immediately).
 * Even if it is used immediately, that's still 128K, which when multiplied
 * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
 *
 * Now in degraded mode, for example, a 64K read on the above setup may
 * require data reconstruction, which will require *all* of the 4 remaining
 * disks to participate -- 4 * 32K/disk == 128K again.
 *
 * The value may be overridden at build time by defining RAIDOUTSTANDING
 * before this point.
 */

#if !defined(RAIDOUTSTANDING)
#define	RAIDOUTSTANDING	6
#endif
288:
1.1 oster 289: #define RAIDLABELDEV(dev) \
290: (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
291:
292: /* declared here, and made public, for the benefit of KVM stuff.. */
1.10 oster 293: struct raid_softc *raid_softc;
1.9 oster 294:
1.186 perry 295: static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
1.104 oster 296: struct disklabel *);
297: static void raidgetdisklabel(dev_t);
298: static void raidmakedisklabel(struct raid_softc *);
1.1 oster 299:
1.104 oster 300: static int raidlock(struct raid_softc *);
301: static void raidunlock(struct raid_softc *);
1.1 oster 302:
1.104 oster 303: static void rf_markalldirty(RF_Raid_t *);
1.48 oster 304:
305: struct device *raidrootdev;
1.1 oster 306:
1.104 oster 307: void rf_ReconThread(struct rf_recon_req *);
308: void rf_RewriteParityThread(RF_Raid_t *raidPtr);
309: void rf_CopybackThread(RF_Raid_t *raidPtr);
310: void rf_ReconstructInPlaceThread(struct rf_recon_req *);
1.142 thorpej 311: int rf_autoconfig(struct device *self);
312: void rf_buildroothack(RF_ConfigSet_t *);
1.104 oster 313:
314: RF_AutoConfig_t *rf_find_raid_components(void);
315: RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
316: static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
317: static int rf_reasonable_label(RF_ComponentLabel_t *);
318: void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
319: int rf_set_autoconfig(RF_Raid_t *, int);
320: int rf_set_rootpartition(RF_Raid_t *, int);
321: void rf_release_all_vps(RF_ConfigSet_t *);
322: void rf_cleanup_config_set(RF_ConfigSet_t *);
323: int rf_have_enough_components(RF_ConfigSet_t *);
324: int rf_auto_config_set(RF_ConfigSet_t *, int *);
1.48 oster 325:
326: static int raidautoconfig = 0; /* Debugging, mostly. Set to 0 to not
1.62 oster 327: allow autoconfig to take place.
328: Note that this is overridden by having
1.186 perry 329: RAID_AUTOCONFIG as an option in the
1.62 oster 330: kernel config file. */
1.37 oster 331:
1.177 oster 332: struct RF_Pools_s rf_pools;
333:
1.10 oster 334: void
1.169 oster 335: raidattach(int num)
1.1 oster 336: {
1.14 oster 337: int raidID;
338: int i, rc;
1.1 oster 339:
340: #ifdef DEBUG
1.9 oster 341: printf("raidattach: Asked for %d units\n", num);
1.1 oster 342: #endif
343:
344: if (num <= 0) {
345: #ifdef DIAGNOSTIC
346: panic("raidattach: count <= 0");
347: #endif
348: return;
349: }
1.9 oster 350: /* This is where all the initialization stuff gets done. */
1.1 oster 351:
1.50 oster 352: numraid = num;
353:
1.1 oster 354: /* Make some space for requested number of units... */
355:
1.167 oster 356: RF_Malloc(raidPtrs, num * sizeof(RF_Raid_t *), (RF_Raid_t **));
1.1 oster 357: if (raidPtrs == NULL) {
1.141 provos 358: panic("raidPtrs is NULL!!");
1.1 oster 359: }
1.116 thorpej 360:
361: /* Initialize the component buffer pool. */
1.177 oster 362: rf_pool_init(&rf_pools.cbuf, sizeof(struct raidbuf),
363: "raidpl", num * RAIDOUTSTANDING,
364: 2 * num * RAIDOUTSTANDING);
1.116 thorpej 365:
1.168 oster 366: rf_mutex_init(&rf_sparet_wait_mutex);
1.14 oster 367:
368: rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
369:
1.58 oster 370: for (i = 0; i < num; i++)
1.14 oster 371: raidPtrs[i] = NULL;
372: rc = rf_BootRaidframe();
373: if (rc == 0)
374: printf("Kernelized RAIDframe activated\n");
375: else
1.141 provos 376: panic("Serious error booting RAID!!");
1.14 oster 377:
1.9 oster 378: /* put together some datastructures like the CCD device does.. This
379: * lets us lock the device and what-not when it gets opened. */
1.1 oster 380:
381: raid_softc = (struct raid_softc *)
1.48 oster 382: malloc(num * sizeof(struct raid_softc),
383: M_RAIDFRAME, M_NOWAIT);
1.1 oster 384: if (raid_softc == NULL) {
385: printf("WARNING: no memory for RAIDframe driver\n");
386: return;
387: }
1.50 oster 388:
1.108 thorpej 389: memset(raid_softc, 0, num * sizeof(struct raid_softc));
1.34 oster 390:
1.48 oster 391: raidrootdev = (struct device *)malloc(num * sizeof(struct device),
392: M_RAIDFRAME, M_NOWAIT);
393: if (raidrootdev == NULL) {
1.141 provos 394: panic("No memory for RAIDframe driver!!?!?!");
1.48 oster 395: }
396:
1.9 oster 397: for (raidID = 0; raidID < num; raidID++) {
1.126 hannken 398: bufq_alloc(&raid_softc[raidID].buf_queue, BUFQ_FCFS);
1.188 yamt 399: pseudo_disk_init(&raid_softc[raidID].sc_dkdev);
1.48 oster 400:
401: raidrootdev[raidID].dv_class = DV_DISK;
402: raidrootdev[raidID].dv_cfdata = NULL;
403: raidrootdev[raidID].dv_unit = raidID;
404: raidrootdev[raidID].dv_parent = NULL;
405: raidrootdev[raidID].dv_flags = 0;
1.179 itojun 406: snprintf(raidrootdev[raidID].dv_xname,
407: sizeof(raidrootdev[raidID].dv_xname), "raid%d", raidID);
1.48 oster 408:
1.167 oster 409: RF_Malloc(raidPtrs[raidID], sizeof(RF_Raid_t),
1.11 oster 410: (RF_Raid_t *));
1.9 oster 411: if (raidPtrs[raidID] == NULL) {
1.39 oster 412: printf("WARNING: raidPtrs[%d] is NULL\n", raidID);
413: numraid = raidID;
414: return;
1.1 oster 415: }
416: }
1.48 oster 417:
1.114 lukem 418: #ifdef RAID_AUTOCONFIG
1.62 oster 419: raidautoconfig = 1;
420: #endif
421:
1.142 thorpej 422: /*
423: * Register a finalizer which will be used to auto-config RAID
424: * sets once all real hardware devices have been found.
425: */
426: if (config_finalize_register(NULL, rf_autoconfig) != 0)
427: printf("WARNING: unable to register RAIDframe finalizer\n");
428: }
429:
430: int
431: rf_autoconfig(struct device *self)
432: {
433: RF_AutoConfig_t *ac_list;
434: RF_ConfigSet_t *config_sets;
435:
436: if (raidautoconfig == 0)
437: return (0);
438:
439: /* XXX This code can only be run once. */
440: raidautoconfig = 0;
441:
1.48 oster 442: /* 1. locate all RAID components on the system */
1.142 thorpej 443: #ifdef DEBUG
444: printf("Searching for RAID components...\n");
1.48 oster 445: #endif
446: ac_list = rf_find_raid_components();
447:
1.142 thorpej 448: /* 2. Sort them into their respective sets. */
1.48 oster 449: config_sets = rf_create_auto_sets(ac_list);
450:
1.142 thorpej 451: /*
452: * 3. Evaluate each set andconfigure the valid ones.
453: * This gets done in rf_buildroothack().
454: */
455: rf_buildroothack(config_sets);
1.48 oster 456:
1.142 thorpej 457: return (1);
1.48 oster 458: }
459:
460: void
1.142 thorpej 461: rf_buildroothack(RF_ConfigSet_t *config_sets)
1.48 oster 462: {
463: RF_ConfigSet_t *cset;
464: RF_ConfigSet_t *next_cset;
1.51 oster 465: int retcode;
1.48 oster 466: int raidID;
1.51 oster 467: int rootID;
468: int num_root;
1.48 oster 469:
1.101 oster 470: rootID = 0;
1.51 oster 471: num_root = 0;
1.48 oster 472: cset = config_sets;
473: while(cset != NULL ) {
474: next_cset = cset->next;
1.186 perry 475: if (rf_have_enough_components(cset) &&
1.51 oster 476: cset->ac->clabel->autoconfigure==1) {
477: retcode = rf_auto_config_set(cset,&raidID);
478: if (!retcode) {
479: if (cset->rootable) {
480: rootID = raidID;
481: num_root++;
482: }
483: } else {
484: /* The autoconfig didn't work :( */
485: #if DEBUG
486: printf("Autoconfig failed with code %d for raid%d\n", retcode, raidID);
487: #endif
488: rf_release_all_vps(cset);
1.48 oster 489: }
490: } else {
1.186 perry 491: /* we're not autoconfiguring this set...
1.48 oster 492: release the associated resources */
1.49 oster 493: rf_release_all_vps(cset);
1.48 oster 494: }
495: /* cleanup */
1.49 oster 496: rf_cleanup_config_set(cset);
1.48 oster 497: cset = next_cset;
498: }
1.122 oster 499:
500: /* we found something bootable... */
501:
502: if (num_root == 1) {
1.186 perry 503: booted_device = &raidrootdev[rootID];
1.122 oster 504: } else if (num_root > 1) {
505: /* we can't guess.. require the user to answer... */
506: boothowto |= RB_ASKNAME;
1.51 oster 507: }
1.1 oster 508: }
509:
510:
511: int
1.169 oster 512: raidsize(dev_t dev)
1.1 oster 513: {
514: struct raid_softc *rs;
515: struct disklabel *lp;
1.9 oster 516: int part, unit, omask, size;
1.1 oster 517:
518: unit = raidunit(dev);
519: if (unit >= numraid)
520: return (-1);
521: rs = &raid_softc[unit];
522:
523: if ((rs->sc_flags & RAIDF_INITED) == 0)
524: return (-1);
525:
526: part = DISKPART(dev);
527: omask = rs->sc_dkdev.dk_openmask & (1 << part);
528: lp = rs->sc_dkdev.dk_label;
529:
1.161 fvdl 530: if (omask == 0 && raidopen(dev, 0, S_IFBLK, curproc))
1.1 oster 531: return (-1);
532:
533: if (lp->d_partitions[part].p_fstype != FS_SWAP)
534: size = -1;
535: else
536: size = lp->d_partitions[part].p_size *
537: (lp->d_secsize / DEV_BSIZE);
538:
1.161 fvdl 539: if (omask == 0 && raidclose(dev, 0, S_IFBLK, curproc))
1.1 oster 540: return (-1);
541:
542: return (size);
543:
544: }
545:
546: int
1.169 oster 547: raiddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
1.1 oster 548: {
549: /* Not implemented. */
550: return ENXIO;
551: }
552: /* ARGSUSED */
553: int
1.169 oster 554: raidopen(dev_t dev, int flags, int fmt, struct proc *p)
1.1 oster 555: {
1.9 oster 556: int unit = raidunit(dev);
1.1 oster 557: struct raid_softc *rs;
558: struct disklabel *lp;
1.9 oster 559: int part, pmask;
560: int error = 0;
561:
1.1 oster 562: if (unit >= numraid)
563: return (ENXIO);
564: rs = &raid_softc[unit];
565:
566: if ((error = raidlock(rs)) != 0)
1.9 oster 567: return (error);
1.1 oster 568: lp = rs->sc_dkdev.dk_label;
569:
570: part = DISKPART(dev);
571: pmask = (1 << part);
572:
573: if ((rs->sc_flags & RAIDF_INITED) &&
574: (rs->sc_dkdev.dk_openmask == 0))
1.9 oster 575: raidgetdisklabel(dev);
1.1 oster 576:
577: /* make sure that this partition exists */
578:
579: if (part != RAW_PART) {
580: if (((rs->sc_flags & RAIDF_INITED) == 0) ||
581: ((part >= lp->d_npartitions) ||
1.9 oster 582: (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
1.1 oster 583: error = ENXIO;
584: raidunlock(rs);
1.9 oster 585: return (error);
1.1 oster 586: }
587: }
588: /* Prevent this unit from being unconfigured while open. */
589: switch (fmt) {
590: case S_IFCHR:
591: rs->sc_dkdev.dk_copenmask |= pmask;
592: break;
593:
594: case S_IFBLK:
595: rs->sc_dkdev.dk_bopenmask |= pmask;
596: break;
597: }
1.13 oster 598:
1.186 perry 599: if ((rs->sc_dkdev.dk_openmask == 0) &&
1.13 oster 600: ((rs->sc_flags & RAIDF_INITED) != 0)) {
601: /* First one... mark things as dirty... Note that we *MUST*
602: have done a configure before this. I DO NOT WANT TO BE
603: SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
604: THAT THEY BELONG TOGETHER!!!!! */
605: /* XXX should check to see if we're only open for reading
606: here... If so, we needn't do this, but then need some
607: other way of keeping track of what's happened.. */
608:
609: rf_markalldirty( raidPtrs[unit] );
610: }
611:
612:
1.1 oster 613: rs->sc_dkdev.dk_openmask =
614: rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
615:
616: raidunlock(rs);
617:
1.9 oster 618: return (error);
1.1 oster 619:
620:
621: }
622: /* ARGSUSED */
623: int
1.169 oster 624: raidclose(dev_t dev, int flags, int fmt, struct proc *p)
1.1 oster 625: {
1.9 oster 626: int unit = raidunit(dev);
1.1 oster 627: struct raid_softc *rs;
1.9 oster 628: int error = 0;
629: int part;
1.1 oster 630:
631: if (unit >= numraid)
632: return (ENXIO);
633: rs = &raid_softc[unit];
634:
635: if ((error = raidlock(rs)) != 0)
636: return (error);
637:
638: part = DISKPART(dev);
639:
640: /* ...that much closer to allowing unconfiguration... */
641: switch (fmt) {
642: case S_IFCHR:
643: rs->sc_dkdev.dk_copenmask &= ~(1 << part);
644: break;
645:
646: case S_IFBLK:
647: rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
648: break;
649: }
650: rs->sc_dkdev.dk_openmask =
651: rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
1.186 perry 652:
1.13 oster 653: if ((rs->sc_dkdev.dk_openmask == 0) &&
654: ((rs->sc_flags & RAIDF_INITED) != 0)) {
1.186 perry 655: /* Last one... device is not unconfigured yet.
656: Device shutdown has taken care of setting the
657: clean bits if RAIDF_INITED is not set
1.13 oster 658: mark things as clean... */
1.147 oster 659:
1.91 oster 660: rf_update_component_labels(raidPtrs[unit],
661: RF_FINAL_COMPONENT_UPDATE);
1.107 oster 662: if (doing_shutdown) {
663: /* last one, and we're going down, so
664: lights out for this RAID set too. */
665: error = rf_Shutdown(raidPtrs[unit]);
1.186 perry 666:
1.107 oster 667: /* It's no longer initialized... */
668: rs->sc_flags &= ~RAIDF_INITED;
1.186 perry 669:
1.107 oster 670: /* Detach the disk. */
1.188 yamt 671: pseudo_disk_detach(&rs->sc_dkdev);
1.107 oster 672: }
1.13 oster 673: }
1.1 oster 674:
675: raidunlock(rs);
676: return (0);
677:
678: }
679:
680: void
1.169 oster 681: raidstrategy(struct buf *bp)
1.1 oster 682: {
1.74 augustss 683: int s;
1.1 oster 684:
685: unsigned int raidID = raidunit(bp->b_dev);
686: RF_Raid_t *raidPtr;
687: struct raid_softc *rs = &raid_softc[raidID];
1.9 oster 688: int wlabel;
1.1 oster 689:
1.30 oster 690: if ((rs->sc_flags & RAIDF_INITED) ==0) {
691: bp->b_error = ENXIO;
1.100 chs 692: bp->b_flags |= B_ERROR;
1.30 oster 693: bp->b_resid = bp->b_bcount;
694: biodone(bp);
1.1 oster 695: return;
1.30 oster 696: }
1.1 oster 697: if (raidID >= numraid || !raidPtrs[raidID]) {
698: bp->b_error = ENODEV;
699: bp->b_flags |= B_ERROR;
700: bp->b_resid = bp->b_bcount;
701: biodone(bp);
702: return;
703: }
704: raidPtr = raidPtrs[raidID];
705: if (!raidPtr->valid) {
706: bp->b_error = ENODEV;
707: bp->b_flags |= B_ERROR;
708: bp->b_resid = bp->b_bcount;
709: biodone(bp);
710: return;
711: }
712: if (bp->b_bcount == 0) {
713: db1_printf(("b_bcount is zero..\n"));
714: biodone(bp);
715: return;
716: }
717:
718: /*
719: * Do bounds checking and adjust transfer. If there's an
720: * error, the bounds check will flag that for us.
721: */
722:
1.9 oster 723: wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
1.1 oster 724: if (DISKPART(bp->b_dev) != RAW_PART)
1.159 thorpej 725: if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
1.1 oster 726: db1_printf(("Bounds check failed!!:%d %d\n",
1.9 oster 727: (int) bp->b_blkno, (int) wlabel));
1.1 oster 728: biodone(bp);
729: return;
730: }
1.34 oster 731: s = splbio();
1.1 oster 732:
733: bp->b_resid = 0;
1.34 oster 734:
735: /* stuff it onto our queue */
1.125 hannken 736: BUFQ_PUT(&rs->buf_queue, bp);
1.34 oster 737:
1.190 ! oster 738: /* scheduled the IO to happen at the next convenient time */
! 739: wakeup(&(raidPtrs[raidID]->iodone));
1.34 oster 740:
1.1 oster 741: splx(s);
742: }
743: /* ARGSUSED */
744: int
1.169 oster 745: raidread(dev_t dev, struct uio *uio, int flags)
1.1 oster 746: {
1.9 oster 747: int unit = raidunit(dev);
1.1 oster 748: struct raid_softc *rs;
749:
750: if (unit >= numraid)
751: return (ENXIO);
752: rs = &raid_softc[unit];
753:
754: if ((rs->sc_flags & RAIDF_INITED) == 0)
755: return (ENXIO);
756:
757: return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
758:
759: }
760: /* ARGSUSED */
761: int
1.169 oster 762: raidwrite(dev_t dev, struct uio *uio, int flags)
1.1 oster 763: {
1.9 oster 764: int unit = raidunit(dev);
1.1 oster 765: struct raid_softc *rs;
766:
767: if (unit >= numraid)
768: return (ENXIO);
769: rs = &raid_softc[unit];
770:
771: if ((rs->sc_flags & RAIDF_INITED) == 0)
772: return (ENXIO);
1.147 oster 773:
1.1 oster 774: return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
775:
776: }
777:
778: int
1.169 oster 779: raidioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
1.1 oster 780: {
1.9 oster 781: int unit = raidunit(dev);
782: int error = 0;
783: int part, pmask;
1.1 oster 784: struct raid_softc *rs;
785: RF_Config_t *k_cfg, *u_cfg;
1.42 oster 786: RF_Raid_t *raidPtr;
1.48 oster 787: RF_RaidDisk_t *diskPtr;
1.41 oster 788: RF_AccTotals_t *totals;
789: RF_DeviceConfig_t *d_cfg, **ucfgp;
1.1 oster 790: u_char *specific_buf;
1.11 oster 791: int retcode = 0;
792: int column;
1.123 oster 793: int raidid;
1.1 oster 794: struct rf_recon_req *rrcopy, *rr;
1.48 oster 795: RF_ComponentLabel_t *clabel;
1.11 oster 796: RF_ComponentLabel_t ci_label;
1.48 oster 797: RF_ComponentLabel_t **clabel_ptr;
1.12 oster 798: RF_SingleComponent_t *sparePtr,*componentPtr;
799: RF_SingleComponent_t hot_spare;
800: RF_SingleComponent_t component;
1.83 oster 801: RF_ProgressInfo_t progressInfo, **progressInfoPtr;
1.41 oster 802: int i, j, d;
1.102 fvdl 803: #ifdef __HAVE_OLD_DISKLABEL
804: struct disklabel newlabel;
805: #endif
1.1 oster 806:
807: if (unit >= numraid)
808: return (ENXIO);
809: rs = &raid_softc[unit];
1.42 oster 810: raidPtr = raidPtrs[unit];
1.1 oster 811:
1.9 oster 812: db1_printf(("raidioctl: %d %d %d %d\n", (int) dev,
813: (int) DISKPART(dev), (int) unit, (int) cmd));
1.1 oster 814:
815: /* Must be open for writes for these commands... */
816: switch (cmd) {
817: case DIOCSDINFO:
818: case DIOCWDINFO:
1.102 fvdl 819: #ifdef __HAVE_OLD_DISKLABEL
820: case ODIOCWDINFO:
821: case ODIOCSDINFO:
822: #endif
1.1 oster 823: case DIOCWLABEL:
824: if ((flag & FWRITE) == 0)
825: return (EBADF);
826: }
827:
828: /* Must be initialized for these... */
829: switch (cmd) {
830: case DIOCGDINFO:
831: case DIOCSDINFO:
832: case DIOCWDINFO:
1.102 fvdl 833: #ifdef __HAVE_OLD_DISKLABEL
834: case ODIOCGDINFO:
835: case ODIOCWDINFO:
836: case ODIOCSDINFO:
837: case ODIOCGDEFLABEL:
838: #endif
1.1 oster 839: case DIOCGPART:
840: case DIOCWLABEL:
841: case DIOCGDEFLABEL:
842: case RAIDFRAME_SHUTDOWN:
843: case RAIDFRAME_REWRITEPARITY:
844: case RAIDFRAME_GET_INFO:
845: case RAIDFRAME_RESET_ACCTOTALS:
846: case RAIDFRAME_GET_ACCTOTALS:
847: case RAIDFRAME_KEEP_ACCTOTALS:
848: case RAIDFRAME_GET_SIZE:
849: case RAIDFRAME_FAIL_DISK:
850: case RAIDFRAME_COPYBACK:
1.37 oster 851: case RAIDFRAME_CHECK_RECON_STATUS:
1.83 oster 852: case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1.11 oster 853: case RAIDFRAME_GET_COMPONENT_LABEL:
854: case RAIDFRAME_SET_COMPONENT_LABEL:
855: case RAIDFRAME_ADD_HOT_SPARE:
856: case RAIDFRAME_REMOVE_HOT_SPARE:
857: case RAIDFRAME_INIT_LABELS:
1.12 oster 858: case RAIDFRAME_REBUILD_IN_PLACE:
1.23 oster 859: case RAIDFRAME_CHECK_PARITY:
1.37 oster 860: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1.83 oster 861: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1.37 oster 862: case RAIDFRAME_CHECK_COPYBACK_STATUS:
1.83 oster 863: case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1.48 oster 864: case RAIDFRAME_SET_AUTOCONFIG:
865: case RAIDFRAME_SET_ROOT:
1.73 oster 866: case RAIDFRAME_DELETE_COMPONENT:
867: case RAIDFRAME_INCORPORATE_HOT_SPARE:
1.1 oster 868: if ((rs->sc_flags & RAIDF_INITED) == 0)
869: return (ENXIO);
870: }
1.9 oster 871:
1.1 oster 872: switch (cmd) {
873:
874: /* configure the system */
875: case RAIDFRAME_CONFIGURE:
1.48 oster 876:
877: if (raidPtr->valid) {
878: /* There is a valid RAID set running on this unit! */
879: printf("raid%d: Device already configured!\n",unit);
1.66 oster 880: return(EINVAL);
1.48 oster 881: }
882:
1.1 oster 883: /* copy-in the configuration information */
884: /* data points to a pointer to the configuration structure */
1.43 oster 885:
1.9 oster 886: u_cfg = *((RF_Config_t **) data);
887: RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1.1 oster 888: if (k_cfg == NULL) {
1.9 oster 889: return (ENOMEM);
1.1 oster 890: }
1.156 dsl 891: retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1.1 oster 892: if (retcode) {
1.33 oster 893: RF_Free(k_cfg, sizeof(RF_Config_t));
1.46 oster 894: db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1.9 oster 895: retcode));
896: return (retcode);
1.1 oster 897: }
1.9 oster 898: /* allocate a buffer for the layout-specific data, and copy it
899: * in */
1.1 oster 900: if (k_cfg->layoutSpecificSize) {
1.9 oster 901: if (k_cfg->layoutSpecificSize > 10000) {
1.1 oster 902: /* sanity check */
1.33 oster 903: RF_Free(k_cfg, sizeof(RF_Config_t));
1.9 oster 904: return (EINVAL);
1.1 oster 905: }
1.9 oster 906: RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
907: (u_char *));
1.1 oster 908: if (specific_buf == NULL) {
1.9 oster 909: RF_Free(k_cfg, sizeof(RF_Config_t));
910: return (ENOMEM);
1.1 oster 911: }
1.156 dsl 912: retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1.9 oster 913: k_cfg->layoutSpecificSize);
1.1 oster 914: if (retcode) {
1.33 oster 915: RF_Free(k_cfg, sizeof(RF_Config_t));
1.186 perry 916: RF_Free(specific_buf,
1.42 oster 917: k_cfg->layoutSpecificSize);
1.46 oster 918: db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1.9 oster 919: retcode));
920: return (retcode);
1.1 oster 921: }
1.9 oster 922: } else
923: specific_buf = NULL;
1.1 oster 924: k_cfg->layoutSpecific = specific_buf;
1.9 oster 925:
926: /* should do some kind of sanity check on the configuration.
927: * Store the sum of all the bytes in the last byte? */
1.1 oster 928:
929: /* configure the system */
930:
1.48 oster 931: /*
932: * Clear the entire RAID descriptor, just to make sure
1.186 perry 933: * there is no stale data left in the case of a
934: * reconfiguration
1.48 oster 935: */
1.108 thorpej 936: memset((char *) raidPtr, 0, sizeof(RF_Raid_t));
1.42 oster 937: raidPtr->raidid = unit;
1.20 oster 938:
1.48 oster 939: retcode = rf_Configure(raidPtr, k_cfg, NULL);
1.1 oster 940:
1.40 oster 941: if (retcode == 0) {
1.37 oster 942:
1.186 perry 943: /* allow this many simultaneous IO's to
1.40 oster 944: this RAID device */
1.42 oster 945: raidPtr->openings = RAIDOUTSTANDING;
1.186 perry 946:
1.59 oster 947: raidinit(raidPtr);
948: rf_markalldirty(raidPtr);
1.9 oster 949: }
1.1 oster 950: /* free the buffers. No return code here. */
951: if (k_cfg->layoutSpecificSize) {
1.9 oster 952: RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1.1 oster 953: }
1.9 oster 954: RF_Free(k_cfg, sizeof(RF_Config_t));
955:
956: return (retcode);
957:
958: /* shutdown the system */
1.1 oster 959: case RAIDFRAME_SHUTDOWN:
1.9 oster 960:
961: if ((error = raidlock(rs)) != 0)
962: return (error);
1.1 oster 963:
964: /*
965: * If somebody has a partition mounted, we shouldn't
966: * shutdown.
967: */
968:
969: part = DISKPART(dev);
970: pmask = (1 << part);
1.9 oster 971: if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
972: ((rs->sc_dkdev.dk_bopenmask & pmask) &&
973: (rs->sc_dkdev.dk_copenmask & pmask))) {
974: raidunlock(rs);
975: return (EBUSY);
976: }
1.11 oster 977:
1.42 oster 978: retcode = rf_Shutdown(raidPtr);
1.1 oster 979:
980: /* It's no longer initialized... */
981: rs->sc_flags &= ~RAIDF_INITED;
1.16 oster 982:
1.9 oster 983: /* Detach the disk. */
1.189 oster 984: pseudo_disk_detach(&rs->sc_dkdev);
1.1 oster 985:
986: raidunlock(rs);
987:
1.9 oster 988: return (retcode);
1.11 oster 989: case RAIDFRAME_GET_COMPONENT_LABEL:
1.48 oster 990: clabel_ptr = (RF_ComponentLabel_t **) data;
1.11 oster 991: /* need to read the component label for the disk indicated
1.48 oster 992: by row,column in clabel */
1.11 oster 993:
1.186 perry 994: /* For practice, let's get it directly from disk, rather
1.11 oster 995: than from the in-core copy */
1.48 oster 996: RF_Malloc( clabel, sizeof( RF_ComponentLabel_t ),
1.11 oster 997: (RF_ComponentLabel_t *));
1.48 oster 998: if (clabel == NULL)
1.11 oster 999: return (ENOMEM);
1000:
1.108 thorpej 1001: memset((char *) clabel, 0, sizeof(RF_ComponentLabel_t));
1.186 perry 1002:
1003: retcode = copyin( *clabel_ptr, clabel,
1.11 oster 1004: sizeof(RF_ComponentLabel_t));
1005:
1006: if (retcode) {
1.48 oster 1007: RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1.11 oster 1008: return(retcode);
1009: }
1010:
1.166 oster 1011: clabel->row = 0; /* Don't allow looking at anything else.*/
1012:
1.48 oster 1013: column = clabel->column;
1.26 oster 1014:
1.166 oster 1015: if ((column < 0) || (column >= raidPtr->numCol +
1.90 oster 1016: raidPtr->numSpare)) {
1.48 oster 1017: RF_Free( clabel, sizeof(RF_ComponentLabel_t));
1.26 oster 1018: return(EINVAL);
1.11 oster 1019: }
1020:
1.186 perry 1021: raidread_component_label(raidPtr->Disks[column].dev,
1022: raidPtr->raid_cinfo[column].ci_vp,
1.48 oster 1023: clabel );
1.11 oster 1024:
1.156 dsl 1025: retcode = copyout(clabel, *clabel_ptr,
1.11 oster 1026: sizeof(RF_ComponentLabel_t));
1.156 dsl 1027: RF_Free(clabel, sizeof(RF_ComponentLabel_t));
1.11 oster 1028: return (retcode);
1029:
1030: case RAIDFRAME_SET_COMPONENT_LABEL:
1.48 oster 1031: clabel = (RF_ComponentLabel_t *) data;
1.11 oster 1032:
1033: /* XXX check the label for valid stuff... */
1034: /* Note that some things *should not* get modified --
1.186 perry 1035: the user should be re-initing the labels instead of
1.11 oster 1036: trying to patch things.
1037: */
1038:
1.123 oster 1039: raidid = raidPtr->raidid;
1.174 oster 1040: #if DEBUG
1.123 oster 1041: printf("raid%d: Got component label:\n", raidid);
1042: printf("raid%d: Version: %d\n", raidid, clabel->version);
1043: printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1044: printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1045: printf("raid%d: Column: %d\n", raidid, clabel->column);
1046: printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1047: printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1048: printf("raid%d: Status: %d\n", raidid, clabel->status);
1.174 oster 1049: #endif
1.166 oster 1050: clabel->row = 0;
1.48 oster 1051: column = clabel->column;
1.12 oster 1052:
1.166 oster 1053: if ((column < 0) || (column >= raidPtr->numCol)) {
1.12 oster 1054: return(EINVAL);
1.11 oster 1055: }
1.12 oster 1056:
1057: /* XXX this isn't allowed to do anything for now :-) */
1.48 oster 1058:
1059: /* XXX and before it is, we need to fill in the rest
1060: of the fields!?!?!?! */
1.12 oster 1061: #if 0
1.186 perry 1062: raidwrite_component_label(
1063: raidPtr->Disks[column].dev,
1064: raidPtr->raid_cinfo[column].ci_vp,
1.48 oster 1065: clabel );
1.12 oster 1066: #endif
1067: return (0);
1.11 oster 1068:
1.186 perry 1069: case RAIDFRAME_INIT_LABELS:
1.48 oster 1070: clabel = (RF_ComponentLabel_t *) data;
1.186 perry 1071: /*
1.11 oster 1072: we only want the serial number from
1073: the above. We get all the rest of the information
1074: from the config that was used to create this RAID
1.186 perry 1075: set.
1.11 oster 1076: */
1.12 oster 1077:
1.48 oster 1078: raidPtr->serial_number = clabel->serial_number;
1.186 perry 1079:
1.51 oster 1080: raid_init_component_label(raidPtr, &ci_label);
1081: ci_label.serial_number = clabel->serial_number;
1.166 oster 1082: ci_label.row = 0; /* we dont' pretend to support more */
1.11 oster 1083:
1.166 oster 1084: for(column=0;column<raidPtr->numCol;column++) {
1085: diskPtr = &raidPtr->Disks[column];
1086: if (!RF_DEAD_DISK(diskPtr->status)) {
1087: ci_label.partitionSize = diskPtr->partitionSize;
1088: ci_label.column = column;
1.186 perry 1089: raidwrite_component_label(
1090: raidPtr->Disks[column].dev,
1091: raidPtr->raid_cinfo[column].ci_vp,
1.166 oster 1092: &ci_label );
1.11 oster 1093: }
1094: }
1095:
1096: return (retcode);
1.48 oster 1097: case RAIDFRAME_SET_AUTOCONFIG:
1.78 minoura 1098: d = rf_set_autoconfig(raidPtr, *(int *) data);
1.186 perry 1099: printf("raid%d: New autoconfig value is: %d\n",
1.123 oster 1100: raidPtr->raidid, d);
1.78 minoura 1101: *(int *) data = d;
1.48 oster 1102: return (retcode);
1103:
1104: case RAIDFRAME_SET_ROOT:
1.78 minoura 1105: d = rf_set_rootpartition(raidPtr, *(int *) data);
1.186 perry 1106: printf("raid%d: New rootpartition value is: %d\n",
1.123 oster 1107: raidPtr->raidid, d);
1.78 minoura 1108: *(int *) data = d;
1.48 oster 1109: return (retcode);
1.9 oster 1110:
1.1 oster 1111: /* initialize all parity */
1112: case RAIDFRAME_REWRITEPARITY:
1113:
1.42 oster 1114: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.17 oster 1115: /* Parity for RAID 0 is trivially correct */
1.42 oster 1116: raidPtr->parity_good = RF_RAID_CLEAN;
1.17 oster 1117: return(0);
1118: }
1.186 perry 1119:
1.42 oster 1120: if (raidPtr->parity_rewrite_in_progress == 1) {
1.37 oster 1121: /* Re-write is already in progress! */
1122: return(EINVAL);
1123: }
1.27 oster 1124:
1.42 oster 1125: retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1.37 oster 1126: rf_RewriteParityThread,
1.42 oster 1127: raidPtr,"raid_parity");
1.9 oster 1128: return (retcode);
1129:
1.11 oster 1130:
1131: case RAIDFRAME_ADD_HOT_SPARE:
1.12 oster 1132: sparePtr = (RF_SingleComponent_t *) data;
1133: memcpy( &hot_spare, sparePtr, sizeof(RF_SingleComponent_t));
1.42 oster 1134: retcode = rf_add_hot_spare(raidPtr, &hot_spare);
1.11 oster 1135: return(retcode);
1136:
1137: case RAIDFRAME_REMOVE_HOT_SPARE:
1.73 oster 1138: return(retcode);
1139:
1140: case RAIDFRAME_DELETE_COMPONENT:
1141: componentPtr = (RF_SingleComponent_t *)data;
1.186 perry 1142: memcpy( &component, componentPtr,
1.73 oster 1143: sizeof(RF_SingleComponent_t));
1144: retcode = rf_delete_component(raidPtr, &component);
1145: return(retcode);
1146:
1147: case RAIDFRAME_INCORPORATE_HOT_SPARE:
1148: componentPtr = (RF_SingleComponent_t *)data;
1.186 perry 1149: memcpy( &component, componentPtr,
1.73 oster 1150: sizeof(RF_SingleComponent_t));
1151: retcode = rf_incorporate_hot_spare(raidPtr, &component);
1.11 oster 1152: return(retcode);
1153:
1.12 oster 1154: case RAIDFRAME_REBUILD_IN_PLACE:
1.24 oster 1155:
1.42 oster 1156: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.24 oster 1157: /* Can't do this on a RAID 0!! */
1158: return(EINVAL);
1159: }
1160:
1.42 oster 1161: if (raidPtr->recon_in_progress == 1) {
1.37 oster 1162: /* a reconstruct is already in progress! */
1163: return(EINVAL);
1164: }
1165:
1.12 oster 1166: componentPtr = (RF_SingleComponent_t *) data;
1.186 perry 1167: memcpy( &component, componentPtr,
1.12 oster 1168: sizeof(RF_SingleComponent_t));
1.166 oster 1169: component.row = 0; /* we don't support any more */
1.12 oster 1170: column = component.column;
1.147 oster 1171:
1.166 oster 1172: if ((column < 0) || (column >= raidPtr->numCol)) {
1.12 oster 1173: return(EINVAL);
1174: }
1.37 oster 1175:
1.149 oster 1176: RF_LOCK_MUTEX(raidPtr->mutex);
1.166 oster 1177: if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1.186 perry 1178: (raidPtr->numFailures > 0)) {
1.149 oster 1179: /* XXX 0 above shouldn't be constant!!! */
1180: /* some component other than this has failed.
1181: Let's not make things worse than they already
1182: are... */
1183: printf("raid%d: Unable to reconstruct to disk at:\n",
1184: raidPtr->raidid);
1.166 oster 1185: printf("raid%d: Col: %d Too many failures.\n",
1186: raidPtr->raidid, column);
1.149 oster 1187: RF_UNLOCK_MUTEX(raidPtr->mutex);
1188: return (EINVAL);
1189: }
1.186 perry 1190: if (raidPtr->Disks[column].status ==
1.149 oster 1191: rf_ds_reconstructing) {
1192: printf("raid%d: Unable to reconstruct to disk at:\n",
1193: raidPtr->raidid);
1.166 oster 1194: printf("raid%d: Col: %d Reconstruction already occuring!\n", raidPtr->raidid, column);
1.186 perry 1195:
1.149 oster 1196: RF_UNLOCK_MUTEX(raidPtr->mutex);
1197: return (EINVAL);
1198: }
1.166 oster 1199: if (raidPtr->Disks[column].status == rf_ds_spared) {
1.149 oster 1200: RF_UNLOCK_MUTEX(raidPtr->mutex);
1201: return (EINVAL);
1202: }
1203: RF_UNLOCK_MUTEX(raidPtr->mutex);
1204:
1.37 oster 1205: RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1.38 oster 1206: if (rrcopy == NULL)
1207: return(ENOMEM);
1.37 oster 1208:
1.42 oster 1209: rrcopy->raidPtr = (void *) raidPtr;
1.37 oster 1210: rrcopy->col = column;
1211:
1.42 oster 1212: retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1.37 oster 1213: rf_ReconstructInPlaceThread,
1214: rrcopy,"raid_reconip");
1.12 oster 1215: return(retcode);
1216:
1.1 oster 1217: case RAIDFRAME_GET_INFO:
1.42 oster 1218: if (!raidPtr->valid)
1.41 oster 1219: return (ENODEV);
1220: ucfgp = (RF_DeviceConfig_t **) data;
1221: RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1222: (RF_DeviceConfig_t *));
1223: if (d_cfg == NULL)
1224: return (ENOMEM);
1.108 thorpej 1225: memset((char *) d_cfg, 0, sizeof(RF_DeviceConfig_t));
1.166 oster 1226: d_cfg->rows = 1; /* there is only 1 row now */
1.42 oster 1227: d_cfg->cols = raidPtr->numCol;
1.166 oster 1228: d_cfg->ndevs = raidPtr->numCol;
1.41 oster 1229: if (d_cfg->ndevs >= RF_MAX_DISKS) {
1230: RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1231: return (ENOMEM);
1232: }
1.42 oster 1233: d_cfg->nspares = raidPtr->numSpare;
1.41 oster 1234: if (d_cfg->nspares >= RF_MAX_DISKS) {
1235: RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1236: return (ENOMEM);
1237: }
1.42 oster 1238: d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1.41 oster 1239: d = 0;
1.166 oster 1240: for (j = 0; j < d_cfg->cols; j++) {
1241: d_cfg->devs[d] = raidPtr->Disks[j];
1242: d++;
1.41 oster 1243: }
1244: for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1.166 oster 1245: d_cfg->spares[i] = raidPtr->Disks[j];
1.41 oster 1246: }
1.156 dsl 1247: retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1.41 oster 1248: RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1249:
1250: return (retcode);
1.9 oster 1251:
1.22 oster 1252: case RAIDFRAME_CHECK_PARITY:
1.42 oster 1253: *(int *) data = raidPtr->parity_good;
1.22 oster 1254: return (0);
1.41 oster 1255:
1.1 oster 1256: case RAIDFRAME_RESET_ACCTOTALS:
1.108 thorpej 1257: memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1.41 oster 1258: return (0);
1.9 oster 1259:
1.1 oster 1260: case RAIDFRAME_GET_ACCTOTALS:
1.41 oster 1261: totals = (RF_AccTotals_t *) data;
1.42 oster 1262: *totals = raidPtr->acc_totals;
1.41 oster 1263: return (0);
1.9 oster 1264:
1.1 oster 1265: case RAIDFRAME_KEEP_ACCTOTALS:
1.42 oster 1266: raidPtr->keep_acc_totals = *(int *)data;
1.41 oster 1267: return (0);
1.9 oster 1268:
1.1 oster 1269: case RAIDFRAME_GET_SIZE:
1.42 oster 1270: *(int *) data = raidPtr->totalSectors;
1.9 oster 1271: return (0);
1.1 oster 1272:
1273: /* fail a disk & optionally start reconstruction */
1274: case RAIDFRAME_FAIL_DISK:
1.24 oster 1275:
1.42 oster 1276: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.24 oster 1277: /* Can't do this on a RAID 0!! */
1278: return(EINVAL);
1279: }
1280:
1.1 oster 1281: rr = (struct rf_recon_req *) data;
1.166 oster 1282: rr->row = 0;
1283: if (rr->col < 0 || rr->col >= raidPtr->numCol)
1.9 oster 1284: return (EINVAL);
1.149 oster 1285:
1286:
1287: RF_LOCK_MUTEX(raidPtr->mutex);
1.185 oster 1288: if (raidPtr->status == rf_rs_reconstructing) {
1289: /* you can't fail a disk while we're reconstructing! */
1290: /* XXX wrong for RAID6 */
1291: RF_UNLOCK_MUTEX(raidPtr->mutex);
1292: return (EINVAL);
1293: }
1.186 perry 1294: if ((raidPtr->Disks[rr->col].status ==
1295: rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1.149 oster 1296: /* some other component has failed. Let's not make
1297: things worse. XXX wrong for RAID6 */
1298: RF_UNLOCK_MUTEX(raidPtr->mutex);
1299: return (EINVAL);
1300: }
1.166 oster 1301: if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1.149 oster 1302: /* Can't fail a spared disk! */
1303: RF_UNLOCK_MUTEX(raidPtr->mutex);
1304: return (EINVAL);
1305: }
1306: RF_UNLOCK_MUTEX(raidPtr->mutex);
1.1 oster 1307:
1.9 oster 1308: /* make a copy of the recon request so that we don't rely on
1309: * the user's buffer */
1.1 oster 1310: RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1.38 oster 1311: if (rrcopy == NULL)
1312: return(ENOMEM);
1.118 wiz 1313: memcpy(rrcopy, rr, sizeof(*rr));
1.42 oster 1314: rrcopy->raidPtr = (void *) raidPtr;
1.1 oster 1315:
1.42 oster 1316: retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1.37 oster 1317: rf_ReconThread,
1318: rrcopy,"raid_recon");
1.9 oster 1319: return (0);
1320:
1321: /* invoke a copyback operation after recon on whatever disk
1322: * needs it, if any */
1323: case RAIDFRAME_COPYBACK:
1.24 oster 1324:
1.42 oster 1325: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.24 oster 1326: /* This makes no sense on a RAID 0!! */
1327: return(EINVAL);
1328: }
1329:
1.42 oster 1330: if (raidPtr->copyback_in_progress == 1) {
1.37 oster 1331: /* Copyback is already in progress! */
1332: return(EINVAL);
1333: }
1.27 oster 1334:
1.42 oster 1335: retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1.37 oster 1336: rf_CopybackThread,
1.42 oster 1337: raidPtr,"raid_copyback");
1.37 oster 1338: return (retcode);
1.9 oster 1339:
1.1 oster 1340: /* return the percentage completion of reconstruction */
1.37 oster 1341: case RAIDFRAME_CHECK_RECON_STATUS:
1.42 oster 1342: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.71 oster 1343: /* This makes no sense on a RAID 0, so tell the
1344: user it's done. */
1345: *(int *) data = 100;
1346: return(0);
1.24 oster 1347: }
1.166 oster 1348: if (raidPtr->status != rf_rs_reconstructing)
1.1 oster 1349: *(int *) data = 100;
1.171 oster 1350: else {
1351: if (raidPtr->reconControl->numRUsTotal > 0) {
1352: *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1353: } else {
1354: *(int *) data = 0;
1355: }
1356: }
1.9 oster 1357: return (0);
1.83 oster 1358: case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1359: progressInfoPtr = (RF_ProgressInfo_t **) data;
1.166 oster 1360: if (raidPtr->status != rf_rs_reconstructing) {
1.83 oster 1361: progressInfo.remaining = 0;
1362: progressInfo.completed = 100;
1363: progressInfo.total = 100;
1364: } else {
1.186 perry 1365: progressInfo.total =
1.166 oster 1366: raidPtr->reconControl->numRUsTotal;
1.186 perry 1367: progressInfo.completed =
1.166 oster 1368: raidPtr->reconControl->numRUsComplete;
1.83 oster 1369: progressInfo.remaining = progressInfo.total -
1370: progressInfo.completed;
1371: }
1.156 dsl 1372: retcode = copyout(&progressInfo, *progressInfoPtr,
1.83 oster 1373: sizeof(RF_ProgressInfo_t));
1374: return (retcode);
1.9 oster 1375:
1.37 oster 1376: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1.42 oster 1377: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.80 oster 1378: /* This makes no sense on a RAID 0, so tell the
1379: user it's done. */
1380: *(int *) data = 100;
1381: return(0);
1.37 oster 1382: }
1.42 oster 1383: if (raidPtr->parity_rewrite_in_progress == 1) {
1.186 perry 1384: *(int *) data = 100 *
1385: raidPtr->parity_rewrite_stripes_done /
1.83 oster 1386: raidPtr->Layout.numStripe;
1.37 oster 1387: } else {
1388: *(int *) data = 100;
1389: }
1390: return (0);
1391:
1.83 oster 1392: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1393: progressInfoPtr = (RF_ProgressInfo_t **) data;
1394: if (raidPtr->parity_rewrite_in_progress == 1) {
1395: progressInfo.total = raidPtr->Layout.numStripe;
1.186 perry 1396: progressInfo.completed =
1.83 oster 1397: raidPtr->parity_rewrite_stripes_done;
1398: progressInfo.remaining = progressInfo.total -
1399: progressInfo.completed;
1400: } else {
1401: progressInfo.remaining = 0;
1402: progressInfo.completed = 100;
1403: progressInfo.total = 100;
1404: }
1.156 dsl 1405: retcode = copyout(&progressInfo, *progressInfoPtr,
1.83 oster 1406: sizeof(RF_ProgressInfo_t));
1407: return (retcode);
1408:
1.37 oster 1409: case RAIDFRAME_CHECK_COPYBACK_STATUS:
1.42 oster 1410: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.37 oster 1411: /* This makes no sense on a RAID 0 */
1.83 oster 1412: *(int *) data = 100;
1413: return(0);
1.37 oster 1414: }
1.42 oster 1415: if (raidPtr->copyback_in_progress == 1) {
1416: *(int *) data = 100 * raidPtr->copyback_stripes_done /
1417: raidPtr->Layout.numStripe;
1.37 oster 1418: } else {
1419: *(int *) data = 100;
1420: }
1421: return (0);
1422:
1.83 oster 1423: case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1.93 oster 1424: progressInfoPtr = (RF_ProgressInfo_t **) data;
1.83 oster 1425: if (raidPtr->copyback_in_progress == 1) {
1426: progressInfo.total = raidPtr->Layout.numStripe;
1.186 perry 1427: progressInfo.completed =
1.93 oster 1428: raidPtr->copyback_stripes_done;
1.83 oster 1429: progressInfo.remaining = progressInfo.total -
1430: progressInfo.completed;
1431: } else {
1432: progressInfo.remaining = 0;
1433: progressInfo.completed = 100;
1434: progressInfo.total = 100;
1435: }
1.156 dsl 1436: retcode = copyout(&progressInfo, *progressInfoPtr,
1.83 oster 1437: sizeof(RF_ProgressInfo_t));
1438: return (retcode);
1.37 oster 1439:
1.9 oster 1440: /* the sparetable daemon calls this to wait for the kernel to
1441: * need a spare table. this ioctl does not return until a
1442: * spare table is needed. XXX -- calling mpsleep here in the
1443: * ioctl code is almost certainly wrong and evil. -- XXX XXX
1444: * -- I should either compute the spare table in the kernel,
1445: * or have a different -- XXX XXX -- interface (a different
1.42 oster 1446: * character device) for delivering the table -- XXX */
1.1 oster 1447: #if 0
1448: case RAIDFRAME_SPARET_WAIT:
1449: RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1.9 oster 1450: while (!rf_sparet_wait_queue)
1451: mpsleep(&rf_sparet_wait_queue, (PZERO + 1) | PCATCH, "sparet wait", 0, (void *) simple_lock_addr(rf_sparet_wait_mutex), MS_LOCK_SIMPLE);
1.1 oster 1452: waitreq = rf_sparet_wait_queue;
1453: rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1454: RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1.9 oster 1455:
1.42 oster 1456: /* structure assignment */
1.186 perry 1457: *((RF_SparetWait_t *) data) = *waitreq;
1.9 oster 1458:
1.1 oster 1459: RF_Free(waitreq, sizeof(*waitreq));
1.9 oster 1460: return (0);
1461:
1462: /* wakes up a process waiting on SPARET_WAIT and puts an error
1463: * code in it that will cause the daemon to exit */
1.1 oster 1464: case RAIDFRAME_ABORT_SPARET_WAIT:
1465: RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1466: waitreq->fcol = -1;
1467: RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1468: waitreq->next = rf_sparet_wait_queue;
1469: rf_sparet_wait_queue = waitreq;
1470: RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1471: wakeup(&rf_sparet_wait_queue);
1.9 oster 1472: return (0);
1.1 oster 1473:
1.9 oster 1474: /* used by the spare table daemon to deliver a spare table
1475: * into the kernel */
1.1 oster 1476: case RAIDFRAME_SEND_SPARET:
1.9 oster 1477:
1.1 oster 1478: /* install the spare table */
1.42 oster 1479: retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1.9 oster 1480:
1481: /* respond to the requestor. the return status of the spare
1482: * table installation is passed in the "fcol" field */
1.1 oster 1483: RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1484: waitreq->fcol = retcode;
1485: RF_LOCK_MUTEX(rf_sparet_wait_mutex);
1486: waitreq->next = rf_sparet_resp_queue;
1487: rf_sparet_resp_queue = waitreq;
1488: wakeup(&rf_sparet_resp_queue);
1489: RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);
1.9 oster 1490:
1491: return (retcode);
1.1 oster 1492: #endif
1493:
1.9 oster 1494: default:
1.36 oster 1495: break; /* fall through to the os-specific code below */
1.1 oster 1496:
1497: }
1.9 oster 1498:
1.42 oster 1499: if (!raidPtr->valid)
1.9 oster 1500: return (EINVAL);
1501:
1.1 oster 1502: /*
1503: * Add support for "regular" device ioctls here.
1504: */
1.9 oster 1505:
1.1 oster 1506: switch (cmd) {
1507: case DIOCGDINFO:
1.9 oster 1508: *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1.1 oster 1509: break;
1.102 fvdl 1510: #ifdef __HAVE_OLD_DISKLABEL
1511: case ODIOCGDINFO:
1512: newlabel = *(rs->sc_dkdev.dk_label);
1513: if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1.103 fvdl 1514: return ENOTTY;
1.102 fvdl 1515: memcpy(data, &newlabel, sizeof (struct olddisklabel));
1516: break;
1517: #endif
1.1 oster 1518:
1519: case DIOCGPART:
1.9 oster 1520: ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1521: ((struct partinfo *) data)->part =
1.1 oster 1522: &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1523: break;
1524:
1525: case DIOCWDINFO:
1526: case DIOCSDINFO:
1.102 fvdl 1527: #ifdef __HAVE_OLD_DISKLABEL
1528: case ODIOCWDINFO:
1529: case ODIOCSDINFO:
1530: #endif
1531: {
1532: struct disklabel *lp;
1533: #ifdef __HAVE_OLD_DISKLABEL
1534: if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1535: memset(&newlabel, 0, sizeof newlabel);
1536: memcpy(&newlabel, data, sizeof (struct olddisklabel));
1537: lp = &newlabel;
1538: } else
1539: #endif
1540: lp = (struct disklabel *)data;
1541:
1.1 oster 1542: if ((error = raidlock(rs)) != 0)
1543: return (error);
1544:
1545: rs->sc_flags |= RAIDF_LABELLING;
1546:
1547: error = setdisklabel(rs->sc_dkdev.dk_label,
1.102 fvdl 1548: lp, 0, rs->sc_dkdev.dk_cpulabel);
1.1 oster 1549: if (error == 0) {
1.102 fvdl 1550: if (cmd == DIOCWDINFO
1551: #ifdef __HAVE_OLD_DISKLABEL
1552: || cmd == ODIOCWDINFO
1553: #endif
1554: )
1.1 oster 1555: error = writedisklabel(RAIDLABELDEV(dev),
1556: raidstrategy, rs->sc_dkdev.dk_label,
1557: rs->sc_dkdev.dk_cpulabel);
1558: }
1559: rs->sc_flags &= ~RAIDF_LABELLING;
1560:
1561: raidunlock(rs);
1562:
1563: if (error)
1564: return (error);
1565: break;
1.102 fvdl 1566: }
1.1 oster 1567:
1568: case DIOCWLABEL:
1.9 oster 1569: if (*(int *) data != 0)
1.1 oster 1570: rs->sc_flags |= RAIDF_WLABEL;
1571: else
1572: rs->sc_flags &= ~RAIDF_WLABEL;
1573: break;
1574:
1575: case DIOCGDEFLABEL:
1.102 fvdl 1576: raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1.1 oster 1577: break;
1.102 fvdl 1578:
1579: #ifdef __HAVE_OLD_DISKLABEL
1580: case ODIOCGDEFLABEL:
1581: raidgetdefaultlabel(raidPtr, rs, &newlabel);
1582: if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1.103 fvdl 1583: return ENOTTY;
1.102 fvdl 1584: memcpy(data, &newlabel, sizeof (struct olddisklabel));
1585: break;
1586: #endif
1.1 oster 1587:
1588: default:
1.39 oster 1589: retcode = ENOTTY;
1.1 oster 1590: }
1.9 oster 1591: return (retcode);
1.1 oster 1592:
1593: }
1594:
1595:
1.9 oster 1596: /* raidinit -- complete the rest of the initialization for the
1.1 oster 1597: RAIDframe device. */
1598:
1599:
1.59 oster 1600: static void
1.169 oster 1601: raidinit(RF_Raid_t *raidPtr)
1.1 oster 1602: {
1603: struct raid_softc *rs;
1.59 oster 1604: int unit;
1.1 oster 1605:
1.59 oster 1606: unit = raidPtr->raidid;
1.1 oster 1607:
1608: rs = &raid_softc[unit];
1609:
1610: /* XXX should check return code first... */
1611: rs->sc_flags |= RAIDF_INITED;
1612:
1.179 itojun 1613: /* XXX doesn't check bounds. */
1614: snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1.1 oster 1615:
1.9 oster 1616: rs->sc_dkdev.dk_name = rs->sc_xname;
1.11 oster 1617:
1.1 oster 1618: /* disk_attach actually creates space for the CPU disklabel, among
1.9 oster 1619: * other things, so it's critical to call this *BEFORE* we try putzing
1620: * with disklabels. */
1.11 oster 1621:
1.188 yamt 1622: pseudo_disk_attach(&rs->sc_dkdev);
1.1 oster 1623:
1624: /* XXX There may be a weird interaction here between this, and
1.9 oster 1625: * protectedSectors, as used in RAIDframe. */
1.11 oster 1626:
1.9 oster 1627: rs->sc_size = raidPtr->totalSectors;
1.1 oster 1628: }
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/*
 * Wake up the spare-table daemon and tell it to get us a spare table.
 *
 * The request is pushed onto the head of rf_sparet_wait_queue and sleepers
 * on that queue are woken.  We then sleep until rf_sparet_resp_queue is
 * non-empty, pop its first entry and return that entry's fcol field.  The
 * popped response is freed here; it is not the request that was passed in.
 *
 * XXX the entries in the queues should be tagged with the raidPtr so
 * that, in the extremely rare case that two recons happen at once, we
 * know for which device we're requesting a spare table.
 *
 * XXX This code is not currently used.  GO
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	RF_SparetWait_t *resp;
	int status;

	RF_LOCK_MUTEX(rf_sparet_wait_mutex);

	/* Queue the request at the head and poke whoever is waiting. */
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	wakeup(&rf_sparet_wait_queue);

	/*
	 * Wait for a response to be posted.
	 * NOTE(review): this used to be described as "mpsleep unlocks the
	 * mutex", but the call is tsleep() and is not handed the mutex --
	 * confirm whether rf_sparet_wait_mutex may be held across the sleep.
	 */
	while (rf_sparet_resp_queue == NULL) {
		tsleep(&rf_sparet_resp_queue, PRIBIO,
		    "raidframe getsparetable", 0);
	}

	/* Dequeue the first response while still holding the mutex. */
	resp = rf_sparet_resp_queue;
	rf_sparet_resp_queue = resp->next;
	RF_UNLOCK_MUTEX(rf_sparet_wait_mutex);

	status = resp->fcol;
	RF_Free(resp, sizeof(*resp));	/* not the req the caller alloc'd */
	return (status);
}
#endif
1.39 oster 1664:
1.186 perry 1665: /* a wrapper around rf_DoAccess that extracts appropriate info from the
1.11 oster 1666: * bp & passes it down.
1.1 oster 1667: * any calls originating in the kernel must use non-blocking I/O
1668: * do some extra sanity checking to return "appropriate" error values for
1669: * certain conditions (to make some standard utilities work)
1.186 perry 1670: *
1.34 oster 1671: * Formerly known as: rf_DoAccessKernel
1.1 oster 1672: */
1.34 oster 1673: void
1.169 oster 1674: raidstart(RF_Raid_t *raidPtr)
1.1 oster 1675: {
1676: RF_SectorCount_t num_blocks, pb, sum;
1677: RF_RaidAddr_t raid_addr;
1678: struct partition *pp;
1.9 oster 1679: daddr_t blocknum;
1680: int unit;
1.1 oster 1681: struct raid_softc *rs;
1.9 oster 1682: int do_async;
1.34 oster 1683: struct buf *bp;
1.180 oster 1684: int rc;
1.1 oster 1685:
1686: unit = raidPtr->raidid;
1687: rs = &raid_softc[unit];
1.186 perry 1688:
1.56 oster 1689: /* quick check to see if anything has died recently */
1690: RF_LOCK_MUTEX(raidPtr->mutex);
1691: if (raidPtr->numNewFailures > 0) {
1.151 oster 1692: RF_UNLOCK_MUTEX(raidPtr->mutex);
1.186 perry 1693: rf_update_component_labels(raidPtr,
1.91 oster 1694: RF_NORMAL_COMPONENT_UPDATE);
1.151 oster 1695: RF_LOCK_MUTEX(raidPtr->mutex);
1.56 oster 1696: raidPtr->numNewFailures--;
1697: }
1698:
1.34 oster 1699: /* Check to see if we're at the limit... */
1700: while (raidPtr->openings > 0) {
1701: RF_UNLOCK_MUTEX(raidPtr->mutex);
1702:
1703: /* get the next item, if any, from the queue */
1.125 hannken 1704: if ((bp = BUFQ_GET(&rs->buf_queue)) == NULL) {
1.34 oster 1705: /* nothing more to do */
1706: return;
1707: }
1708:
1709: /* Ok, for the bp we have here, bp->b_blkno is relative to the
1.186 perry 1710: * partition.. Need to make it absolute to the underlying
1.34 oster 1711: * device.. */
1.1 oster 1712:
1.34 oster 1713: blocknum = bp->b_blkno;
1714: if (DISKPART(bp->b_dev) != RAW_PART) {
1715: pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
1716: blocknum += pp->p_offset;
1717: }
1.1 oster 1718:
1.186 perry 1719: db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1.34 oster 1720: (int) blocknum));
1.186 perry 1721:
1.34 oster 1722: db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1723: db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1.186 perry 1724:
1725: /* *THIS* is where we adjust what block we're going to...
1.34 oster 1726: * but DO NOT TOUCH bp->b_blkno!!! */
1727: raid_addr = blocknum;
1.186 perry 1728:
1.34 oster 1729: num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1730: pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1731: sum = raid_addr + num_blocks + pb;
1732: if (1 || rf_debugKernelAccess) {
1733: db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1734: (int) raid_addr, (int) sum, (int) num_blocks,
1735: (int) pb, (int) bp->b_resid));
1736: }
1737: if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1738: || (sum < num_blocks) || (sum < pb)) {
1739: bp->b_error = ENOSPC;
1740: bp->b_flags |= B_ERROR;
1741: bp->b_resid = bp->b_bcount;
1742: biodone(bp);
1743: RF_LOCK_MUTEX(raidPtr->mutex);
1744: continue;
1745: }
1746: /*
1747: * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1748: */
1.186 perry 1749:
1.34 oster 1750: if (bp->b_bcount & raidPtr->sectorMask) {
1751: bp->b_error = EINVAL;
1752: bp->b_flags |= B_ERROR;
1753: bp->b_resid = bp->b_bcount;
1754: biodone(bp);
1755: RF_LOCK_MUTEX(raidPtr->mutex);
1756: continue;
1.186 perry 1757:
1.34 oster 1758: }
1759: db1_printf(("Calling DoAccess..\n"));
1.186 perry 1760:
1.1 oster 1761:
1.34 oster 1762: RF_LOCK_MUTEX(raidPtr->mutex);
1763: raidPtr->openings--;
1764: RF_UNLOCK_MUTEX(raidPtr->mutex);
1.1 oster 1765:
1.34 oster 1766: /*
1767: * Everything is async.
1768: */
1769: do_async = 1;
1.186 perry 1770:
1.99 oster 1771: disk_busy(&rs->sc_dkdev);
1772:
1.186 perry 1773: /* XXX we're still at splbio() here... do we *really*
1.34 oster 1774: need to be? */
1.20 oster 1775:
1.186 perry 1776: /* don't ever condition on bp->b_flags & B_WRITE.
1.99 oster 1777: * always condition on B_READ instead */
1.186 perry 1778:
1.180 oster 1779: rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1780: RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1781: do_async, raid_addr, num_blocks,
1782: bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1.151 oster 1783:
1.180 oster 1784: if (rc) {
1785: bp->b_error = rc;
1.151 oster 1786: bp->b_flags |= B_ERROR;
1.180 oster 1787: bp->b_resid = bp->b_bcount;
1788: biodone(bp);
1789: /* continue loop */
1.186 perry 1790: }
1.20 oster 1791:
1792: RF_LOCK_MUTEX(raidPtr->mutex);
1793: }
1.34 oster 1794: RF_UNLOCK_MUTEX(raidPtr->mutex);
1795: }
1.20 oster 1796:
1797:
1.7 explorer 1798:
1799:
1.1 oster 1800: /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1801:
1.186 perry 1802: int
1.169 oster 1803: rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
1.1 oster 1804: {
1.9 oster 1805: int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1.1 oster 1806: struct buf *bp;
1.9 oster 1807: struct raidbuf *raidbp = NULL;
1808:
1.1 oster 1809: req->queue = queue;
1.9 oster 1810:
1.134 oster 1811: #if DIAGNOSTIC
1812: if (queue->raidPtr->raidid >= numraid) {
1.137 itojun 1813: printf("Invalid unit number: %d %d\n", queue->raidPtr->raidid,
1814: numraid);
1.141 provos 1815: panic("Invalid Unit number in rf_DispatchKernelIO");
1.1 oster 1816: }
1.134 oster 1817: #endif
1.1 oster 1818:
1819: bp = req->bp;
1.16 oster 1820: #if 1
1.9 oster 1821: /* XXX when there is a physical disk failure, someone is passing us a
1822: * buffer that contains old stuff!! Attempt to deal with this problem
1823: * without taking a performance hit... (not sure where the real bug
1824: * is. It's buried in RAIDframe somewhere) :-( GO ) */
1.4 oster 1825:
1826: if (bp->b_flags & B_ERROR) {
1827: bp->b_flags &= ~B_ERROR;
1828: }
1.9 oster 1829: if (bp->b_error != 0) {
1.4 oster 1830: bp->b_error = 0;
1831: }
1.16 oster 1832: #endif
1.177 oster 1833: raidbp = pool_get(&rf_pools.cbuf, PR_NOWAIT);
1.154 pk 1834: if (raidbp == NULL) {
1835: bp->b_flags |= B_ERROR;
1836: bp->b_error = ENOMEM;
1837: return (ENOMEM);
1838: }
1.155 thorpej 1839: BUF_INIT(&raidbp->rf_buf);
1.1 oster 1840:
1841: /*
1842: * context for raidiodone
1843: */
1844: raidbp->rf_obp = bp;
1845: raidbp->req = req;
1.32 oster 1846:
1.172 yamt 1847: BIO_COPYPRIO(&raidbp->rf_buf, bp);
1848:
1.1 oster 1849: switch (req->type) {
1.9 oster 1850: case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1.1 oster 1851: /* XXX need to do something extra here.. */
1.9 oster 1852: /* I'm leaving this in, as I've never actually seen it used,
1853: * and I'd like folks to report it... GO */
1.1 oster 1854: printf(("WAKEUP CALLED\n"));
1855: queue->numOutstanding++;
1856:
1857: /* XXX need to glue the original buffer into this?? */
1858:
1859: KernelWakeupFunc(&raidbp->rf_buf);
1860: break;
1.9 oster 1861:
1.1 oster 1862: case RF_IO_TYPE_READ:
1863: case RF_IO_TYPE_WRITE:
1.175 oster 1864: #if RF_ACC_TRACE > 0
1.1 oster 1865: if (req->tracerec) {
1866: RF_ETIMER_START(req->tracerec->timer);
1867: }
1.175 oster 1868: #endif
1.9 oster 1869: InitBP(&raidbp->rf_buf, queue->rf_cinfo->ci_vp,
1870: op | bp->b_flags, queue->rf_cinfo->ci_dev,
1871: req->sectorOffset, req->numSector,
1872: req->buf, KernelWakeupFunc, (void *) req,
1873: queue->raidPtr->logBytesPerSector, req->b_proc);
1.1 oster 1874:
1875: if (rf_debugKernelAccess) {
1.9 oster 1876: db1_printf(("dispatch: bp->b_blkno = %ld\n",
1877: (long) bp->b_blkno));
1.1 oster 1878: }
1879: queue->numOutstanding++;
1880: queue->last_deq_sector = req->sectorOffset;
1.9 oster 1881: /* acc wouldn't have been let in if there were any pending
1882: * reqs at any other priority */
1.1 oster 1883: queue->curPriority = req->priority;
1884:
1.166 oster 1885: db1_printf(("Going for %c to unit %d col %d\n",
1.186 perry 1886: req->type, queue->raidPtr->raidid,
1.166 oster 1887: queue->col));
1.1 oster 1888: db1_printf(("sector %d count %d (%d bytes) %d\n",
1.9 oster 1889: (int) req->sectorOffset, (int) req->numSector,
1890: (int) (req->numSector <<
1891: queue->raidPtr->logBytesPerSector),
1892: (int) queue->raidPtr->logBytesPerSector));
1.1 oster 1893: if ((raidbp->rf_buf.b_flags & B_READ) == 0) {
1894: raidbp->rf_buf.b_vp->v_numoutput++;
1895: }
1.173 hannken 1896: VOP_STRATEGY(raidbp->rf_buf.b_vp, &raidbp->rf_buf);
1.1 oster 1897:
1898: break;
1.9 oster 1899:
1.1 oster 1900: default:
1901: panic("bad req->type in rf_DispatchKernelIO");
1902: }
1903: db1_printf(("Exiting from DispatchKernelIO\n"));
1.134 oster 1904:
1.9 oster 1905: return (0);
1.1 oster 1906: }
1.9 oster 1907: /* this is the callback function associated with a I/O invoked from
1.1 oster 1908: kernel code.
1909: */
1.186 perry 1910: static void
1.169 oster 1911: KernelWakeupFunc(struct buf *vbp)
1.9 oster 1912: {
1913: RF_DiskQueueData_t *req = NULL;
1914: RF_DiskQueue_t *queue;
1915: struct raidbuf *raidbp = (struct raidbuf *) vbp;
1916: struct buf *bp;
1.74 augustss 1917: int s;
1.9 oster 1918:
1.36 oster 1919: s = splbio();
1.9 oster 1920: db1_printf(("recovering the request queue:\n"));
1921: req = raidbp->req;
1.1 oster 1922:
1.9 oster 1923: bp = raidbp->rf_obp;
1.1 oster 1924:
1.9 oster 1925: queue = (RF_DiskQueue_t *) req->queue;
1.1 oster 1926:
1.9 oster 1927: if (raidbp->rf_buf.b_flags & B_ERROR) {
1928: bp->b_flags |= B_ERROR;
1929: bp->b_error = raidbp->rf_buf.b_error ?
1930: raidbp->rf_buf.b_error : EIO;
1931: }
1.1 oster 1932:
1.9 oster 1933: /* XXX methinks this could be wrong... */
1.1 oster 1934: #if 1
1.9 oster 1935: bp->b_resid = raidbp->rf_buf.b_resid;
1.1 oster 1936: #endif
1.175 oster 1937: #if RF_ACC_TRACE > 0
1.9 oster 1938: if (req->tracerec) {
1939: RF_ETIMER_STOP(req->tracerec->timer);
1940: RF_ETIMER_EVAL(req->tracerec->timer);
1941: RF_LOCK_MUTEX(rf_tracing_mutex);
1942: req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1943: req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
1944: req->tracerec->num_phys_ios++;
1945: RF_UNLOCK_MUTEX(rf_tracing_mutex);
1946: }
1.175 oster 1947: #endif
1.9 oster 1948: bp->b_bcount = raidbp->rf_buf.b_bcount; /* XXXX ?? */
1.1 oster 1949:
1.9 oster 1950: /* XXX Ok, let's get aggressive... If B_ERROR is set, let's go
1951: * ballistic, and mark the component as hosed... */
1.36 oster 1952:
1.9 oster 1953: if (bp->b_flags & B_ERROR) {
1954: /* Mark the disk as dead */
1955: /* but only mark it once... */
1.186 perry 1956: /* and only if it wouldn't leave this RAID set
1.183 oster 1957: completely broken */
1958: if ((queue->raidPtr->Disks[queue->col].status ==
1.186 perry 1959: rf_ds_optimal) && (queue->raidPtr->numFailures <
1.183 oster 1960: queue->raidPtr->Layout.map->faultsTolerated)) {
1.9 oster 1961: printf("raid%d: IO Error. Marking %s as failed.\n",
1.136 oster 1962: queue->raidPtr->raidid,
1.166 oster 1963: queue->raidPtr->Disks[queue->col].devname);
1964: queue->raidPtr->Disks[queue->col].status =
1.9 oster 1965: rf_ds_failed;
1.166 oster 1966: queue->raidPtr->status = rf_rs_degraded;
1.9 oster 1967: queue->raidPtr->numFailures++;
1.56 oster 1968: queue->raidPtr->numNewFailures++;
1.9 oster 1969: } else { /* Disk is already dead... */
1970: /* printf("Disk already marked as dead!\n"); */
1971: }
1.4 oster 1972:
1.9 oster 1973: }
1.4 oster 1974:
1.177 oster 1975: pool_put(&rf_pools.cbuf, raidbp);
1.9 oster 1976:
1.143 oster 1977: /* Fill in the error value */
1978:
1979: req->error = (bp->b_flags & B_ERROR) ? bp->b_error : 0;
1980:
1981: simple_lock(&queue->raidPtr->iodone_lock);
1982:
1983: /* Drop this one on the "finished" queue... */
1984: TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
1985:
1986: /* Let the raidio thread know there is work to be done. */
1987: wakeup(&(queue->raidPtr->iodone));
1988:
1989: simple_unlock(&queue->raidPtr->iodone_lock);
1.1 oster 1990:
1.36 oster 1991: splx(s);
1.1 oster 1992: }
1993:
1994:
1995:
1996: /*
1997: * initialize a buf structure for doing an I/O in the kernel.
1998: */
1.186 perry 1999: static void
1.169 oster 2000: InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
1.187 christos 2001: RF_SectorNum_t startSect, RF_SectorCount_t numSect, caddr_t bf,
1.169 oster 2002: void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2003: struct proc *b_proc)
1.9 oster 2004: {
2005: /* bp->b_flags = B_PHYS | rw_flag; */
2006: bp->b_flags = B_CALL | rw_flag; /* XXX need B_PHYS here too??? */
2007: bp->b_bcount = numSect << logBytesPerSector;
2008: bp->b_bufsize = bp->b_bcount;
2009: bp->b_error = 0;
2010: bp->b_dev = dev;
1.187 christos 2011: bp->b_data = bf;
1.9 oster 2012: bp->b_blkno = startSect;
2013: bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1.1 oster 2014: if (bp->b_bcount == 0) {
1.141 provos 2015: panic("bp->b_bcount is zero in InitBP!!");
1.1 oster 2016: }
1.161 fvdl 2017: bp->b_proc = b_proc;
1.9 oster 2018: bp->b_iodone = cbFunc;
2019: bp->b_vp = b_vp;
2020:
1.1 oster 2021: }
2022:
2023: static void
1.186 perry 2024: raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
1.169 oster 2025: struct disklabel *lp)
1.1 oster 2026: {
1.108 thorpej 2027: memset(lp, 0, sizeof(*lp));
1.1 oster 2028:
2029: /* fabricate a label... */
2030: lp->d_secperunit = raidPtr->totalSectors;
2031: lp->d_secsize = raidPtr->bytesPerSector;
1.45 oster 2032: lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1.105 oster 2033: lp->d_ntracks = 4 * raidPtr->numCol;
1.186 perry 2034: lp->d_ncylinders = raidPtr->totalSectors /
1.45 oster 2035: (lp->d_nsectors * lp->d_ntracks);
1.1 oster 2036: lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2037:
2038: strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1.9 oster 2039: lp->d_type = DTYPE_RAID;
1.1 oster 2040: strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2041: lp->d_rpm = 3600;
2042: lp->d_interleave = 1;
2043: lp->d_flags = 0;
2044:
2045: lp->d_partitions[RAW_PART].p_offset = 0;
2046: lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2047: lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2048: lp->d_npartitions = RAW_PART + 1;
2049:
2050: lp->d_magic = DISKMAGIC;
2051: lp->d_magic2 = DISKMAGIC;
2052: lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2053:
2054: }
2055: /*
2056: * Read the disklabel from the raid device. If one is not present, fake one
2057: * up.
2058: */
2059: static void
1.169 oster 2060: raidgetdisklabel(dev_t dev)
1.1 oster 2061: {
1.9 oster 2062: int unit = raidunit(dev);
1.1 oster 2063: struct raid_softc *rs = &raid_softc[unit];
1.158 dsl 2064: const char *errstring;
1.1 oster 2065: struct disklabel *lp = rs->sc_dkdev.dk_label;
2066: struct cpu_disklabel *clp = rs->sc_dkdev.dk_cpulabel;
2067: RF_Raid_t *raidPtr;
2068:
2069: db1_printf(("Getting the disklabel...\n"));
2070:
1.108 thorpej 2071: memset(clp, 0, sizeof(*clp));
1.1 oster 2072:
2073: raidPtr = raidPtrs[unit];
2074:
2075: raidgetdefaultlabel(raidPtr, rs, lp);
2076:
2077: /*
2078: * Call the generic disklabel extraction routine.
2079: */
2080: errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2081: rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1.9 oster 2082: if (errstring)
1.1 oster 2083: raidmakedisklabel(rs);
2084: else {
1.9 oster 2085: int i;
1.1 oster 2086: struct partition *pp;
2087:
2088: /*
2089: * Sanity check whether the found disklabel is valid.
2090: *
2091: * This is necessary since total size of the raid device
2092: * may vary when an interleave is changed even though exactly
2093: * same componets are used, and old disklabel may used
2094: * if that is found.
2095: */
2096: if (lp->d_secperunit != rs->sc_size)
1.123 oster 2097: printf("raid%d: WARNING: %s: "
1.1 oster 2098: "total sector size in disklabel (%d) != "
1.123 oster 2099: "the size of raid (%ld)\n", unit, rs->sc_xname,
1.18 oster 2100: lp->d_secperunit, (long) rs->sc_size);
1.1 oster 2101: for (i = 0; i < lp->d_npartitions; i++) {
2102: pp = &lp->d_partitions[i];
2103: if (pp->p_offset + pp->p_size > rs->sc_size)
1.123 oster 2104: printf("raid%d: WARNING: %s: end of partition `%c' "
1.186 perry 2105: "exceeds the size of raid (%ld)\n",
1.123 oster 2106: unit, rs->sc_xname, 'a' + i, (long) rs->sc_size);
1.1 oster 2107: }
2108: }
2109:
2110: }
2111: /*
2112: * Take care of things one might want to take care of in the event
2113: * that a disklabel isn't present.
2114: */
2115: static void
1.169 oster 2116: raidmakedisklabel(struct raid_softc *rs)
1.1 oster 2117: {
2118: struct disklabel *lp = rs->sc_dkdev.dk_label;
2119: db1_printf(("Making a label..\n"));
2120:
2121: /*
2122: * For historical reasons, if there's no disklabel present
2123: * the raw partition must be marked FS_BSDFFS.
2124: */
2125:
2126: lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2127:
2128: strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2129:
2130: lp->d_checksum = dkcksum(lp);
2131: }
2132: /*
2133: * Lookup the provided name in the filesystem. If the file exists,
2134: * is a valid block device, and isn't being used by anyone else,
2135: * set *vpp to the file's vnode.
1.9 oster 2136: * You'll find the original of this in ccd.c
1.1 oster 2137: */
2138: int
1.169 oster 2139: raidlookup(char *path, struct proc *p, struct vnode **vpp)
1.1 oster 2140: {
2141: struct nameidata nd;
2142: struct vnode *vp;
2143: struct vattr va;
1.9 oster 2144: int error;
1.1 oster 2145:
1.161 fvdl 2146: NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
1.9 oster 2147: if ((error = vn_open(&nd, FREAD | FWRITE, 0)) != 0) {
1.1 oster 2148: return (error);
2149: }
2150: vp = nd.ni_vp;
2151: if (vp->v_usecount > 1) {
2152: VOP_UNLOCK(vp, 0);
1.161 fvdl 2153: (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1.1 oster 2154: return (EBUSY);
2155: }
1.161 fvdl 2156: if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) != 0) {
1.1 oster 2157: VOP_UNLOCK(vp, 0);
1.161 fvdl 2158: (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1.1 oster 2159: return (error);
2160: }
2161: /* XXX: eventually we should handle VREG, too. */
2162: if (va.va_type != VBLK) {
2163: VOP_UNLOCK(vp, 0);
1.161 fvdl 2164: (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1.1 oster 2165: return (ENOTBLK);
2166: }
2167: VOP_UNLOCK(vp, 0);
2168: *vpp = vp;
2169: return (0);
2170: }
2171: /*
2172: * Wait interruptibly for an exclusive lock.
2173: *
2174: * XXX
2175: * Several drivers do this; it should be abstracted and made MP-safe.
2176: * (Hmm... where have we seen this warning before :-> GO )
2177: */
2178: static int
1.169 oster 2179: raidlock(struct raid_softc *rs)
1.1 oster 2180: {
1.9 oster 2181: int error;
1.1 oster 2182:
2183: while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2184: rs->sc_flags |= RAIDF_WANTED;
1.9 oster 2185: if ((error =
2186: tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1.1 oster 2187: return (error);
2188: }
2189: rs->sc_flags |= RAIDF_LOCKED;
2190: return (0);
2191: }
2192: /*
2193: * Unlock and wake up any waiters.
2194: */
2195: static void
1.169 oster 2196: raidunlock(struct raid_softc *rs)
1.1 oster 2197: {
2198:
2199: rs->sc_flags &= ~RAIDF_LOCKED;
2200: if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2201: rs->sc_flags &= ~RAIDF_WANTED;
2202: wakeup(rs);
2203: }
1.11 oster 2204: }
1.186 perry 2205:
1.11 oster 2206:
/*
 * Placement of the component label on each component, in bytes: the
 * label I/O routines in this file transfer RF_COMPONENT_INFO_SIZE bytes
 * at block RF_COMPONENT_INFO_OFFSET / DEV_BSIZE.
 */
#define RF_COMPONENT_INFO_OFFSET	(16 * 1024)	/* bytes */
#define RF_COMPONENT_INFO_SIZE		(1 * 1024)	/* bytes */
2209:
1.186 perry 2210: int
1.12 oster 2211: raidmarkclean(dev_t dev, struct vnode *b_vp, int mod_counter)
2212: {
1.48 oster 2213: RF_ComponentLabel_t clabel;
2214: raidread_component_label(dev, b_vp, &clabel);
2215: clabel.mod_counter = mod_counter;
2216: clabel.clean = RF_RAID_CLEAN;
2217: raidwrite_component_label(dev, b_vp, &clabel);
1.12 oster 2218: return(0);
2219: }
2220:
2221:
1.186 perry 2222: int
1.12 oster 2223: raidmarkdirty(dev_t dev, struct vnode *b_vp, int mod_counter)
1.11 oster 2224: {
1.48 oster 2225: RF_ComponentLabel_t clabel;
2226: raidread_component_label(dev, b_vp, &clabel);
2227: clabel.mod_counter = mod_counter;
2228: clabel.clean = RF_RAID_DIRTY;
2229: raidwrite_component_label(dev, b_vp, &clabel);
1.11 oster 2230: return(0);
2231: }
2232:
2233: /* ARGSUSED */
2234: int
1.186 perry 2235: raidread_component_label(dev_t dev, struct vnode *b_vp,
1.169 oster 2236: RF_ComponentLabel_t *clabel)
1.11 oster 2237: {
2238: struct buf *bp;
1.130 gehenna 2239: const struct bdevsw *bdev;
1.11 oster 2240: int error;
1.186 perry 2241:
1.11 oster 2242: /* XXX should probably ensure that we don't try to do this if
1.186 perry 2243: someone has changed rf_protected_sectors. */
1.11 oster 2244:
1.98 oster 2245: if (b_vp == NULL) {
2246: /* For whatever reason, this component is not valid.
2247: Don't try to read a component label from it. */
2248: return(EINVAL);
2249: }
2250:
1.11 oster 2251: /* get a block of the appropriate size... */
2252: bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2253: bp->b_dev = dev;
2254:
2255: /* get our ducks in a row for the read */
2256: bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2257: bp->b_bcount = RF_COMPONENT_INFO_SIZE;
1.100 chs 2258: bp->b_flags |= B_READ;
1.11 oster 2259: bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2260:
1.130 gehenna 2261: bdev = bdevsw_lookup(bp->b_dev);
2262: if (bdev == NULL)
2263: return (ENXIO);
2264: (*bdev->d_strategy)(bp);
1.11 oster 2265:
1.186 perry 2266: error = biowait(bp);
1.11 oster 2267:
2268: if (!error) {
1.79 thorpej 2269: memcpy(clabel, bp->b_data,
1.11 oster 2270: sizeof(RF_ComponentLabel_t));
1.186 perry 2271: }
1.11 oster 2272:
1.186 perry 2273: brelse(bp);
1.11 oster 2274: return(error);
2275: }
2276: /* ARGSUSED */
1.186 perry 2277: int
2278: raidwrite_component_label(dev_t dev, struct vnode *b_vp,
1.169 oster 2279: RF_ComponentLabel_t *clabel)
1.11 oster 2280: {
2281: struct buf *bp;
1.130 gehenna 2282: const struct bdevsw *bdev;
1.11 oster 2283: int error;
2284:
2285: /* get a block of the appropriate size... */
2286: bp = geteblk((int)RF_COMPONENT_INFO_SIZE);
2287: bp->b_dev = dev;
2288:
2289: /* get our ducks in a row for the write */
2290: bp->b_blkno = RF_COMPONENT_INFO_OFFSET / DEV_BSIZE;
2291: bp->b_bcount = RF_COMPONENT_INFO_SIZE;
1.100 chs 2292: bp->b_flags |= B_WRITE;
1.11 oster 2293: bp->b_resid = RF_COMPONENT_INFO_SIZE / DEV_BSIZE;
2294:
1.79 thorpej 2295: memset(bp->b_data, 0, RF_COMPONENT_INFO_SIZE );
1.11 oster 2296:
1.79 thorpej 2297: memcpy(bp->b_data, clabel, sizeof(RF_ComponentLabel_t));
1.11 oster 2298:
1.130 gehenna 2299: bdev = bdevsw_lookup(bp->b_dev);
2300: if (bdev == NULL)
2301: return (ENXIO);
2302: (*bdev->d_strategy)(bp);
1.186 perry 2303: error = biowait(bp);
1.11 oster 2304: brelse(bp);
2305: if (error) {
1.48 oster 2306: #if 1
1.11 oster 2307: printf("Failed to write RAID component info!\n");
1.48 oster 2308: #endif
1.11 oster 2309: }
2310:
2311: return(error);
1.1 oster 2312: }
1.12 oster 2313:
1.186 perry 2314: void
1.169 oster 2315: rf_markalldirty(RF_Raid_t *raidPtr)
1.12 oster 2316: {
1.48 oster 2317: RF_ComponentLabel_t clabel;
1.146 oster 2318: int sparecol;
1.166 oster 2319: int c;
2320: int j;
2321: int scol = -1;
1.12 oster 2322:
2323: raidPtr->mod_counter++;
1.166 oster 2324: for (c = 0; c < raidPtr->numCol; c++) {
2325: /* we don't want to touch (at all) a disk that has
2326: failed */
2327: if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
2328: raidread_component_label(
2329: raidPtr->Disks[c].dev,
2330: raidPtr->raid_cinfo[c].ci_vp,
2331: &clabel);
2332: if (clabel.status == rf_ds_spared) {
1.186 perry 2333: /* XXX do something special...
2334: but whatever you do, don't
1.166 oster 2335: try to access it!! */
2336: } else {
1.186 perry 2337: raidmarkdirty(
1.166 oster 2338: raidPtr->Disks[c].dev,
2339: raidPtr->raid_cinfo[c].ci_vp,
1.146 oster 2340: raidPtr->mod_counter);
1.12 oster 2341: }
1.166 oster 2342: }
1.186 perry 2343: }
1.146 oster 2344:
1.12 oster 2345: for( c = 0; c < raidPtr->numSpare ; c++) {
2346: sparecol = raidPtr->numCol + c;
1.166 oster 2347: if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.186 perry 2348: /*
2349:
2350: we claim this disk is "optimal" if it's
2351: rf_ds_used_spare, as that means it should be
2352: directly substitutable for the disk it replaced.
1.12 oster 2353: We note that too...
2354:
2355: */
2356:
1.166 oster 2357: for(j=0;j<raidPtr->numCol;j++) {
2358: if (raidPtr->Disks[j].spareCol == sparecol) {
2359: scol = j;
2360: break;
1.12 oster 2361: }
2362: }
1.186 perry 2363:
2364: raidread_component_label(
1.166 oster 2365: raidPtr->Disks[sparecol].dev,
2366: raidPtr->raid_cinfo[sparecol].ci_vp,
1.146 oster 2367: &clabel);
1.12 oster 2368: /* make sure status is noted */
1.146 oster 2369:
2370: raid_init_component_label(raidPtr, &clabel);
2371:
1.166 oster 2372: clabel.row = 0;
1.48 oster 2373: clabel.column = scol;
1.146 oster 2374: /* Note: we *don't* change status from rf_ds_used_spare
2375: to rf_ds_optimal */
2376: /* clabel.status = rf_ds_optimal; */
1.186 perry 2377:
1.166 oster 2378: raidmarkdirty(raidPtr->Disks[sparecol].dev,
2379: raidPtr->raid_cinfo[sparecol].ci_vp,
1.146 oster 2380: raidPtr->mod_counter);
1.12 oster 2381: }
2382: }
2383: }
2384:
1.13 oster 2385:
2386: void
1.169 oster 2387: rf_update_component_labels(RF_Raid_t *raidPtr, int final)
1.13 oster 2388: {
1.48 oster 2389: RF_ComponentLabel_t clabel;
1.13 oster 2390: int sparecol;
1.166 oster 2391: int c;
2392: int j;
2393: int scol;
1.13 oster 2394:
2395: scol = -1;
2396:
1.186 perry 2397: /* XXX should do extra checks to make sure things really are clean,
1.13 oster 2398: rather than blindly setting the clean bit... */
2399:
2400: raidPtr->mod_counter++;
2401:
1.166 oster 2402: for (c = 0; c < raidPtr->numCol; c++) {
2403: if (raidPtr->Disks[c].status == rf_ds_optimal) {
2404: raidread_component_label(
2405: raidPtr->Disks[c].dev,
2406: raidPtr->raid_cinfo[c].ci_vp,
2407: &clabel);
1.13 oster 2408: /* make sure status is noted */
1.166 oster 2409: clabel.status = rf_ds_optimal;
1.57 oster 2410: /* bump the counter */
1.166 oster 2411: clabel.mod_counter = raidPtr->mod_counter;
1.57 oster 2412:
1.186 perry 2413: raidwrite_component_label(
1.166 oster 2414: raidPtr->Disks[c].dev,
2415: raidPtr->raid_cinfo[c].ci_vp,
2416: &clabel);
2417: if (final == RF_FINAL_COMPONENT_UPDATE) {
2418: if (raidPtr->parity_good == RF_RAID_CLEAN) {
1.186 perry 2419: raidmarkclean(
2420: raidPtr->Disks[c].dev,
1.166 oster 2421: raidPtr->raid_cinfo[c].ci_vp,
2422: raidPtr->mod_counter);
1.91 oster 2423: }
1.166 oster 2424: }
1.186 perry 2425: }
1.166 oster 2426: /* else we don't touch it.. */
1.186 perry 2427: }
1.63 oster 2428:
2429: for( c = 0; c < raidPtr->numSpare ; c++) {
2430: sparecol = raidPtr->numCol + c;
1.110 oster 2431: /* Need to ensure that the reconstruct actually completed! */
1.166 oster 2432: if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.186 perry 2433: /*
2434:
2435: we claim this disk is "optimal" if it's
2436: rf_ds_used_spare, as that means it should be
2437: directly substitutable for the disk it replaced.
1.63 oster 2438: We note that too...
2439:
2440: */
2441:
1.166 oster 2442: for(j=0;j<raidPtr->numCol;j++) {
2443: if (raidPtr->Disks[j].spareCol == sparecol) {
2444: scol = j;
2445: break;
1.63 oster 2446: }
2447: }
1.186 perry 2448:
1.63 oster 2449: /* XXX shouldn't *really* need this... */
1.186 perry 2450: raidread_component_label(
1.166 oster 2451: raidPtr->Disks[sparecol].dev,
2452: raidPtr->raid_cinfo[sparecol].ci_vp,
1.63 oster 2453: &clabel);
2454: /* make sure status is noted */
2455:
2456: raid_init_component_label(raidPtr, &clabel);
2457:
2458: clabel.mod_counter = raidPtr->mod_counter;
2459: clabel.column = scol;
2460: clabel.status = rf_ds_optimal;
2461:
2462: raidwrite_component_label(
1.166 oster 2463: raidPtr->Disks[sparecol].dev,
2464: raidPtr->raid_cinfo[sparecol].ci_vp,
1.63 oster 2465: &clabel);
1.91 oster 2466: if (final == RF_FINAL_COMPONENT_UPDATE) {
1.13 oster 2467: if (raidPtr->parity_good == RF_RAID_CLEAN) {
1.166 oster 2468: raidmarkclean( raidPtr->Disks[sparecol].dev,
2469: raidPtr->raid_cinfo[sparecol].ci_vp,
1.91 oster 2470: raidPtr->mod_counter);
1.13 oster 2471: }
2472: }
2473: }
2474: }
1.68 oster 2475: }
2476:
2477: void
1.169 oster 2478: rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
1.69 oster 2479: {
2480: struct proc *p;
2481:
2482: p = raidPtr->engine_thread;
2483:
2484: if (vp != NULL) {
2485: if (auto_configured == 1) {
1.96 oster 2486: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.97 oster 2487: VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
1.69 oster 2488: vput(vp);
1.186 perry 2489:
2490: } else {
1.161 fvdl 2491: (void) vn_close(vp, FREAD | FWRITE, p->p_ucred, p);
1.69 oster 2492: }
1.186 perry 2493: }
1.69 oster 2494: }
2495:
2496:
2497: void
1.169 oster 2498: rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
1.68 oster 2499: {
1.186 perry 2500: int r,c;
1.69 oster 2501: struct vnode *vp;
2502: int acd;
1.68 oster 2503:
2504:
2505: /* We take this opportunity to close the vnodes like we should.. */
2506:
1.166 oster 2507: for (c = 0; c < raidPtr->numCol; c++) {
2508: vp = raidPtr->raid_cinfo[c].ci_vp;
2509: acd = raidPtr->Disks[c].auto_configured;
2510: rf_close_component(raidPtr, vp, acd);
2511: raidPtr->raid_cinfo[c].ci_vp = NULL;
2512: raidPtr->Disks[c].auto_configured = 0;
1.68 oster 2513: }
1.166 oster 2514:
1.68 oster 2515: for (r = 0; r < raidPtr->numSpare; r++) {
1.166 oster 2516: vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2517: acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
1.69 oster 2518: rf_close_component(raidPtr, vp, acd);
1.166 oster 2519: raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2520: raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
1.68 oster 2521: }
1.37 oster 2522: }
1.63 oster 2523:
1.37 oster 2524:
1.186 perry 2525: void
1.169 oster 2526: rf_ReconThread(struct rf_recon_req *req)
1.37 oster 2527: {
2528: int s;
2529: RF_Raid_t *raidPtr;
2530:
2531: s = splbio();
2532: raidPtr = (RF_Raid_t *) req->raidPtr;
2533: raidPtr->recon_in_progress = 1;
2534:
1.166 oster 2535: rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
1.37 oster 2536: ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2537:
2538: RF_Free(req, sizeof(*req));
2539:
2540: raidPtr->recon_in_progress = 0;
2541: splx(s);
2542:
2543: /* That's all... */
2544: kthread_exit(0); /* does not return */
2545: }
2546:
2547: void
1.169 oster 2548: rf_RewriteParityThread(RF_Raid_t *raidPtr)
1.37 oster 2549: {
2550: int retcode;
2551: int s;
2552:
1.184 oster 2553: raidPtr->parity_rewrite_stripes_done = 0;
1.37 oster 2554: raidPtr->parity_rewrite_in_progress = 1;
2555: s = splbio();
2556: retcode = rf_RewriteParity(raidPtr);
2557: splx(s);
2558: if (retcode) {
2559: printf("raid%d: Error re-writing parity!\n",raidPtr->raidid);
2560: } else {
2561: /* set the clean bit! If we shutdown correctly,
2562: the clean bit on each component label will get
2563: set */
2564: raidPtr->parity_good = RF_RAID_CLEAN;
2565: }
2566: raidPtr->parity_rewrite_in_progress = 0;
1.85 oster 2567:
2568: /* Anyone waiting for us to stop? If so, inform them... */
2569: if (raidPtr->waitShutdown) {
2570: wakeup(&raidPtr->parity_rewrite_in_progress);
2571: }
1.37 oster 2572:
2573: /* That's all... */
2574: kthread_exit(0); /* does not return */
2575: }
2576:
2577:
2578: void
1.169 oster 2579: rf_CopybackThread(RF_Raid_t *raidPtr)
1.37 oster 2580: {
2581: int s;
2582:
2583: raidPtr->copyback_in_progress = 1;
2584: s = splbio();
2585: rf_CopybackReconstructedData(raidPtr);
2586: splx(s);
2587: raidPtr->copyback_in_progress = 0;
2588:
2589: /* That's all... */
2590: kthread_exit(0); /* does not return */
2591: }
2592:
2593:
2594: void
1.169 oster 2595: rf_ReconstructInPlaceThread(struct rf_recon_req *req)
1.37 oster 2596: {
2597: int s;
2598: RF_Raid_t *raidPtr;
1.186 perry 2599:
1.37 oster 2600: s = splbio();
2601: raidPtr = req->raidPtr;
2602: raidPtr->recon_in_progress = 1;
1.166 oster 2603: rf_ReconstructInPlace(raidPtr, req->col);
1.37 oster 2604: RF_Free(req, sizeof(*req));
2605: raidPtr->recon_in_progress = 0;
2606: splx(s);
2607:
2608: /* That's all... */
2609: kthread_exit(0); /* does not return */
1.48 oster 2610: }
2611:
2612: RF_AutoConfig_t *
2613: rf_find_raid_components()
2614: {
2615: struct vnode *vp;
2616: struct disklabel label;
2617: struct device *dv;
2618: dev_t dev;
1.130 gehenna 2619: int bmajor;
1.48 oster 2620: int error;
2621: int i;
2622: int good_one;
2623: RF_ComponentLabel_t *clabel;
2624: RF_AutoConfig_t *ac_list;
2625: RF_AutoConfig_t *ac;
2626:
2627:
2628: /* initialize the AutoConfig list */
2629: ac_list = NULL;
2630:
2631: /* we begin by trolling through *all* the devices on the system */
2632:
2633: for (dv = alldevs.tqh_first; dv != NULL;
2634: dv = dv->dv_list.tqe_next) {
2635:
2636: /* we are only interested in disks... */
2637: if (dv->dv_class != DV_DISK)
2638: continue;
2639:
2640: /* we don't care about floppies... */
1.140 thorpej 2641: if (!strcmp(dv->dv_cfdata->cf_name,"fd")) {
1.119 leo 2642: continue;
2643: }
1.129 oster 2644:
2645: /* we don't care about CD's... */
1.140 thorpej 2646: if (!strcmp(dv->dv_cfdata->cf_name,"cd")) {
1.129 oster 2647: continue;
2648: }
2649:
1.120 leo 2650: /* hdfd is the Atari/Hades floppy driver */
1.140 thorpej 2651: if (!strcmp(dv->dv_cfdata->cf_name,"hdfd")) {
1.121 leo 2652: continue;
2653: }
2654: /* fdisa is the Atari/Milan floppy driver */
1.140 thorpej 2655: if (!strcmp(dv->dv_cfdata->cf_name,"fdisa")) {
1.48 oster 2656: continue;
2657: }
1.186 perry 2658:
1.48 oster 2659: /* need to find the device_name_to_block_device_major stuff */
1.130 gehenna 2660: bmajor = devsw_name2blk(dv->dv_xname, NULL, 0);
1.48 oster 2661:
2662: /* get a vnode for the raw partition of this disk */
2663:
1.130 gehenna 2664: dev = MAKEDISKDEV(bmajor, dv->dv_unit, RAW_PART);
1.48 oster 2665: if (bdevvp(dev, &vp))
2666: panic("RAID can't alloc vnode");
2667:
2668: error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2669:
2670: if (error) {
1.186 perry 2671: /* "Who cares." Continue looking
1.48 oster 2672: for something that exists*/
2673: vput(vp);
2674: continue;
2675: }
2676:
2677: /* Ok, the disk exists. Go get the disklabel. */
1.156 dsl 2678: error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED, 0);
1.48 oster 2679: if (error) {
2680: /*
2681: * XXX can't happen - open() would
2682: * have errored out (or faked up one)
2683: */
1.181 thorpej 2684: if (error != ENOTTY)
2685: printf("RAIDframe: can't get label for dev "
2686: "%s (%d)\n", dv->dv_xname, error);
1.48 oster 2687: }
2688:
2689: /* don't need this any more. We'll allocate it again
2690: a little later if we really do... */
1.96 oster 2691: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.97 oster 2692: VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
1.48 oster 2693: vput(vp);
2694:
1.181 thorpej 2695: if (error)
2696: continue;
2697:
1.48 oster 2698: for (i=0; i < label.d_npartitions; i++) {
2699: /* We only support partitions marked as RAID */
2700: if (label.d_partitions[i].p_fstype != FS_RAID)
2701: continue;
2702:
1.130 gehenna 2703: dev = MAKEDISKDEV(bmajor, dv->dv_unit, i);
1.48 oster 2704: if (bdevvp(dev, &vp))
2705: panic("RAID can't alloc vnode");
2706:
2707: error = VOP_OPEN(vp, FREAD, NOCRED, 0);
2708: if (error) {
2709: /* Whatever... */
2710: vput(vp);
2711: continue;
2712: }
2713:
2714: good_one = 0;
2715:
1.186 perry 2716: clabel = (RF_ComponentLabel_t *)
2717: malloc(sizeof(RF_ComponentLabel_t),
1.48 oster 2718: M_RAIDFRAME, M_NOWAIT);
2719: if (clabel == NULL) {
2720: /* XXX CLEANUP HERE */
2721: printf("RAID auto config: out of memory!\n");
2722: return(NULL); /* XXX probably should panic? */
2723: }
2724:
2725: if (!raidread_component_label(dev, vp, clabel)) {
2726: /* Got the label. Does it look reasonable? */
1.49 oster 2727: if (rf_reasonable_label(clabel) &&
1.186 perry 2728: (clabel->partitionSize <=
1.48 oster 2729: label.d_partitions[i].p_size)) {
2730: #if DEBUG
1.186 perry 2731: printf("Component on: %s%c: %d\n",
1.48 oster 2732: dv->dv_xname, 'a'+i,
2733: label.d_partitions[i].p_size);
1.67 oster 2734: rf_print_component_label(clabel);
1.48 oster 2735: #endif
1.186 perry 2736: /* if it's reasonable, add it,
1.48 oster 2737: else ignore it. */
2738: ac = (RF_AutoConfig_t *)
2739: malloc(sizeof(RF_AutoConfig_t),
2740: M_RAIDFRAME,
2741: M_NOWAIT);
2742: if (ac == NULL) {
2743: /* XXX should panic?? */
2744: return(NULL);
2745: }
1.186 perry 2746:
1.179 itojun 2747: snprintf(ac->devname,
2748: sizeof(ac->devname), "%s%c",
2749: dv->dv_xname, 'a'+i);
1.48 oster 2750: ac->dev = dev;
2751: ac->vp = vp;
2752: ac->clabel = clabel;
2753: ac->next = ac_list;
2754: ac_list = ac;
2755: good_one = 1;
1.186 perry 2756: }
1.48 oster 2757: }
2758: if (!good_one) {
2759: /* cleanup */
2760: free(clabel, M_RAIDFRAME);
1.96 oster 2761: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.97 oster 2762: VOP_CLOSE(vp, FREAD | FWRITE, NOCRED, 0);
1.48 oster 2763: vput(vp);
2764: }
2765: }
2766: }
1.106 oster 2767: return(ac_list);
1.48 oster 2768: }
1.186 perry 2769:
1.48 oster 2770: static int
1.169 oster 2771: rf_reasonable_label(RF_ComponentLabel_t *clabel)
1.48 oster 2772: {
1.186 perry 2773:
1.48 oster 2774: if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2775: (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2776: ((clabel->clean == RF_RAID_CLEAN) ||
2777: (clabel->clean == RF_RAID_DIRTY)) &&
1.186 perry 2778: clabel->row >=0 &&
2779: clabel->column >= 0 &&
1.48 oster 2780: clabel->num_rows > 0 &&
2781: clabel->num_columns > 0 &&
1.186 perry 2782: clabel->row < clabel->num_rows &&
1.48 oster 2783: clabel->column < clabel->num_columns &&
2784: clabel->blockSize > 0 &&
2785: clabel->numBlocks > 0) {
2786: /* label looks reasonable enough... */
2787: return(1);
2788: }
2789: return(0);
2790: }
2791:
2792:
#if DEBUG
/*
 * rf_print_component_label -- dump the fields of a component label to
 * the console.  Only compiled when DEBUG is set.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	const char *is_clean = clabel->clean ? "Yes" : "No";
	const char *is_auto = clabel->autoconfigure ? "Yes" : "No";
	const char *is_root = clabel->root_partition ? "Yes" : "No";

	/* Position of this component and shape of the set. */
	printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);

	/* Identity of the set. */
	printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number,
	    clabel->mod_counter);
	printf(" Clean: %s Status: %d\n", is_clean, clabel->status );

	/* Layout parameters. */
	printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf(" RAID Level: %c blocksize: %d numBlocks: %d\n",
	    (char) clabel->parityConfig, clabel->blockSize,
	    clabel->numBlocks);

	/* Autoconfiguration settings. */
	printf(" Autoconfig: %s\n", is_auto );
	printf(" Contains root partition: %s\n", is_root );
	printf(" Last configured as: raid%d\n", clabel->last_unit );
#if 0
	printf(" Config order: %d\n", clabel->config_order);
#endif

}
#endif
1.48 oster 2820:
2821: RF_ConfigSet_t *
1.169 oster 2822: rf_create_auto_sets(RF_AutoConfig_t *ac_list)
1.48 oster 2823: {
2824: RF_AutoConfig_t *ac;
2825: RF_ConfigSet_t *config_sets;
2826: RF_ConfigSet_t *cset;
2827: RF_AutoConfig_t *ac_next;
2828:
2829:
2830: config_sets = NULL;
2831:
2832: /* Go through the AutoConfig list, and figure out which components
2833: belong to what sets. */
2834: ac = ac_list;
2835: while(ac!=NULL) {
2836: /* we're going to putz with ac->next, so save it here
2837: for use at the end of the loop */
2838: ac_next = ac->next;
2839:
2840: if (config_sets == NULL) {
2841: /* will need at least this one... */
2842: config_sets = (RF_ConfigSet_t *)
1.186 perry 2843: malloc(sizeof(RF_ConfigSet_t),
1.48 oster 2844: M_RAIDFRAME, M_NOWAIT);
2845: if (config_sets == NULL) {
1.141 provos 2846: panic("rf_create_auto_sets: No memory!");
1.48 oster 2847: }
2848: /* this one is easy :) */
2849: config_sets->ac = ac;
2850: config_sets->next = NULL;
1.51 oster 2851: config_sets->rootable = 0;
1.48 oster 2852: ac->next = NULL;
2853: } else {
2854: /* which set does this component fit into? */
2855: cset = config_sets;
2856: while(cset!=NULL) {
1.49 oster 2857: if (rf_does_it_fit(cset, ac)) {
1.86 oster 2858: /* looks like it matches... */
2859: ac->next = cset->ac;
2860: cset->ac = ac;
1.48 oster 2861: break;
2862: }
2863: cset = cset->next;
2864: }
2865: if (cset==NULL) {
2866: /* didn't find a match above... new set..*/
2867: cset = (RF_ConfigSet_t *)
1.186 perry 2868: malloc(sizeof(RF_ConfigSet_t),
1.48 oster 2869: M_RAIDFRAME, M_NOWAIT);
2870: if (cset == NULL) {
1.141 provos 2871: panic("rf_create_auto_sets: No memory!");
1.48 oster 2872: }
2873: cset->ac = ac;
2874: ac->next = NULL;
2875: cset->next = config_sets;
1.51 oster 2876: cset->rootable = 0;
1.48 oster 2877: config_sets = cset;
2878: }
2879: }
2880: ac = ac_next;
2881: }
2882:
2883:
2884: return(config_sets);
2885: }
2886:
2887: static int
1.169 oster 2888: rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
1.48 oster 2889: {
2890: RF_ComponentLabel_t *clabel1, *clabel2;
2891:
2892: /* If this one matches the *first* one in the set, that's good
2893: enough, since the other members of the set would have been
2894: through here too... */
1.60 oster 2895: /* note that we are not checking partitionSize here..
2896:
2897: Note that we are also not checking the mod_counters here.
1.186 perry 2898: If everything else matches execpt the mod_counter, that's
1.60 oster 2899: good enough for this test. We will deal with the mod_counters
1.186 perry 2900: a little later in the autoconfiguration process.
1.60 oster 2901:
2902: (clabel1->mod_counter == clabel2->mod_counter) &&
1.81 oster 2903:
2904: The reason we don't check for this is that failed disks
2905: will have lower modification counts. If those disks are
2906: not added to the set they used to belong to, then they will
2907: form their own set, which may result in 2 different sets,
2908: for example, competing to be configured at raid0, and
2909: perhaps competing to be the root filesystem set. If the
2910: wrong ones get configured, or both attempt to become /,
2911: weird behaviour and or serious lossage will occur. Thus we
2912: need to bring them into the fold here, and kick them out at
2913: a later point.
1.60 oster 2914:
2915: */
1.48 oster 2916:
2917: clabel1 = cset->ac->clabel;
2918: clabel2 = ac->clabel;
2919: if ((clabel1->version == clabel2->version) &&
2920: (clabel1->serial_number == clabel2->serial_number) &&
2921: (clabel1->num_rows == clabel2->num_rows) &&
2922: (clabel1->num_columns == clabel2->num_columns) &&
2923: (clabel1->sectPerSU == clabel2->sectPerSU) &&
2924: (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
2925: (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
2926: (clabel1->parityConfig == clabel2->parityConfig) &&
2927: (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
2928: (clabel1->blockSize == clabel2->blockSize) &&
2929: (clabel1->numBlocks == clabel2->numBlocks) &&
2930: (clabel1->autoconfigure == clabel2->autoconfigure) &&
2931: (clabel1->root_partition == clabel2->root_partition) &&
2932: (clabel1->last_unit == clabel2->last_unit) &&
2933: (clabel1->config_order == clabel2->config_order)) {
2934: /* if it get's here, it almost *has* to be a match */
2935: } else {
1.186 perry 2936: /* it's not consistent with somebody in the set..
1.48 oster 2937: punt */
2938: return(0);
2939: }
2940: /* all was fine.. it must fit... */
2941: return(1);
2942: }
2943:
2944: int
1.169 oster 2945: rf_have_enough_components(RF_ConfigSet_t *cset)
1.48 oster 2946: {
1.51 oster 2947: RF_AutoConfig_t *ac;
2948: RF_AutoConfig_t *auto_config;
2949: RF_ComponentLabel_t *clabel;
1.166 oster 2950: int c;
1.51 oster 2951: int num_cols;
2952: int num_missing;
1.86 oster 2953: int mod_counter;
1.87 oster 2954: int mod_counter_found;
1.88 oster 2955: int even_pair_failed;
2956: char parity_type;
1.186 perry 2957:
1.51 oster 2958:
1.48 oster 2959: /* check to see that we have enough 'live' components
2960: of this set. If so, we can configure it if necessary */
2961:
1.51 oster 2962: num_cols = cset->ac->clabel->num_columns;
1.88 oster 2963: parity_type = cset->ac->clabel->parityConfig;
1.51 oster 2964:
2965: /* XXX Check for duplicate components!?!?!? */
2966:
1.86 oster 2967: /* Determine what the mod_counter is supposed to be for this set. */
2968:
1.87 oster 2969: mod_counter_found = 0;
1.101 oster 2970: mod_counter = 0;
1.86 oster 2971: ac = cset->ac;
2972: while(ac!=NULL) {
1.87 oster 2973: if (mod_counter_found==0) {
1.86 oster 2974: mod_counter = ac->clabel->mod_counter;
1.87 oster 2975: mod_counter_found = 1;
2976: } else {
2977: if (ac->clabel->mod_counter > mod_counter) {
2978: mod_counter = ac->clabel->mod_counter;
2979: }
1.86 oster 2980: }
2981: ac = ac->next;
2982: }
2983:
1.51 oster 2984: num_missing = 0;
2985: auto_config = cset->ac;
2986:
1.166 oster 2987: even_pair_failed = 0;
2988: for(c=0; c<num_cols; c++) {
2989: ac = auto_config;
2990: while(ac!=NULL) {
1.186 perry 2991: if ((ac->clabel->column == c) &&
1.166 oster 2992: (ac->clabel->mod_counter == mod_counter)) {
2993: /* it's this one... */
1.51 oster 2994: #if DEBUG
1.166 oster 2995: printf("Found: %s at %d\n",
2996: ac->devname,c);
1.51 oster 2997: #endif
1.166 oster 2998: break;
1.51 oster 2999: }
1.166 oster 3000: ac=ac->next;
3001: }
3002: if (ac==NULL) {
1.51 oster 3003: /* Didn't find one here! */
1.88 oster 3004: /* special case for RAID 1, especially
3005: where there are more than 2
3006: components (where RAIDframe treats
3007: things a little differently :( ) */
1.166 oster 3008: if (parity_type == '1') {
3009: if (c%2 == 0) { /* even component */
3010: even_pair_failed = 1;
3011: } else { /* odd component. If
3012: we're failed, and
3013: so is the even
3014: component, it's
3015: "Good Night, Charlie" */
3016: if (even_pair_failed == 1) {
3017: return(0);
1.88 oster 3018: }
3019: }
1.166 oster 3020: } else {
3021: /* normal accounting */
3022: num_missing++;
1.88 oster 3023: }
1.166 oster 3024: }
3025: if ((parity_type == '1') && (c%2 == 1)) {
1.88 oster 3026: /* Just did an even component, and we didn't
1.186 perry 3027: bail.. reset the even_pair_failed flag,
1.88 oster 3028: and go on to the next component.... */
1.166 oster 3029: even_pair_failed = 0;
1.51 oster 3030: }
3031: }
3032:
3033: clabel = cset->ac->clabel;
3034:
3035: if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3036: ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3037: ((clabel->parityConfig == '5') && (num_missing > 1))) {
3038: /* XXX this needs to be made *much* more general */
3039: /* Too many failures */
3040: return(0);
3041: }
3042: /* otherwise, all is well, and we've got enough to take a kick
3043: at autoconfiguring this set */
3044: return(1);
1.48 oster 3045: }
3046:
3047: void
1.169 oster 3048: rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
3049: RF_Raid_t *raidPtr)
1.48 oster 3050: {
3051: RF_ComponentLabel_t *clabel;
1.77 oster 3052: int i;
1.48 oster 3053:
3054: clabel = ac->clabel;
3055:
3056: /* 1. Fill in the common stuff */
1.166 oster 3057: config->numRow = clabel->num_rows = 1;
1.48 oster 3058: config->numCol = clabel->num_columns;
3059: config->numSpare = 0; /* XXX should this be set here? */
3060: config->sectPerSU = clabel->sectPerSU;
3061: config->SUsPerPU = clabel->SUsPerPU;
3062: config->SUsPerRU = clabel->SUsPerRU;
3063: config->parityConfig = clabel->parityConfig;
3064: /* XXX... */
3065: strcpy(config->diskQueueType,"fifo");
3066: config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3067: config->layoutSpecificSize = 0; /* XXX ?? */
3068:
3069: while(ac!=NULL) {
3070: /* row/col values will be in range due to the checks
3071: in reasonable_label() */
1.166 oster 3072: strcpy(config->devnames[0][ac->clabel->column],
1.48 oster 3073: ac->devname);
3074: ac = ac->next;
3075: }
3076:
1.77 oster 3077: for(i=0;i<RF_MAXDBGV;i++) {
1.163 fvdl 3078: config->debugVars[i][0] = 0;
1.77 oster 3079: }
1.48 oster 3080: }
3081:
3082: int
1.169 oster 3083: rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
1.48 oster 3084: {
3085: RF_ComponentLabel_t clabel;
3086: struct vnode *vp;
3087: dev_t dev;
1.166 oster 3088: int column;
1.148 oster 3089: int sparecol;
1.48 oster 3090:
1.54 oster 3091: raidPtr->autoconfigure = new_value;
1.166 oster 3092:
3093: for(column=0; column<raidPtr->numCol; column++) {
3094: if (raidPtr->Disks[column].status == rf_ds_optimal) {
3095: dev = raidPtr->Disks[column].dev;
3096: vp = raidPtr->raid_cinfo[column].ci_vp;
3097: raidread_component_label(dev, vp, &clabel);
3098: clabel.autoconfigure = new_value;
3099: raidwrite_component_label(dev, vp, &clabel);
1.48 oster 3100: }
3101: }
1.148 oster 3102: for(column = 0; column < raidPtr->numSpare ; column++) {
3103: sparecol = raidPtr->numCol + column;
1.166 oster 3104: if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3105: dev = raidPtr->Disks[sparecol].dev;
3106: vp = raidPtr->raid_cinfo[sparecol].ci_vp;
1.148 oster 3107: raidread_component_label(dev, vp, &clabel);
3108: clabel.autoconfigure = new_value;
3109: raidwrite_component_label(dev, vp, &clabel);
3110: }
3111: }
1.48 oster 3112: return(new_value);
3113: }
3114:
3115: int
1.169 oster 3116: rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
1.48 oster 3117: {
3118: RF_ComponentLabel_t clabel;
3119: struct vnode *vp;
3120: dev_t dev;
1.166 oster 3121: int column;
1.148 oster 3122: int sparecol;
1.48 oster 3123:
1.54 oster 3124: raidPtr->root_partition = new_value;
1.166 oster 3125: for(column=0; column<raidPtr->numCol; column++) {
3126: if (raidPtr->Disks[column].status == rf_ds_optimal) {
3127: dev = raidPtr->Disks[column].dev;
3128: vp = raidPtr->raid_cinfo[column].ci_vp;
3129: raidread_component_label(dev, vp, &clabel);
3130: clabel.root_partition = new_value;
3131: raidwrite_component_label(dev, vp, &clabel);
1.148 oster 3132: }
3133: }
3134: for(column = 0; column < raidPtr->numSpare ; column++) {
3135: sparecol = raidPtr->numCol + column;
1.166 oster 3136: if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3137: dev = raidPtr->Disks[sparecol].dev;
3138: vp = raidPtr->raid_cinfo[sparecol].ci_vp;
1.148 oster 3139: raidread_component_label(dev, vp, &clabel);
3140: clabel.root_partition = new_value;
3141: raidwrite_component_label(dev, vp, &clabel);
1.48 oster 3142: }
3143: }
3144: return(new_value);
3145: }
3146:
3147: void
1.169 oster 3148: rf_release_all_vps(RF_ConfigSet_t *cset)
1.48 oster 3149: {
3150: RF_AutoConfig_t *ac;
1.186 perry 3151:
1.48 oster 3152: ac = cset->ac;
3153: while(ac!=NULL) {
3154: /* Close the vp, and give it back */
3155: if (ac->vp) {
1.96 oster 3156: vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
1.48 oster 3157: VOP_CLOSE(ac->vp, FREAD, NOCRED, 0);
3158: vput(ac->vp);
1.86 oster 3159: ac->vp = NULL;
1.48 oster 3160: }
3161: ac = ac->next;
3162: }
3163: }
3164:
3165:
3166: void
1.169 oster 3167: rf_cleanup_config_set(RF_ConfigSet_t *cset)
1.48 oster 3168: {
3169: RF_AutoConfig_t *ac;
3170: RF_AutoConfig_t *next_ac;
1.186 perry 3171:
1.48 oster 3172: ac = cset->ac;
3173: while(ac!=NULL) {
3174: next_ac = ac->next;
3175: /* nuke the label */
3176: free(ac->clabel, M_RAIDFRAME);
3177: /* cleanup the config structure */
3178: free(ac, M_RAIDFRAME);
3179: /* "next.." */
3180: ac = next_ac;
3181: }
3182: /* and, finally, nuke the config set */
3183: free(cset, M_RAIDFRAME);
3184: }
3185:
3186:
3187: void
1.169 oster 3188: raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
1.48 oster 3189: {
3190: /* current version number */
1.186 perry 3191: clabel->version = RF_COMPONENT_LABEL_VERSION;
1.57 oster 3192: clabel->serial_number = raidPtr->serial_number;
1.48 oster 3193: clabel->mod_counter = raidPtr->mod_counter;
1.166 oster 3194: clabel->num_rows = 1;
1.48 oster 3195: clabel->num_columns = raidPtr->numCol;
3196: clabel->clean = RF_RAID_DIRTY; /* not clean */
3197: clabel->status = rf_ds_optimal; /* "It's good!" */
1.186 perry 3198:
1.48 oster 3199: clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3200: clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3201: clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
1.54 oster 3202:
3203: clabel->blockSize = raidPtr->bytesPerSector;
3204: clabel->numBlocks = raidPtr->sectorsPerDisk;
3205:
1.48 oster 3206: /* XXX not portable */
3207: clabel->parityConfig = raidPtr->Layout.map->parityConfig;
1.54 oster 3208: clabel->maxOutstanding = raidPtr->maxOutstanding;
3209: clabel->autoconfigure = raidPtr->autoconfigure;
3210: clabel->root_partition = raidPtr->root_partition;
1.48 oster 3211: clabel->last_unit = raidPtr->raidid;
1.54 oster 3212: clabel->config_order = raidPtr->config_order;
1.51 oster 3213: }
3214:
3215: int
1.169 oster 3216: rf_auto_config_set(RF_ConfigSet_t *cset, int *unit)
1.51 oster 3217: {
3218: RF_Raid_t *raidPtr;
3219: RF_Config_t *config;
3220: int raidID;
3221: int retcode;
3222:
1.127 oster 3223: #if DEBUG
1.72 oster 3224: printf("RAID autoconfigure\n");
1.127 oster 3225: #endif
1.51 oster 3226:
3227: retcode = 0;
3228: *unit = -1;
3229:
3230: /* 1. Create a config structure */
3231:
3232: config = (RF_Config_t *)malloc(sizeof(RF_Config_t),
3233: M_RAIDFRAME,
3234: M_NOWAIT);
3235: if (config==NULL) {
3236: printf("Out of mem!?!?\n");
3237: /* XXX do something more intelligent here. */
3238: return(1);
3239: }
1.77 oster 3240:
3241: memset(config, 0, sizeof(RF_Config_t));
1.51 oster 3242:
1.186 perry 3243: /*
3244: 2. Figure out what RAID ID this one is supposed to live at
1.51 oster 3245: See if we can get the same RAID dev that it was configured
1.186 perry 3246: on last time..
1.51 oster 3247: */
3248:
3249: raidID = cset->ac->clabel->last_unit;
1.52 oster 3250: if ((raidID < 0) || (raidID >= numraid)) {
1.51 oster 3251: /* let's not wander off into lala land. */
3252: raidID = numraid - 1;
3253: }
3254: if (raidPtrs[raidID]->valid != 0) {
3255:
1.186 perry 3256: /*
3257: Nope... Go looking for an alternative...
1.51 oster 3258: Start high so we don't immediately use raid0 if that's
1.186 perry 3259: not taken.
1.51 oster 3260: */
3261:
1.115 oster 3262: for(raidID = numraid - 1; raidID >= 0; raidID--) {
1.51 oster 3263: if (raidPtrs[raidID]->valid == 0) {
3264: /* can use this one! */
3265: break;
3266: }
3267: }
3268: }
3269:
3270: if (raidID < 0) {
3271: /* punt... */
3272: printf("Unable to auto configure this set!\n");
3273: printf("(Out of RAID devs!)\n");
3274: return(1);
3275: }
1.127 oster 3276:
3277: #if DEBUG
1.72 oster 3278: printf("Configuring raid%d:\n",raidID);
1.127 oster 3279: #endif
3280:
1.51 oster 3281: raidPtr = raidPtrs[raidID];
3282:
3283: /* XXX all this stuff should be done SOMEWHERE ELSE! */
3284: raidPtr->raidid = raidID;
3285: raidPtr->openings = RAIDOUTSTANDING;
3286:
3287: /* 3. Build the configuration structure */
3288: rf_create_configuration(cset->ac, config, raidPtr);
3289:
3290: /* 4. Do the configuration */
3291: retcode = rf_Configure(raidPtr, config, cset->ac);
1.186 perry 3292:
1.51 oster 3293: if (retcode == 0) {
1.61 oster 3294:
1.59 oster 3295: raidinit(raidPtrs[raidID]);
3296:
3297: rf_markalldirty(raidPtrs[raidID]);
1.54 oster 3298: raidPtrs[raidID]->autoconfigure = 1; /* XXX do this here? */
1.51 oster 3299: if (cset->ac->clabel->root_partition==1) {
3300: /* everything configured just fine. Make a note
3301: that this set is eligible to be root. */
3302: cset->rootable = 1;
1.54 oster 3303: /* XXX do this here? */
1.186 perry 3304: raidPtrs[raidID]->root_partition = 1;
1.51 oster 3305: }
3306: }
3307:
3308: /* 5. Cleanup */
3309: free(config, M_RAIDFRAME);
1.186 perry 3310:
1.51 oster 3311: *unit = raidID;
3312: return(retcode);
1.99 oster 3313: }
3314:
3315: void
1.169 oster 3316: rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
1.99 oster 3317: {
3318: struct buf *bp;
3319:
3320: bp = (struct buf *)desc->bp;
1.186 perry 3321: disk_unbusy(&raid_softc[desc->raidPtr->raidid].sc_dkdev,
1.145 mrg 3322: (bp->b_bcount - bp->b_resid), (bp->b_flags & B_READ));
1.13 oster 3323: }
1.177 oster 3324:
/*
 * rf_pool_init -- initialise a pool of `size'-byte items and set its
 * limits.
 *
 * p       pool to initialise
 * size    size of one item
 * w_chan  wait channel string handed to pool_init()
 * xmin    number of items to prime the pool with; also its low
 *         water mark
 * xmax    high water mark
 */
void
rf_pool_init(struct pool *p, size_t size, const char *w_chan,
    size_t xmin, size_t xmax)
{

	pool_init(p, size, 0, 0, 0, w_chan, NULL);

	/* upper limit first, then prime and set the lower limit */
	pool_sethiwat(p, xmax);
	pool_prime(p, xmin);
	pool_setlowat(p, xmin);
}
1.190 ! oster 3334:
! 3335: /*
! 3336: * rf_buf_queue_check(int raidid) -- looks into the buf_queue to see
! 3337: * if there is IO pending and if that IO could possibly be done for a
! 3338: * given RAID set. Returns 0 if IO is waiting and can be done, 1
! 3339: * otherwise.
! 3340: *
! 3341: */
! 3342:
! 3343: int
! 3344: rf_buf_queue_check(int raidid)
! 3345: {
! 3346: if ((BUFQ_PEEK(&(raid_softc[raidid].buf_queue)) != NULL) &&
! 3347: raidPtrs[raidid]->openings > 0) {
! 3348: /* there is work to do */
! 3349: return 0;
! 3350: }
! 3351: /* default is nothing to do */
! 3352: return 1;
! 3353: }
CVSweb <webmaster@jp.NetBSD.org>