Annotation of src/sys/dev/raidframe/rf_netbsdkintf.c, Revision 1.370
1.370 ! christos 1: /* $NetBSD: rf_netbsdkintf.c,v 1.369 2019/02/06 02:49:50 oster Exp $ */
1.281 rmind 2:
1.1 oster 3: /*-
1.295 erh 4: * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
1.1 oster 5: * All rights reserved.
6: *
7: * This code is derived from software contributed to The NetBSD Foundation
8: * by Greg Oster; Jason R. Thorpe.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
18: *
19: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29: * POSSIBILITY OF SUCH DAMAGE.
30: */
31:
32: /*
1.281 rmind 33: * Copyright (c) 1988 University of Utah.
1.1 oster 34: * Copyright (c) 1990, 1993
35: * The Regents of the University of California. All rights reserved.
36: *
37: * This code is derived from software contributed to Berkeley by
38: * the Systems Programming Group of the University of Utah Computer
39: * Science Department.
40: *
41: * Redistribution and use in source and binary forms, with or without
42: * modification, are permitted provided that the following conditions
43: * are met:
44: * 1. Redistributions of source code must retain the above copyright
45: * notice, this list of conditions and the following disclaimer.
46: * 2. Redistributions in binary form must reproduce the above copyright
47: * notice, this list of conditions and the following disclaimer in the
48: * documentation and/or other materials provided with the distribution.
1.162 agc 49: * 3. Neither the name of the University nor the names of its contributors
50: * may be used to endorse or promote products derived from this software
51: * without specific prior written permission.
52: *
53: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63: * SUCH DAMAGE.
64: *
65: * from: Utah $Hdr: cd.c 1.6 90/11/28$
66: *
67: * @(#)cd.c 8.2 (Berkeley) 11/16/93
68: */
69:
70: /*
1.1 oster 71: * Copyright (c) 1995 Carnegie-Mellon University.
72: * All rights reserved.
73: *
74: * Authors: Mark Holland, Jim Zelenka
75: *
76: * Permission to use, copy, modify and distribute this software and
77: * its documentation is hereby granted, provided that both the copyright
78: * notice and this permission notice appear in all copies of the
79: * software, derivative works or modified versions, and any portions
80: * thereof, and that both notices appear in supporting documentation.
81: *
82: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85: *
86: * Carnegie Mellon requests users of this software to return to
87: *
88: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
89: * School of Computer Science
90: * Carnegie Mellon University
91: * Pittsburgh PA 15213-3890
92: *
93: * any improvements or extensions that they make and grant Carnegie the
94: * rights to redistribute these changes.
95: */
96:
97: /***********************************************************
98: *
99: * rf_kintf.c -- the kernel interface routines for RAIDframe
100: *
101: ***********************************************************/
1.112 lukem 102:
103: #include <sys/cdefs.h>
1.370 ! christos 104: __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.369 2019/02/06 02:49:50 oster Exp $");
1.251 ad 105:
106: #ifdef _KERNEL_OPT
107: #include "opt_raid_autoconfig.h"
1.363 mrg 108: #include "opt_compat_netbsd32.h"
1.251 ad 109: #endif
1.1 oster 110:
1.113 lukem 111: #include <sys/param.h>
1.1 oster 112: #include <sys/errno.h>
113: #include <sys/pool.h>
1.152 thorpej 114: #include <sys/proc.h>
1.1 oster 115: #include <sys/queue.h>
116: #include <sys/disk.h>
117: #include <sys/device.h>
118: #include <sys/stat.h>
119: #include <sys/ioctl.h>
120: #include <sys/fcntl.h>
121: #include <sys/systm.h>
122: #include <sys/vnode.h>
123: #include <sys/disklabel.h>
124: #include <sys/conf.h>
125: #include <sys/buf.h>
1.182 yamt 126: #include <sys/bufq.h>
1.65 oster 127: #include <sys/reboot.h>
1.208 elad 128: #include <sys/kauth.h>
1.327 pgoyette 129: #include <sys/module.h>
1.358 pgoyette 130: #include <sys/compat_stub.h>
1.8 oster 131:
1.234 oster 132: #include <prop/proplib.h>
133:
1.110 oster 134: #include <dev/raidframe/raidframevar.h>
135: #include <dev/raidframe/raidframeio.h>
1.269 jld 136: #include <dev/raidframe/rf_paritymap.h>
1.251 ad 137:
1.1 oster 138: #include "rf_raid.h"
1.44 oster 139: #include "rf_copyback.h"
1.1 oster 140: #include "rf_dag.h"
141: #include "rf_dagflags.h"
1.99 oster 142: #include "rf_desc.h"
1.1 oster 143: #include "rf_diskqueue.h"
144: #include "rf_etimer.h"
145: #include "rf_general.h"
146: #include "rf_kintf.h"
147: #include "rf_options.h"
148: #include "rf_driver.h"
149: #include "rf_parityscan.h"
150: #include "rf_threadstuff.h"
151:
1.325 christos 152: #include "ioconf.h"
153:
1.133 oster 154: #ifdef DEBUG
1.9 oster 155: int rf_kdebug_level = 0;
1.1 oster 156: #define db1_printf(a) if (rf_kdebug_level > 0) printf a
1.9 oster 157: #else /* DEBUG */
1.1 oster 158: #define db1_printf(a) { }
1.9 oster 159: #endif /* DEBUG */
1.1 oster 160:
1.344 christos 161: #ifdef DEBUG_ROOT
162: #define DPRINTF(a, ...) printf(a, __VA_ARGS__)
1.345 christos 163: #else
164: #define DPRINTF(a, ...)
1.344 christos 165: #endif
166:
1.249 oster 167: #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1.289 mrg 168: static rf_declare_mutex2(rf_sparet_wait_mutex);
1.287 mrg 169: static rf_declare_cond2(rf_sparet_wait_cv);
170: static rf_declare_cond2(rf_sparet_resp_cv);
1.1 oster 171:
1.10 oster 172: static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
173: * spare table */
174: static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
175: * installation process */
1.249 oster 176: #endif
1.153 thorpej 177:
178: MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
1.10 oster 179:
1.1 oster 180: /* prototypes */
1.187 christos 181: static void KernelWakeupFunc(struct buf *);
182: static void InitBP(struct buf *, struct vnode *, unsigned,
1.225 christos 183: dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
1.187 christos 184: void *, int, struct proc *);
1.300 christos 185: static void raidinit(struct raid_softc *);
1.335 mlelstv 186: static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
1.348 jdolecek 187: static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);
1.1 oster 188:
1.261 dyoung 189: static int raid_match(device_t, cfdata_t, void *);
190: static void raid_attach(device_t, device_t, void *);
191: static int raid_detach(device_t, int);
1.130 gehenna 192:
1.269 jld 193: static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
194: daddr_t, daddr_t);
195: static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
196: daddr_t, daddr_t, int);
197:
1.276 mrg 198: static int raidwrite_component_label(unsigned,
199: dev_t, struct vnode *, RF_ComponentLabel_t *);
200: static int raidread_component_label(unsigned,
201: dev_t, struct vnode *, RF_ComponentLabel_t *);
1.269 jld 202:
1.335 mlelstv 203: static int raid_diskstart(device_t, struct buf *bp);
204: static int raid_dumpblocks(device_t, void *, daddr_t, int);
205: static int raid_lastclose(device_t);
1.269 jld 206:
1.324 mrg 207: static dev_type_open(raidopen);
208: static dev_type_close(raidclose);
209: static dev_type_read(raidread);
210: static dev_type_write(raidwrite);
211: static dev_type_ioctl(raidioctl);
212: static dev_type_strategy(raidstrategy);
213: static dev_type_dump(raiddump);
214: static dev_type_size(raidsize);
1.130 gehenna 215:
/* Block device switch: entry points for the raid(4) block device. */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
226:
/* Character device switch: raw-device entry points for raid(4). */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
1.1 oster 241:
/* Callbacks handed to the common disk (dk) framework for this driver. */
static struct dkdriver rf_dkdriver = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_diskstart = raid_diskstart,
	.d_dumpblocks = raid_dumpblocks,
	.d_lastclose = raid_lastclose,
	.d_minphys = minphys
};
1.235 oster 251:
1.1 oster 252: #define raidunit(x) DISKUNIT(x)
1.335 mlelstv 253: #define raidsoftc(dev) (((struct raid_softc *)device_private(dev))->sc_r.softc)
1.1 oster 254:
1.202 oster 255: extern struct cfdriver raid_cd;
1.266 dyoung 256: CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
257: raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
258: DVF_DETACH_SHUTDOWN);
1.202 oster 259:
/* Internal representation of a rf_recon_req */
struct rf_recon_req_internal {
	RF_RowCol_t col;		/* component column the request targets */
	RF_ReconReqFlags_t flags;	/* reconstruction request flags */
	void *raidPtr;			/* associated RF_Raid_t (stored as void *) */
};
266:
1.186 perry 267: /*
268: * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
269: * Be aware that large numbers can allow the driver to consume a lot of
1.28 oster 270: * kernel memory, especially on writes, and in degraded mode reads.
1.186 perry 271: *
272: * For example: with a stripe width of 64 blocks (32k) and 5 disks,
273: * a single 64K write will typically require 64K for the old data,
274: * 64K for the old parity, and 64K for the new parity, for a total
1.28 oster 275: * of 192K (if the parity buffer is not re-used immediately).
1.110 oster 276: * Even it if is used immediately, that's still 128K, which when multiplied
1.28 oster 277: * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
1.186 perry 278: *
1.28 oster 279: * Now in degraded mode, for example, a 64K read on the above setup may
1.186 perry 280: * require data reconstruction, which will require *all* of the 4 remaining
1.28 oster 281: * disks to participate -- 4 * 32K/disk == 128K again.
1.20 oster 282: */
283:
284: #ifndef RAIDOUTSTANDING
1.28 oster 285: #define RAIDOUTSTANDING 6
1.20 oster 286: #endif
287:
1.1 oster 288: #define RAIDLABELDEV(dev) \
289: (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
290:
291: /* declared here, and made public, for the benefit of KVM stuff.. */
1.9 oster 292:
1.104 oster 293: static int raidlock(struct raid_softc *);
294: static void raidunlock(struct raid_softc *);
1.1 oster 295:
1.266 dyoung 296: static int raid_detach_unlocked(struct raid_softc *);
297:
1.104 oster 298: static void rf_markalldirty(RF_Raid_t *);
1.304 christos 299: static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
1.48 oster 300:
1.353 mrg 301: void rf_ReconThread(struct rf_recon_req_internal *);
1.104 oster 302: void rf_RewriteParityThread(RF_Raid_t *raidPtr);
303: void rf_CopybackThread(RF_Raid_t *raidPtr);
1.353 mrg 304: void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
1.261 dyoung 305: int rf_autoconfig(device_t);
1.142 thorpej 306: void rf_buildroothack(RF_ConfigSet_t *);
1.104 oster 307:
308: RF_AutoConfig_t *rf_find_raid_components(void);
309: RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
310: static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
1.292 oster 311: int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
1.104 oster 312: void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
313: int rf_set_autoconfig(RF_Raid_t *, int);
314: int rf_set_rootpartition(RF_Raid_t *, int);
315: void rf_release_all_vps(RF_ConfigSet_t *);
316: void rf_cleanup_config_set(RF_ConfigSet_t *);
317: int rf_have_enough_components(RF_ConfigSet_t *);
1.300 christos 318: struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
1.278 mrg 319: static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
1.48 oster 320:
1.295 erh 321: /*
322: * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
323: * Note that this is overridden by having RAID_AUTOCONFIG as an option
324: * in the kernel config file.
325: */
326: #ifdef RAID_AUTOCONFIG
327: int raidautoconfig = 1;
328: #else
329: int raidautoconfig = 0;
330: #endif
331: static bool raidautoconfigdone = false;
1.37 oster 332:
1.177 oster 333: struct RF_Pools_s rf_pools;
334:
1.300 christos 335: static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
336: static kmutex_t raid_lock;
1.1 oster 337:
1.300 christos 338: static struct raid_softc *
339: raidcreate(int unit) {
340: struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
341: sc->sc_unit = unit;
1.327 pgoyette 342: cv_init(&sc->sc_cv, "raidunit");
343: mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
1.300 christos 344: return sc;
345: }
1.1 oster 346:
1.300 christos 347: static void
348: raiddestroy(struct raid_softc *sc) {
1.327 pgoyette 349: cv_destroy(&sc->sc_cv);
350: mutex_destroy(&sc->sc_mutex);
1.300 christos 351: kmem_free(sc, sizeof(*sc));
352: }
1.50 oster 353:
1.300 christos 354: static struct raid_softc *
1.327 pgoyette 355: raidget(int unit, bool create) {
1.300 christos 356: struct raid_softc *sc;
357: if (unit < 0) {
358: #ifdef DIAGNOSTIC
359: panic("%s: unit %d!", __func__, unit);
360: #endif
361: return NULL;
362: }
363: mutex_enter(&raid_lock);
364: LIST_FOREACH(sc, &raids, sc_link) {
365: if (sc->sc_unit == unit) {
366: mutex_exit(&raid_lock);
367: return sc;
368: }
369: }
370: mutex_exit(&raid_lock);
1.327 pgoyette 371: if (!create)
372: return NULL;
1.300 christos 373: if ((sc = raidcreate(unit)) == NULL)
374: return NULL;
375: mutex_enter(&raid_lock);
376: LIST_INSERT_HEAD(&raids, sc, sc_link);
377: mutex_exit(&raid_lock);
378: return sc;
379: }
380:
/*
 * Unlink a softc from the global raids list and free it.  The caller
 * must guarantee no other references to it remain.
 */
static void
raidput(struct raid_softc *sc) {
	mutex_enter(&raid_lock);
	LIST_REMOVE(sc, sc_link);
	mutex_exit(&raid_lock);
	raiddestroy(sc);
}
1.1 oster 388:
/*
 * Historical pseudo-device attach entry point; intentionally empty.
 */
void
raidattach(int num)
{

	/*
	 * Device attachment and associated initialization now occurs
	 * as part of the module initialization.
	 */
}
398:
399: int
1.261 dyoung 400: rf_autoconfig(device_t self)
1.142 thorpej 401: {
402: RF_AutoConfig_t *ac_list;
403: RF_ConfigSet_t *config_sets;
404:
1.295 erh 405: if (!raidautoconfig || raidautoconfigdone == true)
1.142 thorpej 406: return (0);
407:
408: /* XXX This code can only be run once. */
1.295 erh 409: raidautoconfigdone = true;
1.142 thorpej 410:
1.307 christos 411: #ifdef __HAVE_CPU_BOOTCONF
412: /*
413: * 0. find the boot device if needed first so we can use it later
414: * this needs to be done before we autoconfigure any raid sets,
415: * because if we use wedges we are not going to be able to open
416: * the boot device later
417: */
418: if (booted_device == NULL)
419: cpu_bootconf();
420: #endif
1.48 oster 421: /* 1. locate all RAID components on the system */
1.258 ad 422: aprint_debug("Searching for RAID components...\n");
1.48 oster 423: ac_list = rf_find_raid_components();
424:
1.142 thorpej 425: /* 2. Sort them into their respective sets. */
1.48 oster 426: config_sets = rf_create_auto_sets(ac_list);
427:
1.142 thorpej 428: /*
1.299 oster 429: * 3. Evaluate each set and configure the valid ones.
1.142 thorpej 430: * This gets done in rf_buildroothack().
431: */
432: rf_buildroothack(config_sets);
1.48 oster 433:
1.213 christos 434: return 1;
1.48 oster 435: }
436:
/* True (non-zero) when the set has been configured (RAIDF_INITED set). */
int
rf_inited(const struct raid_softc *rs) {
	return (rs->sc_flags & RAIDF_INITED) != 0;
}
441:
/* Accessor: the RF_Raid_t embedded in this softc. */
RF_Raid_t *
rf_get_raid(struct raid_softc *rs) {
	return &rs->sc_r;
}
446:
/* Accessor: the raid unit number of this softc. */
int
rf_get_unit(const struct raid_softc *rs) {
	return rs->sc_unit;
}
451:
/*
 * Return 1 if the RAID set 'r' has the device 'bdv' among its
 * components, 0 otherwise.  The comparison is by device name; for
 * wedge components ("dkN") the parent disk's name is compared instead.
 */
static int
rf_containsboot(RF_Raid_t *r, device_t bdv) {
	const char *bootname;
	size_t len;

	/* if bdv is NULL, the set can't contain it. exit early. */
	if (bdv == NULL)
		return 0;

	bootname = device_xname(bdv);
	len = strlen(bootname);

	for (int col = 0; col < r->numCol; col++) {
		const char *devname = r->Disks[col].devname;
		/* skip the "/dev/" prefix of the component path */
		devname += sizeof("/dev/") - 1;
		if (strncmp(devname, "dk", 2) == 0) {
			/* wedge component: compare the parent disk name */
			const char *parent =
			    dkwedge_get_parent_name(r->Disks[col].dev);
			if (parent != NULL)
				devname = parent;
		}
		/* NOTE: prefix match against the boot device name */
		if (strncmp(devname, bootname, len) == 0) {
			struct raid_softc *sc = r->softc;
			aprint_debug("raid%d includes boot device %s\n",
			    sc->sc_unit, devname);
			return 1;
		}
	}
	return 0;
}
482:
1.48 oster 483: void
1.142 thorpej 484: rf_buildroothack(RF_ConfigSet_t *config_sets)
1.48 oster 485: {
486: RF_ConfigSet_t *cset;
487: RF_ConfigSet_t *next_cset;
1.51 oster 488: int num_root;
1.300 christos 489: struct raid_softc *sc, *rsc;
1.335 mlelstv 490: struct dk_softc *dksc;
1.48 oster 491:
1.300 christos 492: sc = rsc = NULL;
1.51 oster 493: num_root = 0;
1.48 oster 494: cset = config_sets;
1.271 dyoung 495: while (cset != NULL) {
1.48 oster 496: next_cset = cset->next;
1.186 perry 497: if (rf_have_enough_components(cset) &&
1.300 christos 498: cset->ac->clabel->autoconfigure == 1) {
499: sc = rf_auto_config_set(cset);
500: if (sc != NULL) {
1.359 bad 501: aprint_debug("raid%d: configured ok, rootable %d\n",
502: sc->sc_unit, cset->rootable);
1.51 oster 503: if (cset->rootable) {
1.300 christos 504: rsc = sc;
1.51 oster 505: num_root++;
506: }
507: } else {
508: /* The autoconfig didn't work :( */
1.300 christos 509: aprint_debug("Autoconfig failed\n");
1.51 oster 510: rf_release_all_vps(cset);
1.48 oster 511: }
512: } else {
1.186 perry 513: /* we're not autoconfiguring this set...
1.48 oster 514: release the associated resources */
1.49 oster 515: rf_release_all_vps(cset);
1.48 oster 516: }
517: /* cleanup */
1.49 oster 518: rf_cleanup_config_set(cset);
1.48 oster 519: cset = next_cset;
520: }
1.335 mlelstv 521: dksc = &rsc->sc_dksc;
1.122 oster 522:
1.223 oster 523: /* if the user has specified what the root device should be
524: then we don't touch booted_device or boothowto... */
525:
1.359 bad 526: if (rootspec != NULL) {
527: DPRINTF("%s: rootspec %s\n", __func__, rootspec);
1.223 oster 528: return;
1.359 bad 529: }
1.223 oster 530:
1.122 oster 531: /* we found something bootable... */
532:
1.310 christos 533: /*
534: * XXX: The following code assumes that the root raid
535: * is the first ('a') partition. This is about the best
536: * we can do with a BSD disklabel, but we might be able
537: * to do better with a GPT label, by setting a specified
538: * attribute to indicate the root partition. We can then
539: * stash the partition number in the r->root_partition
540: * high bits (the bottom 2 bits are already used). For
541: * now we just set booted_partition to 0 when we override
542: * root.
543: */
1.122 oster 544: if (num_root == 1) {
1.306 christos 545: device_t candidate_root;
1.335 mlelstv 546: if (dksc->sc_dkdev.dk_nwedges != 0) {
1.297 christos 547: char cname[sizeof(cset->ac->devname)];
1.344 christos 548: /* XXX: assume partition 'a' first */
1.297 christos 549: snprintf(cname, sizeof(cname), "%s%c",
1.335 mlelstv 550: device_xname(dksc->sc_dev), 'a');
1.306 christos 551: candidate_root = dkwedge_find_by_wname(cname);
1.344 christos 552: DPRINTF("%s: candidate wedge root=%s\n", __func__,
553: cname);
554: if (candidate_root == NULL) {
555: /*
556: * If that is not found, because we don't use
557: * disklabel, return the first dk child
558: * XXX: we can skip the 'a' check above
559: * and always do this...
560: */
561: size_t i = 0;
562: candidate_root = dkwedge_find_by_parent(
563: device_xname(dksc->sc_dev), &i);
564: }
565: DPRINTF("%s: candidate wedge root=%p\n", __func__,
566: candidate_root);
1.297 christos 567: } else
1.335 mlelstv 568: candidate_root = dksc->sc_dev;
1.344 christos 569: DPRINTF("%s: candidate root=%p\n", __func__, candidate_root);
570: DPRINTF("%s: booted_device=%p root_partition=%d "
1.359 bad 571: "contains_boot=%d",
572: __func__, booted_device, rsc->sc_r.root_partition,
573: rf_containsboot(&rsc->sc_r, booted_device));
574: /* XXX the check for booted_device == NULL can probably be
575: * dropped, now that rf_containsboot handles that case.
576: */
1.308 christos 577: if (booted_device == NULL ||
578: rsc->sc_r.root_partition == 1 ||
1.310 christos 579: rf_containsboot(&rsc->sc_r, booted_device)) {
1.308 christos 580: booted_device = candidate_root;
1.351 christos 581: booted_method = "raidframe/single";
1.310 christos 582: booted_partition = 0; /* XXX assume 'a' */
583: }
1.122 oster 584: } else if (num_root > 1) {
1.344 christos 585: DPRINTF("%s: many roots=%d, %p\n", __func__, num_root,
586: booted_device);
1.226 oster 587:
588: /*
589: * Maybe the MD code can help. If it cannot, then
590: * setroot() will discover that we have no
591: * booted_device and will ask the user if nothing was
592: * hardwired in the kernel config file
593: */
594: if (booted_device == NULL)
595: return;
596:
597: num_root = 0;
1.300 christos 598: mutex_enter(&raid_lock);
599: LIST_FOREACH(sc, &raids, sc_link) {
600: RF_Raid_t *r = &sc->sc_r;
601: if (r->valid == 0)
1.226 oster 602: continue;
603:
1.300 christos 604: if (r->root_partition == 0)
1.226 oster 605: continue;
606:
1.306 christos 607: if (rf_containsboot(r, booted_device)) {
1.226 oster 608: num_root++;
1.300 christos 609: rsc = sc;
1.335 mlelstv 610: dksc = &rsc->sc_dksc;
1.226 oster 611: }
612: }
1.300 christos 613: mutex_exit(&raid_lock);
1.295 erh 614:
1.226 oster 615: if (num_root == 1) {
1.335 mlelstv 616: booted_device = dksc->sc_dev;
1.351 christos 617: booted_method = "raidframe/multi";
1.310 christos 618: booted_partition = 0; /* XXX assume 'a' */
1.226 oster 619: } else {
620: /* we can't guess.. require the user to answer... */
621: boothowto |= RB_ASKNAME;
622: }
1.51 oster 623: }
1.1 oster 624: }
625:
1.324 mrg 626: static int
1.169 oster 627: raidsize(dev_t dev)
1.1 oster 628: {
629: struct raid_softc *rs;
1.335 mlelstv 630: struct dk_softc *dksc;
631: unsigned int unit;
1.1 oster 632:
633: unit = raidunit(dev);
1.327 pgoyette 634: if ((rs = raidget(unit, false)) == NULL)
1.336 mlelstv 635: return -1;
1.335 mlelstv 636: dksc = &rs->sc_dksc;
637:
1.1 oster 638: if ((rs->sc_flags & RAIDF_INITED) == 0)
1.336 mlelstv 639: return -1;
1.1 oster 640:
1.335 mlelstv 641: return dk_size(dksc, dev);
642: }
1.1 oster 643:
1.335 mlelstv 644: static int
645: raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
646: {
647: unsigned int unit;
648: struct raid_softc *rs;
649: struct dk_softc *dksc;
1.1 oster 650:
1.335 mlelstv 651: unit = raidunit(dev);
652: if ((rs = raidget(unit, false)) == NULL)
653: return ENXIO;
654: dksc = &rs->sc_dksc;
1.1 oster 655:
1.335 mlelstv 656: if ((rs->sc_flags & RAIDF_INITED) == 0)
657: return ENODEV;
1.1 oster 658:
1.336 mlelstv 659: /*
660: Note that blkno is relative to this particular partition.
661: By adding adding RF_PROTECTED_SECTORS, we get a value that
662: is relative to the partition used for the underlying component.
663: */
664: blkno += RF_PROTECTED_SECTORS;
665:
1.335 mlelstv 666: return dk_dump(dksc, dev, blkno, va, size);
1.1 oster 667: }
668:
/*
 * Write 'nblk' blocks at 'blkno' to one live component of a RAID 1 set
 * during a crash dump.  Only two-column (1 data + 1 parity) sets are
 * supported; the best available component is chosen in the preference
 * order documented below and the dump is issued directly through its
 * block device's d_dump entry.
 */
static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int c, sparecol, j, scol, dumpto;
	int error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	*/

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	*/

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			/* find which column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				*/
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we haven't found anything
				   else so far.
				*/
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	/* dump straight to the chosen component's block device */
	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}
1.324 mrg 774:
/* ARGSUSED */
/*
 * Open the raid device.  Creates the softc on first reference; refuses
 * to open a unit that is being shut down.  On the first open of a
 * configured set, the component labels are marked dirty before any I/O
 * can happen.
 */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	int error = 0;
	int part, pmask;

	/* create=true: opening allocates the softc if it doesn't exist */
	if ((rs = raidget(unit, true)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	dksc = &rs->sc_dksc;

	part = DISKPART(dev);
	pmask = (1 << part);

	/* only on the transition from fully-closed to open */
	if (!DK_BUSY(dksc, pmask) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		 have done a configure before this.  I DO NOT WANT TO BE
		 SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		 THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		 here... If so, we needn't do this, but then need some
		 other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}

	/* delegate the actual open bookkeeping to the dk layer */
	if ((rs->sc_flags & RAIDF_INITED) != 0)
		error = dk_open(dksc, dev, flags, fmt, l);

bad:
	raidunlock(rs);

	return (error);


}
1.324 mrg 824:
/*
 * dk-layer callback invoked on the last close of the device: write the
 * final "clean" component labels and, if a shutdown was requested,
 * flag the softc for detach (the detach itself happens in raidclose()).
 */
static int
raid_lastclose(device_t self)
{
	struct raid_softc *rs = raidsoftc(self);

	/* Last one... device is not unconfigured yet.
	   Device shutdown has taken care of setting the
	   clean bits if RAIDF_INITED is not set
	   mark things as clean... */

	rf_update_component_labels(&rs->sc_r,
	    RF_FINAL_COMPONENT_UPDATE);

	/* pass to unlocked code */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		rs->sc_flags |= RAIDF_DETACH;

	return 0;
}
844:
/* ARGSUSED */
/*
 * Close the raid device.  Performs any detach/teardown that was
 * deferred from raid_lastclose() (RAIDF_DETACH) or requested while the
 * set was unconfigured (RAIDF_SHUTDOWN); both are done only after the
 * softc lock has been dropped.
 */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	cfdata_t cf;
	int error = 0, do_detach = 0, do_put = 0;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((error = raidlock(rs)) != 0)
		return (error);

	if ((rs->sc_flags & RAIDF_INITED) != 0) {
		/* dk_close() may call raid_lastclose(), setting RAIDF_DETACH */
		error = dk_close(dksc, dev, flags, fmt, l);
		if ((rs->sc_flags & RAIDF_DETACH) != 0)
			do_detach = 1;
	} else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		do_put = 1;

	raidunlock(rs);

	if (do_detach) {
		/* free the pseudo device attach bits */
		cf = device_cfdata(dksc->sc_dev);
		error = config_detach(dksc->sc_dev, 0);
		if (error == 0)
			free(cf, M_RAIDFRAME);
	} else if (do_put) {
		raidput(rs);
	}

	return (error);

}
1.327 pgoyette 884:
/*
 * Signal iodone_cv under iodone_lock to wake whatever is waiting on
 * this set's I/O-done condition so pending work gets processed.
 */
static void
raid_wakeup(RF_Raid_t *raidPtr)
{
	rf_lock_mutex2(raidPtr->iodone_lock);
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);
}
892:
1.324 mrg 893: static void
1.169 oster 894: raidstrategy(struct buf *bp)
1.1 oster 895: {
1.335 mlelstv 896: unsigned int unit;
897: struct raid_softc *rs;
898: struct dk_softc *dksc;
1.1 oster 899: RF_Raid_t *raidPtr;
900:
1.335 mlelstv 901: unit = raidunit(bp->b_dev);
1.327 pgoyette 902: if ((rs = raidget(unit, false)) == NULL) {
1.30 oster 903: bp->b_error = ENXIO;
1.335 mlelstv 904: goto fail;
1.30 oster 905: }
1.300 christos 906: if ((rs->sc_flags & RAIDF_INITED) == 0) {
907: bp->b_error = ENXIO;
1.335 mlelstv 908: goto fail;
1.1 oster 909: }
1.335 mlelstv 910: dksc = &rs->sc_dksc;
1.300 christos 911: raidPtr = &rs->sc_r;
1.335 mlelstv 912:
913: /* Queue IO only */
914: if (dk_strategy_defer(dksc, bp))
1.196 yamt 915: goto done;
1.1 oster 916:
1.335 mlelstv 917: /* schedule the IO to happen at the next convenient time */
918: raid_wakeup(raidPtr);
919:
920: done:
921: return;
922:
923: fail:
924: bp->b_resid = bp->b_bcount;
925: biodone(bp);
926: }
927:
928: static int
929: raid_diskstart(device_t dev, struct buf *bp)
930: {
931: struct raid_softc *rs = raidsoftc(dev);
932: RF_Raid_t *raidPtr;
1.1 oster 933:
1.335 mlelstv 934: raidPtr = &rs->sc_r;
935: if (!raidPtr->valid) {
936: db1_printf(("raid is not valid..\n"));
937: return ENODEV;
1.196 yamt 938: }
1.285 mrg 939:
1.335 mlelstv 940: /* XXX */
941: bp->b_resid = 0;
942:
943: return raiddoaccess(raidPtr, bp);
944: }
1.1 oster 945:
1.335 mlelstv 946: void
947: raiddone(RF_Raid_t *raidPtr, struct buf *bp)
948: {
949: struct raid_softc *rs;
950: struct dk_softc *dksc;
1.34 oster 951:
1.335 mlelstv 952: rs = raidPtr->softc;
953: dksc = &rs->sc_dksc;
1.34 oster 954:
1.335 mlelstv 955: dk_done(dksc, bp);
1.34 oster 956:
1.335 mlelstv 957: rf_lock_mutex2(raidPtr->mutex);
958: raidPtr->openings++;
959: rf_unlock_mutex2(raidPtr->mutex);
1.196 yamt 960:
1.335 mlelstv 961: /* schedule more IO */
962: raid_wakeup(raidPtr);
1.1 oster 963: }
1.324 mrg 964:
1.1 oster 965: /* ARGSUSED */
1.324 mrg 966: static int
1.222 christos 967: raidread(dev_t dev, struct uio *uio, int flags)
1.1 oster 968: {
1.9 oster 969: int unit = raidunit(dev);
1.1 oster 970: struct raid_softc *rs;
971:
1.327 pgoyette 972: if ((rs = raidget(unit, false)) == NULL)
1.300 christos 973: return ENXIO;
1.1 oster 974:
975: if ((rs->sc_flags & RAIDF_INITED) == 0)
976: return (ENXIO);
977:
978: return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
979:
980: }
1.324 mrg 981:
1.1 oster 982: /* ARGSUSED */
1.324 mrg 983: static int
1.222 christos 984: raidwrite(dev_t dev, struct uio *uio, int flags)
1.1 oster 985: {
1.9 oster 986: int unit = raidunit(dev);
1.1 oster 987: struct raid_softc *rs;
988:
1.327 pgoyette 989: if ((rs = raidget(unit, false)) == NULL)
1.300 christos 990: return ENXIO;
1.1 oster 991:
992: if ((rs->sc_flags & RAIDF_INITED) == 0)
993: return (ENXIO);
1.147 oster 994:
1.1 oster 995: return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
996:
997: }
998:
/*
 * Tear down a configured RAID set.  Caller holds the unit lock.
 *
 * Refuses with EBUSY while any partition is open or a reconstruction,
 * parity rewrite, or copyback is still running.  Otherwise shuts the
 * RAIDframe engine down, drains and frees the buffer queue, and
 * detaches the disk from the dk(9)/disk(9) frameworks.
 *
 * Returns 0 on success (including the not-inited no-op case) or errno.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	/* Busy if open (DK_BUSY) or any background operation is live. */
	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}
1036:
/*
 * Return true if "cmd" is one of the ioctls that requires a configured
 * (RAIDF_INITED) RAID set but this unit is not initialized.  The
 * caller (raidioctl) then fails the request with ENXIO.  Commands not
 * in the list never require initialization, so the answer is false.
 */
static bool
rf_must_be_initialized(const struct raid_softc *rs, u_long cmd)
{
	switch (cmd) {
	case RAIDFRAME_ADD_HOT_SPARE:
	case RAIDFRAME_CHECK_COPYBACK_STATUS:
	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
	case RAIDFRAME_CHECK_PARITY:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
	case RAIDFRAME_CHECK_RECON_STATUS:
	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
	case RAIDFRAME_COPYBACK:
	case RAIDFRAME_DELETE_COMPONENT:
	case RAIDFRAME_FAIL_DISK:
	case RAIDFRAME_GET_ACCTOTALS:
	case RAIDFRAME_GET_COMPONENT_LABEL:
	case RAIDFRAME_GET_INFO:
	case RAIDFRAME_GET_SIZE:
	case RAIDFRAME_INCORPORATE_HOT_SPARE:
	case RAIDFRAME_INIT_LABELS:
	case RAIDFRAME_KEEP_ACCTOTALS:
	case RAIDFRAME_PARITYMAP_GET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_DISABLE:
	case RAIDFRAME_PARITYMAP_SET_PARAMS:
	case RAIDFRAME_PARITYMAP_STATUS:
	case RAIDFRAME_REBUILD_IN_PLACE:
	case RAIDFRAME_REMOVE_HOT_SPARE:
	case RAIDFRAME_RESET_ACCTOTALS:
	case RAIDFRAME_REWRITEPARITY:
	case RAIDFRAME_SET_AUTOCONFIG:
	case RAIDFRAME_SET_COMPONENT_LABEL:
	case RAIDFRAME_SET_ROOT:
		return (rs->sc_flags & RAIDF_INITED) == 0;
	}
	return false;
}
1074:
/*
 * Administratively fail the component named in "rr" and spawn a
 * reconstruction thread to rebuild it.
 *
 * Rejects (EINVAL) RAID 0 sets, out-of-range columns, sets already
 * reconstructing, sets with some other failed component, and spared
 * components.  The user's request is copied into a kernel-owned
 * rf_recon_req_internal so the recon thread never touches user memory.
 *
 * Returns 0/errno; on success the recon thread has been created.
 */
int
rf_fail_disk(RF_Raid_t *raidPtr, struct rf_recon_req *rr)
{
	struct rf_recon_req_internal *rrint;

	if (raidPtr->Layout.map->faultsTolerated == 0) {
		/* Can't do this on a RAID 0!! */
		return EINVAL;
	}

	if (rr->col < 0 || rr->col >= raidPtr->numCol) {
		/* bad column */
		return EINVAL;
	}

	/* The status checks below must be made atomically. */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->status == rf_rs_reconstructing) {
		/* you can't fail a disk while we're reconstructing! */
		/* XXX wrong for RAID6 */
		goto out;
	}
	if ((raidPtr->Disks[rr->col].status == rf_ds_optimal) &&
	    (raidPtr->numFailures > 0)) {
		/* some other component has failed.  Let's not make
		   things worse. XXX wrong for RAID6 */
		goto out;
	}
	if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
		/* Can't fail a spared disk! */
		goto out;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	/* make a copy of the recon request so that we don't rely on
	 * the user's buffer */
	RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
	if (rrint == NULL)
		return(ENOMEM);
	rrint->col = rr->col;
	rrint->flags = rr->flags;
	rrint->raidPtr = raidPtr;

	return RF_CREATE_THREAD(raidPtr->recon_thread, rf_ReconThread,
	    rrint, "raid_recon");
out:
	rf_unlock_mutex2(raidPtr->mutex);
	return EINVAL;
}
1123:
1.324 mrg 1124: static int
1.367 christos 1125: rf_copyinspecificbuf(RF_Config_t *k_cfg)
1126: {
1127: /* allocate a buffer for the layout-specific data, and copy it in */
1128: if (k_cfg->layoutSpecificSize == 0)
1129: return 0;
1130:
1131: if (k_cfg->layoutSpecificSize > 10000) {
1132: /* sanity check */
1133: return EINVAL;
1134: }
1135:
1136: u_char *specific_buf;
1137: RF_Malloc(specific_buf, k_cfg->layoutSpecificSize, (u_char *));
1138: if (specific_buf == NULL)
1139: return ENOMEM;
1140:
1141: int retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1142: k_cfg->layoutSpecificSize);
1143: if (retcode) {
1144: RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1145: db1_printf(("%s: retcode=%d copyin.2\n", __func__, retcode));
1146: return retcode;
1147: }
1148:
1149: k_cfg->layoutSpecific = specific_buf;
1150: return 0;
1151: }
1152:
1153: static int
1154: rf_getConfiguration(struct raid_softc *rs, void *data, RF_Config_t **k_cfg)
1155: {
1156: if (rs->sc_r.valid) {
1157: /* There is a valid RAID set running on this unit! */
1158: printf("raid%d: Device already configured!\n", rs->sc_unit);
1159: return EINVAL;
1160: }
1161:
1162: /* copy-in the configuration information */
1163: /* data points to a pointer to the configuration structure */
1164: RF_Malloc(*k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1165: if (*k_cfg == NULL) {
1166: return ENOMEM;
1167: }
1168: int retcode = copyin(data, k_cfg, sizeof(RF_Config_t));
1169: if (retcode == 0)
1170: return 0;
1171: RF_Free(*k_cfg, sizeof(RF_Config_t));
1172: db1_printf(("%s: retcode=%d copyin.1\n", __func__, retcode));
1173: rs->sc_flags |= RAIDF_SHUTDOWN;
1174: return retcode;
1175: }
1176:
/*
 * Build a RAID set from the kernel copy of the configuration.
 *
 * Copies in the layout-specific data, zeroes and configures the
 * RF_Raid_t, and on success attaches the pseudo device (raidinit) and
 * marks all components dirty.  Always frees k_cfg (and the
 * layout-specific buffer) before returning.  On failure the unit is
 * flagged RAIDF_SHUTDOWN so it is detached on close.
 *
 * Returns 0 or an errno value.
 */
int
rf_construct(struct raid_softc *rs, RF_Config_t *k_cfg)
{
	int retcode;
	RF_Raid_t *raidPtr = &rs->sc_r;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((retcode = rf_copyinspecificbuf(k_cfg)) != 0)
		goto out;

	/* should do some kind of sanity check on the configuration.
	 * Store the sum of all the bytes in the last byte? */

	/* configure the system */

	/*
	 * Clear the entire RAID descriptor, just to make sure
	 * there is no stale data left in the case of a
	 * reconfiguration
	 */
	memset(raidPtr, 0, sizeof(*raidPtr));
	raidPtr->softc = rs;
	raidPtr->raidid = rs->sc_unit;

	retcode = rf_Configure(raidPtr, k_cfg, NULL);

	if (retcode == 0) {
		/* allow this many simultaneous IO's to
		   this RAID device */
		raidPtr->openings = RAIDOUTSTANDING;

		raidinit(rs);
		raid_wakeup(raidPtr);
		rf_markalldirty(raidPtr);
	}

	/* free the buffers.  No return code here. */
	if (k_cfg->layoutSpecificSize) {
		RF_Free(k_cfg->layoutSpecific, k_cfg->layoutSpecificSize);
	}
out:
	RF_Free(k_cfg, sizeof(RF_Config_t));
	if (retcode) {
		/*
		 * If configuration failed, set sc_flags so that we
		 * will detach the device when we close it.
		 */
		rs->sc_flags |= RAIDF_SHUTDOWN;
	}
	return retcode;
}
1229:
#if RF_DISABLED
/*
 * Handler for RAIDFRAME_SET_COMPONENT_LABEL.  Currently compiled out:
 * it validates the column and copies the label in, but deliberately
 * does nothing useful yet (see XXX notes below).
 */
static int
rf_set_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{

	/* XXX check the label for valid stuff... */
	/* Note that some things *should not* get modified --
	   the user should be re-initing the labels instead of
	   trying to patch things.
	   */
#ifdef DEBUG
	int raidid = raidPtr->raidid;
	printf("raid%d: Got component label:\n", raidid);
	printf("raid%d: Version: %d\n", raidid, clabel->version);
	printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
	printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
	printf("raid%d: Column: %d\n", raidid, clabel->column);
	printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
	printf("raid%d: Clean: %d\n", raidid, clabel->clean);
	printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif	/* DEBUG */
	clabel->row = 0;
	int column = clabel->column;

	if ((column < 0) || (column >= raidPtr->numCol)) {
		return(EINVAL);
	}

	/* XXX this isn't allowed to do anything for now :-) */

	/* XXX and before it is, we need to fill in the rest
	   of the fields!?!?!?! */
	memcpy(raidget_component_label(raidPtr, column),
	    clabel, sizeof(*clabel));
	raidflush_component_label(raidPtr, column);
	return 0;
}
#endif
1268:
1269: static int
1270: rf_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
1271: {
1272: /*
1273: we only want the serial number from
1274: the above. We get all the rest of the information
1275: from the config that was used to create this RAID
1276: set.
1277: */
1278:
1279: raidPtr->serial_number = clabel->serial_number;
1280:
1281: for (int column = 0; column < raidPtr->numCol; column++) {
1282: RF_RaidDisk_t *diskPtr = &raidPtr->Disks[column];
1283: if (RF_DEAD_DISK(diskPtr->status))
1284: continue;
1285: RF_ComponentLabel_t *ci_label = raidget_component_label(
1286: raidPtr, column);
1287: /* Zeroing this is important. */
1288: memset(ci_label, 0, sizeof(*ci_label));
1289: raid_init_component_label(raidPtr, ci_label);
1290: ci_label->serial_number = raidPtr->serial_number;
1291: ci_label->row = 0; /* we dont' pretend to support more */
1292: rf_component_label_set_partitionsize(ci_label,
1293: diskPtr->partitionSize);
1294: ci_label->column = column;
1295: raidflush_component_label(raidPtr, column);
1296: /* XXXjld what about the spares? */
1297: }
1298:
1299: return 0;
1300: }
1301:
/*
 * Handler for RAIDFRAME_REBUILD_IN_PLACE: rebuild a component onto
 * itself (e.g. after a transient failure), by spawning a
 * reconstruct-in-place thread.
 *
 * Rejects (EINVAL) RAID 0 sets, concurrent reconstructions,
 * out-of-range columns, sets where some *other* component has failed,
 * and spared components.  Returns 0/errno.
 */
static int
rf_rebuild_in_place(RF_Raid_t *raidPtr, RF_SingleComponent_t *componentPtr)
{

	if (raidPtr->Layout.map->faultsTolerated == 0) {
		/* Can't do this on a RAID 0!! */
		return EINVAL;
	}

	if (raidPtr->recon_in_progress == 1) {
		/* a reconstruct is already in progress! */
		return EINVAL;
	}

	/* Copy the request; don't trust the user's buffer afterwards. */
	RF_SingleComponent_t component;
	memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
	component.row = 0; /* we don't support any more */
	int column = component.column;

	if ((column < 0) || (column >= raidPtr->numCol)) {
		return EINVAL;
	}

	/* Status checks must be atomic with respect to failures. */
	rf_lock_mutex2(raidPtr->mutex);
	if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
	    (raidPtr->numFailures > 0)) {
		/* XXX 0 above shouldn't be constant!!! */
		/* some component other than this has failed.
		   Let's not make things worse than they already
		   are... */
		printf("raid%d: Unable to reconstruct to disk at:\n",
		    raidPtr->raidid);
		printf("raid%d:     Col: %d   Too many failures.\n",
		    raidPtr->raidid, column);
		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	if (raidPtr->Disks[column].status == rf_ds_reconstructing) {
		printf("raid%d: Unable to reconstruct to disk at:\n",
		    raidPtr->raidid);
		printf("raid%d:    Col: %d   "
		    "Reconstruction already occurring!\n",
		    raidPtr->raidid, column);

		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	if (raidPtr->Disks[column].status == rf_ds_spared) {
		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	rf_unlock_mutex2(raidPtr->mutex);

	/* Kernel-owned copy of the request for the recon thread. */
	struct rf_recon_req_internal *rrint;
	RF_Malloc(rrint, sizeof(*rrint), (struct rf_recon_req_internal *));
	if (rrint == NULL)
		return ENOMEM;

	rrint->col = column;
	rrint->raidPtr = raidPtr;

	return RF_CREATE_THREAD(raidPtr->recon_thread,
	    rf_ReconstructInPlaceThread, rrint, "raid_reconip");
}
1369:
1370: static int
1371: rf_check_recon_status(RF_Raid_t *raidPtr, int *data)
1372: {
1373: /*
1374: * This makes no sense on a RAID 0, or if we are not reconstructing
1375: * so tell the user it's done.
1376: */
1377: if (raidPtr->Layout.map->faultsTolerated == 0 ||
1378: raidPtr->status != rf_rs_reconstructing) {
1379: *data = 100;
1380: return 0;
1381: }
1382: if (raidPtr->reconControl->numRUsTotal == 0) {
1383: *data = 0;
1384: return 0;
1385: }
1386: *data = (raidPtr->reconControl->numRUsComplete * 100
1387: / raidPtr->reconControl->numRUsTotal);
1388: return 0;
1389: }
1390:
/*
 * ioctl entry point for the raid device.
 *
 * Dispatches RAIDFRAME_* control requests (most of the work lives in
 * the rf_* helpers above), falls back to compat hooks (netbsd32,
 * raid_80, raid_50) for unknown commands, and finally hands "regular"
 * disk ioctls to dk_ioctl().  Returns 0 or an errno value;
 * EPASSTHROUGH from the compat hooks means "not mine, keep going".
 */
static int
raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	int unit = raidunit(dev);
	int part, pmask;
	struct raid_softc *rs;
	struct dk_softc *dksc;
	RF_Config_t *k_cfg;
	RF_Raid_t *raidPtr;
	RF_AccTotals_t *totals;
	RF_SingleComponent_t component;
	RF_DeviceConfig_t *d_cfg;
	int retcode = 0;
	int column;
	RF_ComponentLabel_t *clabel;
	RF_SingleComponent_t *sparePtr,*componentPtr;
	int d;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;

	dksc = &rs->sc_dksc;
	raidPtr = &rs->sc_r;

	db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
	    (int) DISKPART(dev), (int) unit, cmd));

	/* Must be initialized for these... */
	if (rf_must_be_initialized(rs, cmd))
		return ENXIO;

	switch (cmd) {
		/* configure the system */
	case RAIDFRAME_CONFIGURE:
		if ((retcode = rf_getConfiguration(rs, data, &k_cfg)) != 0)
			return retcode;
		return rf_construct(rs, k_cfg);

		/* shutdown the system */
	case RAIDFRAME_SHUTDOWN:

		part = DISKPART(dev);
		pmask = (1 << part);

		if ((retcode = raidlock(rs)) != 0)
			return retcode;

		if (DK_BUSY(dksc, pmask) ||
		    raidPtr->recon_in_progress != 0 ||
		    raidPtr->parity_rewrite_in_progress != 0 ||
		    raidPtr->copyback_in_progress != 0)
			retcode = EBUSY;
		else {
			/* detach and free on close */
			rs->sc_flags |= RAIDF_SHUTDOWN;
			retcode = 0;
		}

		raidunlock(rs);

		return retcode;
	case RAIDFRAME_GET_COMPONENT_LABEL:
		return rf_get_component_label(raidPtr, data);

#if RF_DISABLED
	case RAIDFRAME_SET_COMPONENT_LABEL:
		return rf_set_component_label(raidPtr, data);
#endif

	case RAIDFRAME_INIT_LABELS:
		return rf_init_component_label(raidPtr, data);

	case RAIDFRAME_SET_AUTOCONFIG:
		d = rf_set_autoconfig(raidPtr, *(int *) data);
		printf("raid%d: New autoconfig value is: %d\n",
		    raidPtr->raidid, d);
		*(int *) data = d;
		return retcode;

	case RAIDFRAME_SET_ROOT:
		d = rf_set_rootpartition(raidPtr, *(int *) data);
		printf("raid%d: New rootpartition value is: %d\n",
		    raidPtr->raidid, d);
		*(int *) data = d;
		return retcode;

		/* initialize all parity */
	case RAIDFRAME_REWRITEPARITY:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* Parity for RAID 0 is trivially correct */
			raidPtr->parity_good = RF_RAID_CLEAN;
			return 0;
		}

		if (raidPtr->parity_rewrite_in_progress == 1) {
			/* Re-write is already in progress! */
			return EINVAL;
		}

		return RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
		    rf_RewriteParityThread, raidPtr,"raid_parity");

	case RAIDFRAME_ADD_HOT_SPARE:
		sparePtr = (RF_SingleComponent_t *) data;
		memcpy(&component, sparePtr, sizeof(RF_SingleComponent_t));
		return rf_add_hot_spare(raidPtr, &component);

	case RAIDFRAME_REMOVE_HOT_SPARE:
		/* Not implemented; reports success. */
		return retcode;

	case RAIDFRAME_DELETE_COMPONENT:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
		return rf_delete_component(raidPtr, &component);

	case RAIDFRAME_INCORPORATE_HOT_SPARE:
		componentPtr = (RF_SingleComponent_t *)data;
		memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
		return rf_incorporate_hot_spare(raidPtr, &component);

	case RAIDFRAME_REBUILD_IN_PLACE:
		return rf_rebuild_in_place(raidPtr, data);

	case RAIDFRAME_GET_INFO:
		RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
		    (RF_DeviceConfig_t *));
		if (d_cfg == NULL)
			return ENOMEM;
		retcode = rf_get_info(raidPtr, d_cfg);
		if (retcode == 0) {
			retcode = copyout(d_cfg, data, sizeof(*d_cfg));
		}
		RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
		return retcode;

	case RAIDFRAME_CHECK_PARITY:
		*(int *) data = raidPtr->parity_good;
		return 0;

	case RAIDFRAME_PARITYMAP_STATUS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_status(raidPtr->parity_map, data);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_PARAMS:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		if (raidPtr->parity_map == NULL)
			return ENOENT; /* ??? */
		if (rf_paritymap_set_params(raidPtr->parity_map, data, 1) != 0)
			return EINVAL;
		return 0;

	case RAIDFRAME_PARITYMAP_GET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		*(int *) data = rf_paritymap_get_disable(raidPtr);
		return 0;

	case RAIDFRAME_PARITYMAP_SET_DISABLE:
		if (rf_paritymap_ineligible(raidPtr))
			return EINVAL;
		rf_paritymap_set_disable(raidPtr, *(int *)data);
		/* XXX should errors be passed up? */
		return 0;

	case RAIDFRAME_RESET_ACCTOTALS:
		memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
		return 0;

	case RAIDFRAME_GET_ACCTOTALS:
		totals = (RF_AccTotals_t *) data;
		*totals = raidPtr->acc_totals;
		return 0;

	case RAIDFRAME_KEEP_ACCTOTALS:
		raidPtr->keep_acc_totals = *(int *)data;
		return 0;

	case RAIDFRAME_GET_SIZE:
		*(int *) data = raidPtr->totalSectors;
		return 0;

	case RAIDFRAME_FAIL_DISK:
		return rf_fail_disk(raidPtr, data);

		/* invoke a copyback operation after recon on whatever disk
		 * needs it, if any */
	case RAIDFRAME_COPYBACK:

		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0!! */
			return EINVAL;
		}

		if (raidPtr->copyback_in_progress == 1) {
			/* Copyback is already in progress! */
			return EINVAL;
		}

		return RF_CREATE_THREAD(raidPtr->copyback_thread,
		    rf_CopybackThread, raidPtr, "raid_copyback");

		/* return the percentage completion of reconstruction */
	case RAIDFRAME_CHECK_RECON_STATUS:
		return rf_check_recon_status(raidPtr, data);

	case RAIDFRAME_CHECK_RECON_STATUS_EXT:
		rf_check_recon_status_ext(raidPtr, data);
		return 0;

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0, so tell the
			   user it's done. */
			*(int *) data = 100;
			return 0;
		}
		if (raidPtr->parity_rewrite_in_progress == 1) {
			*(int *) data = 100 *
			    raidPtr->parity_rewrite_stripes_done /
			    raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return 0;

	case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
		rf_check_parityrewrite_status_ext(raidPtr, data);
		return 0;

	case RAIDFRAME_CHECK_COPYBACK_STATUS:
		if (raidPtr->Layout.map->faultsTolerated == 0) {
			/* This makes no sense on a RAID 0 */
			*(int *) data = 100;
			return 0;
		}
		if (raidPtr->copyback_in_progress == 1) {
			*(int *) data = 100 * raidPtr->copyback_stripes_done /
			    raidPtr->Layout.numStripe;
		} else {
			*(int *) data = 100;
		}
		return 0;

	case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
		rf_check_copyback_status_ext(raidPtr, data);
		return 0;

	case RAIDFRAME_SET_LAST_UNIT:
		/* All components must be healthy before renumbering. */
		for (column = 0; column < raidPtr->numCol; column++)
			if (raidPtr->Disks[column].status != rf_ds_optimal)
				return EBUSY;

		for (column = 0; column < raidPtr->numCol; column++) {
			clabel = raidget_component_label(raidPtr, column);
			clabel->last_unit = *(int *)data;
			raidflush_component_label(raidPtr, column);
		}
		rs->sc_cflags |= RAIDF_UNIT_CHANGED;
		return 0;

		/* the sparetable daemon calls this to wait for the kernel to
		 * need a spare table. this ioctl does not return until a
		 * spare table is needed. XXX -- calling mpsleep here in the
		 * ioctl code is almost certainly wrong and evil. -- XXX XXX
		 * -- I should either compute the spare table in the kernel,
		 * or have a different -- XXX XXX -- interface (a different
		 * character device) for delivering the table -- XXX */
#if RF_DISABLED
	case RAIDFRAME_SPARET_WAIT:
		rf_lock_mutex2(rf_sparet_wait_mutex);
		while (!rf_sparet_wait_queue)
			rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
		RF_SparetWait_t *waitreq = rf_sparet_wait_queue;
		rf_sparet_wait_queue = rf_sparet_wait_queue->next;
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		/* structure assignment */
		*((RF_SparetWait_t *) data) = *waitreq;

		RF_Free(waitreq, sizeof(*waitreq));
		return 0;

		/* wakes up a process waiting on SPARET_WAIT and puts an error
		 * code in it that will cause the daemon to exit */
	case RAIDFRAME_ABORT_SPARET_WAIT:
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = -1;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_wait_queue;
		rf_sparet_wait_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_wait_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);
		return 0;

		/* used by the spare table daemon to deliver a spare table
		 * into the kernel */
	case RAIDFRAME_SEND_SPARET:

		/* install the spare table */
		retcode = rf_SetSpareTable(raidPtr, *(void **) data);

		/* respond to the requestor.  the return status of the spare
		 * table installation is passed in the "fcol" field */
		RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
		waitreq->fcol = retcode;
		rf_lock_mutex2(rf_sparet_wait_mutex);
		waitreq->next = rf_sparet_resp_queue;
		rf_sparet_resp_queue = waitreq;
		rf_broadcast_cond2(rf_sparet_resp_cv);
		rf_unlock_mutex2(rf_sparet_wait_mutex);

		return retcode;
#endif
	default:
		/* Unknown RAIDFRAME ioctl: try the compat modules. */
#ifdef _LP64
		if ((l->l_proc->p_flag & PK_32) != 0) {
			module_autoload("compat_netbsd32_raid",
			    MODULE_CLASS_EXEC);
			MODULE_CALL_HOOK(raidframe_netbsd32_ioctl_hook,
			    (rs, cmd, data), enosys(), retcode);
			if (retcode != EPASSTHROUGH)
				return retcode;
		}
#endif
		module_autoload("compat_raid_80", MODULE_CLASS_EXEC);
		MODULE_CALL_HOOK(raidframe_ioctl_80_hook,
		    (rs, cmd, data), enosys(), retcode);
		if (retcode != EPASSTHROUGH)
			return retcode;

		module_autoload("compat_raid_50", MODULE_CLASS_EXEC);
		MODULE_CALL_HOOK(raidframe_ioctl_50_hook,
		    (rs, cmd, data), enosys(), retcode);
		if (retcode != EPASSTHROUGH)
			return retcode;
		break; /* fall through to the os-specific code below */

	}

	if (!raidPtr->valid)
		return (EINVAL);

	/*
	 * Add support for "regular" device ioctls here.
	 */

	switch (cmd) {
	case DIOCGCACHE:
		retcode = rf_get_component_caches(raidPtr, (int *)data);
		break;

	case DIOCCACHESYNC:
		retcode = rf_sync_component_caches(raidPtr);
		break;

	default:
		retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
		break;
	}

	return (retcode);

}
1758:
1759:
1.9 oster 1760: /* raidinit -- complete the rest of the initialization for the
1.1 oster 1761: RAIDframe device. */
1762:
1763:
/*
 * Complete the initialization of a freshly configured RAID set:
 * attach the pseudo device, hook it into the dk(9)/disk(9)
 * frameworks, allocate the buffer queue, mark the unit RAIDF_INITED,
 * and kick off wedge discovery.
 *
 * On config_attach_pseudo() failure the function simply returns,
 * leaving RAIDF_INITED clear so the unit stays unusable.
 */
static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	unsigned int unit;
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr = &rs->sc_r;
	device_t dev;

	unit = raidPtr->raidid;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	dev = config_attach_pseudo(cf);
	if (dev == NULL) {
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		free(cf, M_RAIDFRAME);
		return;
	}

	/* provide a backpointer to the real softc */
	raidsoftc(dev) = rs;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */
	dk_init(dksc, dev, DKTYPE_RAID);
	disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe.  */

	rs->sc_size = raidPtr->totalSectors;

	/* Attach dk and disk subsystems */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);
	rf_set_geometry(rs, raidPtr);

	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

	/* mark unit as usable */
	rs->sc_flags |= RAIDF_INITED;

	dkwedge_discover(&dksc->sc_dkdev);
}
1.335 mlelstv 1819:
1.150 oster 1820: #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1.1 oster 1821: /* wake up the daemon & tell it to get us a spare table
1822: * XXX
1.9 oster 1823: * the entries in the queues should be tagged with the raidPtr
1.186 perry 1824: * so that in the extremely rare case that two recons happen at once,
1.11 oster 1825: * we know for which device were requesting a spare table
1.1 oster 1826: * XXX
1.186 perry 1827: *
1.39 oster 1828: * XXX This code is not currently used. GO
1.1 oster 1829: */
/*
 * Hand a spare-table request to the user-level daemon and wait for the
 * reply.
 *
 * The request is pushed onto rf_sparet_wait_queue and waiters on
 * rf_sparet_wait_cv are woken; we then sleep on rf_sparet_resp_cv until
 * a response appears on rf_sparet_resp_queue.
 *
 * Returns the fcol value carried by the response entry.
 *
 * NOTE(review): the head of rf_sparet_resp_queue is assumed to be the
 * reply for *this* request — see the XXX above about tagging entries
 * with the raidPtr if two recons ever run concurrently.
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	rf_lock_mutex2(rf_sparet_wait_mutex);
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* mpsleep unlocks the mutex */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	/* dequeue the response; from here on, "req" is the daemon's entry */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1.150 oster 1853: #endif
1.39 oster 1854:
1.186 perry 1855: /* a wrapper around rf_DoAccess that extracts appropriate info from the
1.11 oster 1856: * bp & passes it down.
1.1 oster 1857: * any calls originating in the kernel must use non-blocking I/O
1858: * do some extra sanity checking to return "appropriate" error values for
1859: * certain conditions (to make some standard utilities work)
1.186 perry 1860: *
1.34 oster 1861: * Formerly known as: rf_DoAccessKernel
1.1 oster 1862: */
/*
 * Kick the dk(4) layer to start issuing queued I/O for this RAID set.
 *
 * Before starting, flush any newly-recorded component failures out to
 * the component labels (rf_update_component_labels must be called with
 * raidPtr->mutex released, hence the unlock/relock dance below).
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;

	rs = raidPtr->softc;
	dksc = &rs->sc_dksc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
					   RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	/* refuse to start I/O on a unit that is not configured */
	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		printf("raid%d: raidstart not ready\n", raidPtr->raidid);
		return;
	}

	dk_start(dksc, NULL);
}
1.34 oster 1889:
/*
 * Translate one struct buf into a RAIDframe access and hand it to
 * rf_DoAccess() as a non-blocking (async) request.
 *
 * Returns 0 (or rf_DoAccess's status) on dispatch, EAGAIN when no
 * openings are available (caller requeues), or ENOSPC for requests
 * that fall outside the array or are not sector-aligned.
 */
static int
raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
{
	RF_SectorCount_t num_blocks, pb, sum;
	RF_RaidAddr_t raid_addr;
	daddr_t blocknum;
	int do_async;
	int rc;

	/* no openings means the DAG engine is saturated; try again later */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->openings == 0) {
		rf_unlock_mutex2(raidPtr->mutex);
		return EAGAIN;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	blocknum = bp->b_rawblkno;

	db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
		    (int) blocknum));

	db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
	db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));

	/* *THIS* is where we adjust what block we're going to...
	 * but DO NOT TOUCH bp->b_blkno!!! */
	raid_addr = blocknum;

	num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
	/* pb accounts for a trailing partial sector */
	pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
	sum = raid_addr + num_blocks + pb;
	if (1 || rf_debugKernelAccess) {
		db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
			    (int) raid_addr, (int) sum, (int) num_blocks,
			    (int) pb, (int) bp->b_resid));
	}
	/* the "sum < x" comparisons catch unsigned wraparound of sum */
	if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
	    || (sum < num_blocks) || (sum < pb)) {
		rc = ENOSPC;
		goto done;
	}
	/*
	 * XXX rf_DoAccess() should do this, not just DoAccessKernel()
	 */

	/* reject transfers that are not a whole number of sectors */
	if (bp->b_bcount & raidPtr->sectorMask) {
		rc = ENOSPC;
		goto done;
	}
	db1_printf(("Calling DoAccess..\n"));


	/* consume one opening; it is returned when the access completes */
	rf_lock_mutex2(raidPtr->mutex);
	raidPtr->openings--;
	rf_unlock_mutex2(raidPtr->mutex);

	/*
	 * Everything is async.
	 */
	do_async = 1;

	/* don't ever condition on bp->b_flags & B_WRITE.
	 * always condition on B_READ instead */

	rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
			 RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
			 do_async, raid_addr, num_blocks,
			 bp->b_data, bp, RF_DAG_NONBLOCKING_IO);

done:
	return rc;
}
1.7 explorer 1962:
1.1 oster 1963: /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1964:
/*
 * Dispatch one RF_DiskQueueData_t to the underlying component device.
 *
 * For READ/WRITE requests the preallocated req->bp is initialized and
 * sent down via bdev_strategy(); completion is reported through
 * KernelWakeupFunc() (set as b_iodone).  NOP requests fake an immediate
 * completion.  The disk queue mutex is held on entry (see header
 * comment above) and is dropped around bdev_strategy() since that call
 * can block.
 *
 * Always returns 0.
 */
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* fake completion so the iodone path runs normally */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
1.9 oster 2038: /* this is the callback function associated with a I/O invoked from
1.1 oster 2039: kernel code.
2040: */
/*
 * b_iodone callback for component I/O issued by rf_DispatchKernelIO().
 *
 * Records the I/O status in the request, marks the component failed on
 * error (at most once, and only if doing so would not exceed the
 * layout's fault tolerance), then queues the request on the raidPtr's
 * iodone list and signals the raidio thread.  Runs in biodone context.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       bp->b_error,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2107:
2108:
2109: /*
2110: * initialize a buf structure for doing an I/O in the kernel.
2111: */
/*
 * Fill in a struct buf for a kernel-initiated component I/O.
 * rw_flag selects B_READ/B_WRITE; cbFunc/cbArg become the b_iodone
 * callback and its b_private argument.
 */
static void
InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
       RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
       void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
       struct proc *b_proc)
{
	/* bp->b_flags = B_PHYS | rw_flag; */
	bp->b_flags = rw_flag;	/* XXX need B_PHYS here too??? */
	bp->b_oflags = 0;
	bp->b_cflags = 0;
	bp->b_bcount = numSect << logBytesPerSector;
	bp->b_bufsize = bp->b_bcount;
	bp->b_error = 0;
	bp->b_dev = dev;
	bp->b_data = bf;
	/* convert sector address to DEV_BSIZE units for b_blkno */
	bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
	bp->b_resid = bp->b_bcount;	/* XXX is this right!??!?!! */
	if (bp->b_bcount == 0) {
		panic("bp->b_bcount is zero in InitBP!!");
	}
	bp->b_proc = b_proc;
	bp->b_iodone = cbFunc;
	bp->b_private = cbArg;
}
2136:
2137: /*
2138: * Wait interruptibly for an exclusive lock.
2139: *
2140: * XXX
2141: * Several drivers do this; it should be abstracted and made MP-safe.
2142: * (Hmm... where have we seen this warning before :-> GO )
2143: */
2144: static int
1.169 oster 2145: raidlock(struct raid_softc *rs)
1.1 oster 2146: {
1.9 oster 2147: int error;
1.1 oster 2148:
1.335 mlelstv 2149: error = 0;
1.327 pgoyette 2150: mutex_enter(&rs->sc_mutex);
1.1 oster 2151: while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2152: rs->sc_flags |= RAIDF_WANTED;
1.327 pgoyette 2153: error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2154: if (error != 0)
1.335 mlelstv 2155: goto done;
1.1 oster 2156: }
2157: rs->sc_flags |= RAIDF_LOCKED;
1.335 mlelstv 2158: done:
1.327 pgoyette 2159: mutex_exit(&rs->sc_mutex);
1.335 mlelstv 2160: return (error);
1.1 oster 2161: }
2162: /*
2163: * Unlock and wake up any waiters.
2164: */
2165: static void
1.169 oster 2166: raidunlock(struct raid_softc *rs)
1.1 oster 2167: {
2168:
1.327 pgoyette 2169: mutex_enter(&rs->sc_mutex);
1.1 oster 2170: rs->sc_flags &= ~RAIDF_LOCKED;
2171: if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2172: rs->sc_flags &= ~RAIDF_WANTED;
1.327 pgoyette 2173: cv_broadcast(&rs->sc_cv);
1.1 oster 2174: }
1.327 pgoyette 2175: mutex_exit(&rs->sc_mutex);
1.11 oster 2176: }
1.186 perry 2177:
1.11 oster 2178:
2179: #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2180: #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
1.269 jld 2181: #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
1.11 oster 2182:
1.276 mrg 2183: static daddr_t
2184: rf_component_info_offset(void)
2185: {
2186:
2187: return RF_COMPONENT_INFO_OFFSET;
2188: }
2189:
2190: static daddr_t
2191: rf_component_info_size(unsigned secsize)
2192: {
2193: daddr_t info_size;
2194:
2195: KASSERT(secsize);
2196: if (secsize > RF_COMPONENT_INFO_SIZE)
2197: info_size = secsize;
2198: else
2199: info_size = RF_COMPONENT_INFO_SIZE;
2200:
2201: return info_size;
2202: }
2203:
2204: static daddr_t
2205: rf_parity_map_offset(RF_Raid_t *raidPtr)
2206: {
2207: daddr_t map_offset;
2208:
2209: KASSERT(raidPtr->bytesPerSector);
2210: if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2211: map_offset = raidPtr->bytesPerSector;
2212: else
2213: map_offset = RF_COMPONENT_INFO_SIZE;
2214: map_offset += rf_component_info_offset();
2215:
2216: return map_offset;
2217: }
2218:
2219: static daddr_t
2220: rf_parity_map_size(RF_Raid_t *raidPtr)
2221: {
2222: daddr_t map_size;
2223:
2224: if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2225: map_size = raidPtr->bytesPerSector;
2226: else
2227: map_size = RF_PARITY_MAP_SIZE;
2228:
2229: return map_size;
2230: }
2231:
1.186 perry 2232: int
1.269 jld 2233: raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
1.12 oster 2234: {
1.269 jld 2235: RF_ComponentLabel_t *clabel;
2236:
2237: clabel = raidget_component_label(raidPtr, col);
2238: clabel->clean = RF_RAID_CLEAN;
2239: raidflush_component_label(raidPtr, col);
1.12 oster 2240: return(0);
2241: }
2242:
2243:
1.186 perry 2244: int
1.269 jld 2245: raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
1.11 oster 2246: {
1.269 jld 2247: RF_ComponentLabel_t *clabel;
2248:
2249: clabel = raidget_component_label(raidPtr, col);
2250: clabel->clean = RF_RAID_DIRTY;
2251: raidflush_component_label(raidPtr, col);
1.11 oster 2252: return(0);
2253: }
2254:
/*
 * Read component "col"'s on-disk label into its in-core copy
 * (raid_cinfo[col].ci_label).  Returns the error from
 * raidread_component_label().
 */
int
raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	KASSERT(raidPtr->bytesPerSector);
	return raidread_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp,
	    &raidPtr->raid_cinfo[col].ci_label);
}
2264:
/* Return a pointer to component "col"'s in-core label (never NULL). */
RF_ComponentLabel_t *
raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	return &raidPtr->raid_cinfo[col].ci_label;
}
2270:
/*
 * Write component "col"'s in-core label to disk, first stamping it
 * with the set's current mod_counter (and the matching parity-map
 * modcount unless parity maps are compiled out).  Returns the error
 * from raidwrite_component_label().
 */
int
raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
{
	RF_ComponentLabel_t *label;

	label = &raidPtr->raid_cinfo[col].ci_label;
	label->mod_counter = raidPtr->mod_counter;
#ifndef RF_NO_PARITY_MAP
	label->parity_map_modcount = label->mod_counter;
#endif
	return raidwrite_component_label(raidPtr->bytesPerSector,
	    raidPtr->Disks[col].dev,
	    raidPtr->raid_cinfo[col].ci_vp, label);
}
2285:
2286:
/*
 * Read a component label from dev/b_vp into *clabel.  Thin wrapper
 * around raidread_component_area() using the standard label offset
 * and (sector-size dependent) label area size.
 */
static int
raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidread_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize));
}
2296:
2297: /* ARGSUSED */
/*
 * Read dsize bytes starting at byte offset "offset" from the raw
 * component device and copy the first msize bytes into "data".
 * Returns 0 on success or the error from biowait().
 */
static int
raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize)
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	if (b_vp == NULL) {
		/* For whatever reason, this component is not valid.
		   Don't try to read a component label from it. */
		return(EINVAL);
	}

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_READ;
	bp->b_resid = dsize;

	/* synchronous read: issue and wait for completion */
	bdev_strategy(bp);
	error = biowait(bp);

	if (!error) {
		memcpy(data, bp->b_data, msize);
	}

	brelse(bp, 0);
	return(error);
}
1.269 jld 2334:
2335:
/*
 * Write *clabel to the component label area on dev/b_vp.  Thin
 * wrapper around raidwrite_component_area(); always synchronous
 * (asyncp = 0).
 */
static int
raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
    RF_ComponentLabel_t *clabel)
{
	return raidwrite_component_area(dev, b_vp, clabel,
	    sizeof(RF_ComponentLabel_t),
	    rf_component_info_offset(),
	    rf_component_info_size(secsize), 0);
}
2345:
1.11 oster 2346: /* ARGSUSED */
1.269 jld 2347: static int
2348: raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2349: size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
1.11 oster 2350: {
2351: struct buf *bp;
2352: int error;
2353:
2354: /* get a block of the appropriate size... */
1.269 jld 2355: bp = geteblk((int)dsize);
1.11 oster 2356: bp->b_dev = dev;
2357:
2358: /* get our ducks in a row for the write */
1.269 jld 2359: bp->b_blkno = offset / DEV_BSIZE;
2360: bp->b_bcount = dsize;
2361: bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2362: bp->b_resid = dsize;
1.11 oster 2363:
1.269 jld 2364: memset(bp->b_data, 0, dsize);
2365: memcpy(bp->b_data, data, msize);
1.11 oster 2366:
1.331 mlelstv 2367: bdev_strategy(bp);
1.269 jld 2368: if (asyncp)
2369: return 0;
1.340 christos 2370: error = biowait(bp);
1.233 ad 2371: brelse(bp, 0);
1.11 oster 2372: if (error) {
1.48 oster 2373: #if 1
1.11 oster 2374: printf("Failed to write RAID component info!\n");
1.48 oster 2375: #endif
1.11 oster 2376: }
2377:
2378: return(error);
1.1 oster 2379: }
1.12 oster 2380:
/*
 * Write the on-disk parity map to every live component.  Writes are
 * synchronous; per-component write errors are ignored (see XXXjld).
 */
void
rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	int c;

	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		/* XXXjld: what if an error occurs here? */
		raidwrite_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, map,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr), 0);
	}
}
2398:
/*
 * Read the parity map from every live component and merge them into
 * *map: the first successfully-visited component's map is copied
 * verbatim, subsequent ones are OR-ed in via rf_paritymap_merge().
 *
 * NOTE(review): the return value of raidread_component_area() is not
 * checked, so a failed read merges whatever is left in "tmp".
 */
void
rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
{
	struct rf_paritymap_ondisk tmp;
	int c,first;

	first=1;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* Skip dead disks. */
		if (RF_DEAD_DISK(raidPtr->Disks[c].status))
			continue;
		raidread_component_area(raidPtr->Disks[c].dev,
		    raidPtr->raid_cinfo[c].ci_vp, &tmp,
		    RF_PARITYMAP_NBYTE,
		    rf_parity_map_offset(raidPtr),
		    rf_parity_map_size(raidPtr));
		if (first) {
			memcpy(map, &tmp, sizeof(*map));
			first = 0;
		} else {
			rf_paritymap_merge(map, &tmp);
		}
	}
}
2423:
/*
 * Bump the set's mod_counter and mark every usable component label
 * dirty on disk.  Failed disks are left alone entirely; spared
 * components are skipped (accessing them would be unsafe); in-use
 * spares get a freshly initialized label recording which column they
 * stand in for.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2483:
1.13 oster 2484:
/*
 * Flush updated component labels for all optimal components and all
 * in-use spares, after bumping the set's mod_counter.  When "final" is
 * RF_FINAL_COMPONENT_UPDATE (shutdown path) and parity is known good,
 * each flushed component is additionally marked clean.  Must be called
 * without raidPtr->mutex held (performs component I/O).
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;
	struct raid_softc *rs = raidPtr->softc;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare is standing in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2562:
/*
 * Close a component's vnode.  Auto-configured components were opened
 * internally and are closed with VOP_CLOSE (vnode must be locked);
 * user-configured ones came through vn_open and are closed with
 * vn_close using the current lwp's credentials.  NULL vp is a no-op.
 */
void
rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
{

	if (vp != NULL) {
		if (auto_configured == 1) {
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

		} else {
			(void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
		}
	}
}
2578:
2579:
2580: void
1.169 oster 2581: rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
1.68 oster 2582: {
1.186 perry 2583: int r,c;
1.69 oster 2584: struct vnode *vp;
2585: int acd;
1.68 oster 2586:
2587:
2588: /* We take this opportunity to close the vnodes like we should.. */
2589:
1.166 oster 2590: for (c = 0; c < raidPtr->numCol; c++) {
2591: vp = raidPtr->raid_cinfo[c].ci_vp;
2592: acd = raidPtr->Disks[c].auto_configured;
2593: rf_close_component(raidPtr, vp, acd);
2594: raidPtr->raid_cinfo[c].ci_vp = NULL;
2595: raidPtr->Disks[c].auto_configured = 0;
1.68 oster 2596: }
1.166 oster 2597:
1.68 oster 2598: for (r = 0; r < raidPtr->numSpare; r++) {
1.166 oster 2599: vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2600: acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
1.69 oster 2601: rf_close_component(raidPtr, vp, acd);
1.166 oster 2602: raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2603: raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
1.68 oster 2604: }
1.37 oster 2605: }
1.63 oster 2606:
1.37 oster 2607:
/*
 * Kernel-thread body: fail the requested component (optionally kicking
 * off reconstruction per RF_FDFLAGS_RECON), maintaining the
 * recon_in_progress flag around the operation.  Frees the request and
 * exits the thread; never returns.
 */
void
rf_ReconThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = (RF_Raid_t *) req->raidPtr;
	raidPtr->recon_in_progress = 1;

	rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
		    ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));

	RF_Free(req, sizeof(*req));

	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2629:
/*
 * Kernel-thread body: rewrite all parity for the set.  On success the
 * in-core parity_good flag is set to RF_RAID_CLEAN (the on-disk clean
 * bits follow at shutdown).  Wakes any thread blocked in shutdown
 * waiting for the rewrite to finish, then exits; never returns.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		rf_lock_mutex2(raidPtr->rad_lock);
		cv_broadcast(&raidPtr->parity_rewrite_cv);
		rf_unlock_mutex2(raidPtr->rad_lock);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2662:
2663:
/*
 * Kernel-thread body: copy reconstructed data from the spare back to
 * the (replaced) original component, maintaining the
 * copyback_in_progress flag.  Exits the thread; never returns.
 */
void
rf_CopybackThread(RF_Raid_t *raidPtr)
{
	int s;

	raidPtr->copyback_in_progress = 1;
	s = splbio();
	rf_CopybackReconstructedData(raidPtr);
	splx(s);
	raidPtr->copyback_in_progress = 0;

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2678:
2679:
/*
 * Kernel-thread body: reconstruct the requested column in place
 * (onto the same component), maintaining the recon_in_progress flag.
 * Frees the request and exits the thread; never returns.
 */
void
rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
{
	int s;
	RF_Raid_t *raidPtr;

	s = splbio();
	raidPtr = req->raidPtr;
	raidPtr->recon_in_progress = 1;
	rf_ReconstructInPlace(raidPtr, req->col);
	RF_Free(req, sizeof(*req));
	raidPtr->recon_in_progress = 0;
	splx(s);

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2697:
/*
 * Probe one candidate component (dev/vp) during autoconfiguration.
 *
 * Reads its component label; if the label looks reasonable and its
 * recorded partition size fits within "size", a new RF_AutoConfig_t is
 * prepended to ac_list (taking ownership of vp and the label).
 * Otherwise the label is freed and vp is closed/released here.
 *
 * Returns the (possibly updated) list head, or NULL after freeing the
 * entire list on allocation failure.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
		/* out of memory: tear down everything collected so far */
oomem:
		while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			       cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
				M_NOWAIT);
			if (ac == NULL) {
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: label is ours to free, vnode ours to close */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
2755:
/*
 * Scan every disk-class device in the system for RAIDframe components.
 *
 * The scan is made in two passes: wedges (dk) first, then everything
 * else, so that a wedge covering a whole disk is preferred over that
 * disk's raw partition.  For each candidate a vnode is opened, its size
 * queried, and rf_get_component() is asked to validate and collect the
 * component label.  Returns the accumulated RF_AutoConfig_t list (may
 * be NULL if nothing was found).
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;
	int dowedges;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/*
	 * we begin by trolling through *all* the devices on the system *twice*
	 * first we scan for wedges, second for other devices. This avoids
	 * using a raw partition instead of a wedge that covers the whole disk
	 */

	for (dowedges=1; dowedges>=0; --dowedges) {
		for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
		    dv = deviter_next(&di)) {

			/* we are only interested in disks... */
			if (device_class(dv) != DV_DISK)
				continue;

			/* we don't care about floppies... */
			if (device_is_a(dv, "fd")) {
				continue;
			}

			/* we don't care about CD's... */
			if (device_is_a(dv, "cd")) {
				continue;
			}

			/* we don't care about md's... */
			if (device_is_a(dv, "md")) {
				continue;
			}

			/* hdfd is the Atari/Hades floppy driver */
			if (device_is_a(dv, "hdfd")) {
				continue;
			}

			/* fdisa is the Atari/Milan floppy driver */
			if (device_is_a(dv, "fdisa")) {
				continue;
			}

			/* are we in the wedges pass ? */
			wedge = device_is_a(dv, "dk");
			if (wedge != dowedges) {
				continue;
			}

			/* need to find the device_name_to_block_device_major stuff */
			bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

			rf_part_found = 0; /* no raid partition as yet */

			/* get a vnode for the raw partition of this disk */
			bminor = minor(device_unit(dv));
			dev = wedge ? makedev(bmajor, bminor) :
			    MAKEDISKDEV(bmajor, bminor, RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

			if (error) {
				/* "Who cares."  Continue looking
				   for something that exists */
				vput(vp);
				continue;
			}

			error = getdisksize(vp, &numsecs, &secsize);
			if (error) {
				/*
				 * Pseudo devices like vnd and cgd can be
				 * opened but may still need some configuration.
				 * Ignore these quietly.
				 */
				if (error != ENXIO)
					printf("RAIDframe: can't get disk size"
					    " for dev %s (%d)\n",
					    device_xname(dv), error);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}
			if (wedge) {
				/* Wedge pass: only wedges whose partition
				   type says RAIDframe are considered. */
				struct dkwedge_info dkw;
				error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
				    NOCRED);
				if (error) {
					printf("RAIDframe: can't get wedge info for "
					    "dev %s (%d)\n", device_xname(dv), error);
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
					vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				/* rf_get_component() takes ownership of vp */
				ac_list = rf_get_component(ac_list, dev, vp,
				    device_xname(dv), dkw.dkw_size, numsecs, secsize);
				rf_part_found = 1; /* there is a raid component on this disk */
				continue;
			}

			/* Ok, the disk exists. Go get the disklabel. */
			error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
			if (error) {
				/*
				 * XXX can't happen - open() would
				 * have errored out (or faked up one)
				 */
				if (error != ENOTTY)
					printf("RAIDframe: can't get label for dev "
					    "%s (%d)\n", device_xname(dv), error);
			}

			/* don't need this any more.  We'll allocate it again
			   a little later if we really do... */
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

			if (error)
				continue;

			rf_part_found = 0; /* no raid partitions yet */
			for (i = 0; i < label.d_npartitions; i++) {
				char cname[sizeof(ac_list->devname)];

				/* We only support partitions marked as RAID */
				if (label.d_partitions[i].p_fstype != FS_RAID)
					continue;

				dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + i);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
					label.d_partitions[i].p_size, numsecs, secsize);
				rf_part_found = 1; /* there is at least one raid partition on this disk */
			}

			/*
			 * If there is no raid component on this disk, either in a
			 * disklabel or inside a wedge, check the raw partition as well,
			 * as it is possible to configure raid components on raw disk
			 * devices.
			 */

			if (!rf_part_found) {
				char cname[sizeof(ac_list->devname)];

				dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + RAW_PART);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
				    label.d_partitions[RAW_PART].p_size, numsecs, secsize);
			}
		}
		deviter_release(&di);
	}
	return ac_list;
}
1.186 perry 2959:
1.213 christos 2960:
1.292 oster 2961: int
1.284 mrg 2962: rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
1.48 oster 2963: {
1.186 perry 2964:
1.48 oster 2965: if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2966: (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2967: ((clabel->clean == RF_RAID_CLEAN) ||
2968: (clabel->clean == RF_RAID_DIRTY)) &&
1.186 perry 2969: clabel->row >=0 &&
2970: clabel->column >= 0 &&
1.48 oster 2971: clabel->num_rows > 0 &&
2972: clabel->num_columns > 0 &&
1.186 perry 2973: clabel->row < clabel->num_rows &&
1.48 oster 2974: clabel->column < clabel->num_columns &&
2975: clabel->blockSize > 0 &&
1.282 enami 2976: /*
2977: * numBlocksHi may contain garbage, but it is ok since
2978: * the type is unsigned. If it is really garbage,
2979: * rf_fix_old_label_size() will fix it.
2980: */
2981: rf_component_label_numblocks(clabel) > 0) {
1.284 mrg 2982: /*
2983: * label looks reasonable enough...
2984: * let's make sure it has no old garbage.
2985: */
1.292 oster 2986: if (numsecs)
2987: rf_fix_old_label_size(clabel, numsecs);
1.48 oster 2988: return(1);
2989: }
2990: return(0);
2991: }
2992:
2993:
1.278 mrg 2994: /*
2995: * For reasons yet unknown, some old component labels have garbage in
2996: * the newer numBlocksHi region, and this causes lossage. Since those
2997: * disks will also have numsecs set to less than 32 bits of sectors,
1.299 oster 2998: * we can determine when this corruption has occurred, and fix it.
1.284 mrg 2999: *
3000: * The exact same problem, with the same unknown reason, happens to
3001: * the partitionSizeHi member as well.
1.278 mrg 3002: */
3003: static void
3004: rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3005: {
3006:
1.284 mrg 3007: if (numsecs < ((uint64_t)1 << 32)) {
3008: if (clabel->numBlocksHi) {
3009: printf("WARNING: total sectors < 32 bits, yet "
3010: "numBlocksHi set\n"
3011: "WARNING: resetting numBlocksHi to zero.\n");
3012: clabel->numBlocksHi = 0;
3013: }
3014:
3015: if (clabel->partitionSizeHi) {
3016: printf("WARNING: total sectors < 32 bits, yet "
3017: "partitionSizeHi set\n"
3018: "WARNING: resetting partitionSizeHi to zero.\n");
3019: clabel->partitionSizeHi = 0;
3020: }
1.278 mrg 3021: }
3022: }
3023:
3024:
1.224 oster 3025: #ifdef DEBUG
/*
 * Debug helper: pretty-print the interesting fields of a component
 * label to the console.  Compiled only under DEBUG.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	/* indexed by root_partition (masked to 2 bits): 0=No, 1=Force,
	   2=Soft, anything else is invalid */
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
1.133 oster 3057: #endif
1.48 oster 3058:
3059: RF_ConfigSet_t *
1.169 oster 3060: rf_create_auto_sets(RF_AutoConfig_t *ac_list)
1.48 oster 3061: {
3062: RF_AutoConfig_t *ac;
3063: RF_ConfigSet_t *config_sets;
3064: RF_ConfigSet_t *cset;
3065: RF_AutoConfig_t *ac_next;
3066:
3067:
3068: config_sets = NULL;
3069:
3070: /* Go through the AutoConfig list, and figure out which components
3071: belong to what sets. */
3072: ac = ac_list;
3073: while(ac!=NULL) {
3074: /* we're going to putz with ac->next, so save it here
3075: for use at the end of the loop */
3076: ac_next = ac->next;
3077:
3078: if (config_sets == NULL) {
3079: /* will need at least this one... */
3080: config_sets = (RF_ConfigSet_t *)
1.186 perry 3081: malloc(sizeof(RF_ConfigSet_t),
1.48 oster 3082: M_RAIDFRAME, M_NOWAIT);
3083: if (config_sets == NULL) {
1.141 provos 3084: panic("rf_create_auto_sets: No memory!");
1.48 oster 3085: }
3086: /* this one is easy :) */
3087: config_sets->ac = ac;
3088: config_sets->next = NULL;
1.51 oster 3089: config_sets->rootable = 0;
1.48 oster 3090: ac->next = NULL;
3091: } else {
3092: /* which set does this component fit into? */
3093: cset = config_sets;
3094: while(cset!=NULL) {
1.49 oster 3095: if (rf_does_it_fit(cset, ac)) {
1.86 oster 3096: /* looks like it matches... */
3097: ac->next = cset->ac;
3098: cset->ac = ac;
1.48 oster 3099: break;
3100: }
3101: cset = cset->next;
3102: }
3103: if (cset==NULL) {
3104: /* didn't find a match above... new set..*/
3105: cset = (RF_ConfigSet_t *)
1.186 perry 3106: malloc(sizeof(RF_ConfigSet_t),
1.48 oster 3107: M_RAIDFRAME, M_NOWAIT);
3108: if (cset == NULL) {
1.141 provos 3109: panic("rf_create_auto_sets: No memory!");
1.48 oster 3110: }
3111: cset->ac = ac;
3112: ac->next = NULL;
3113: cset->next = config_sets;
1.51 oster 3114: cset->rootable = 0;
1.48 oster 3115: config_sets = cset;
3116: }
3117: }
3118: ac = ac_next;
3119: }
3120:
3121:
3122: return(config_sets);
3123: }
3124:
3125: static int
1.169 oster 3126: rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
1.48 oster 3127: {
3128: RF_ComponentLabel_t *clabel1, *clabel2;
3129:
3130: /* If this one matches the *first* one in the set, that's good
3131: enough, since the other members of the set would have been
3132: through here too... */
1.60 oster 3133: /* note that we are not checking partitionSize here..
3134:
3135: Note that we are also not checking the mod_counters here.
1.299 oster 3136: If everything else matches except the mod_counter, that's
1.60 oster 3137: good enough for this test. We will deal with the mod_counters
1.186 perry 3138: a little later in the autoconfiguration process.
1.60 oster 3139:
3140: (clabel1->mod_counter == clabel2->mod_counter) &&
1.81 oster 3141:
3142: The reason we don't check for this is that failed disks
3143: will have lower modification counts. If those disks are
3144: not added to the set they used to belong to, then they will
3145: form their own set, which may result in 2 different sets,
3146: for example, competing to be configured at raid0, and
3147: perhaps competing to be the root filesystem set. If the
3148: wrong ones get configured, or both attempt to become /,
3149: weird behaviour and or serious lossage will occur. Thus we
3150: need to bring them into the fold here, and kick them out at
3151: a later point.
1.60 oster 3152:
3153: */
1.48 oster 3154:
3155: clabel1 = cset->ac->clabel;
3156: clabel2 = ac->clabel;
3157: if ((clabel1->version == clabel2->version) &&
3158: (clabel1->serial_number == clabel2->serial_number) &&
3159: (clabel1->num_rows == clabel2->num_rows) &&
3160: (clabel1->num_columns == clabel2->num_columns) &&
3161: (clabel1->sectPerSU == clabel2->sectPerSU) &&
3162: (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3163: (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3164: (clabel1->parityConfig == clabel2->parityConfig) &&
3165: (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3166: (clabel1->blockSize == clabel2->blockSize) &&
1.282 enami 3167: rf_component_label_numblocks(clabel1) ==
3168: rf_component_label_numblocks(clabel2) &&
1.48 oster 3169: (clabel1->autoconfigure == clabel2->autoconfigure) &&
3170: (clabel1->root_partition == clabel2->root_partition) &&
3171: (clabel1->last_unit == clabel2->last_unit) &&
3172: (clabel1->config_order == clabel2->config_order)) {
3173: /* if it get's here, it almost *has* to be a match */
3174: } else {
1.186 perry 3175: /* it's not consistent with somebody in the set..
1.48 oster 3176: punt */
3177: return(0);
3178: }
3179: /* all was fine.. it must fit... */
3180: return(1);
3181: }
3182:
/*
 * Decide whether a configuration set has enough live components (ones
 * whose mod_counter matches the highest mod_counter seen in the set) to
 * be configured.  Returns 1 if so, 0 otherwise.  RAID 1 is special-
 * cased: components are treated as even/odd mirror pairs, and the set
 * is only rejected when both halves of a pair are missing.
 */
int
rf_have_enough_components(RF_ConfigSet_t *cset)
{
	RF_AutoConfig_t *ac;
	RF_AutoConfig_t *auto_config;
	RF_ComponentLabel_t *clabel;
	int c;
	int num_cols;
	int num_missing;
	int mod_counter;
	int mod_counter_found;
	int even_pair_failed;
	char parity_type;


	/* check to see that we have enough 'live' components
	   of this set.  If so, we can configure it if necessary */

	num_cols = cset->ac->clabel->num_columns;
	parity_type = cset->ac->clabel->parityConfig;

	/* XXX Check for duplicate components!?!?!? */

	/* Determine what the mod_counter is supposed to be for this set:
	   the maximum over all members (failed disks have lower ones). */

	mod_counter_found = 0;
	mod_counter = 0;
	ac = cset->ac;
	while(ac!=NULL) {
		if (mod_counter_found==0) {
			mod_counter = ac->clabel->mod_counter;
			mod_counter_found = 1;
		} else {
			if (ac->clabel->mod_counter > mod_counter) {
				mod_counter = ac->clabel->mod_counter;
			}
		}
		ac = ac->next;
	}

	num_missing = 0;
	auto_config = cset->ac;

	even_pair_failed = 0;
	/* for each column, look for a member claiming that column with a
	   current mod_counter */
	for(c=0; c<num_cols; c++) {
		ac = auto_config;
		while(ac!=NULL) {
			if ((ac->clabel->column == c) &&
			    (ac->clabel->mod_counter == mod_counter)) {
				/* it's this one... */
#ifdef DEBUG
				printf("Found: %s at %d\n",
				       ac->devname,c);
#endif
				break;
			}
			ac=ac->next;
		}
		if (ac==NULL) {
				/* Didn't find one here! */
				/* special case for RAID 1, especially
				   where there are more than 2
				   components (where RAIDframe treats
				   things a little differently :( ) */
			if (parity_type == '1') {
				if (c%2 == 0) { /* even component */
					even_pair_failed = 1;
				} else { /* odd component.  If
					    we're failed, and
					    so is the even
					    component, it's
					    "Good Night, Charlie" */
					if (even_pair_failed == 1) {
						return(0);
					}
				}
			} else {
				/* normal accounting */
				num_missing++;
			}
		}
		if ((parity_type == '1') && (c%2 == 1)) {
			/* Just did an even component, and we didn't
			   bail.. reset the even_pair_failed flag,
			   and go on to the next component.... */
			even_pair_failed = 0;
		}
	}

	clabel = cset->ac->clabel;

	/* RAID 0 tolerates no missing components; RAID 4/5 tolerate one */
	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
		/* XXX this needs to be made *much* more general */
		/* Too many failures */
		return(0);
	}
	/* otherwise, all is well, and we've got enough to take a kick
	   at autoconfiguring this set */
	return(1);
}
3285:
3286: void
1.169 oster 3287: rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
1.222 christos 3288: RF_Raid_t *raidPtr)
1.48 oster 3289: {
3290: RF_ComponentLabel_t *clabel;
1.77 oster 3291: int i;
1.48 oster 3292:
3293: clabel = ac->clabel;
3294:
3295: /* 1. Fill in the common stuff */
3296: config->numCol = clabel->num_columns;
3297: config->numSpare = 0; /* XXX should this be set here? */
3298: config->sectPerSU = clabel->sectPerSU;
3299: config->SUsPerPU = clabel->SUsPerPU;
3300: config->SUsPerRU = clabel->SUsPerRU;
3301: config->parityConfig = clabel->parityConfig;
3302: /* XXX... */
3303: strcpy(config->diskQueueType,"fifo");
3304: config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3305: config->layoutSpecificSize = 0; /* XXX ?? */
3306:
3307: while(ac!=NULL) {
3308: /* row/col values will be in range due to the checks
3309: in reasonable_label() */
1.166 oster 3310: strcpy(config->devnames[0][ac->clabel->column],
1.48 oster 3311: ac->devname);
3312: ac = ac->next;
3313: }
3314:
1.77 oster 3315: for(i=0;i<RF_MAXDBGV;i++) {
1.163 fvdl 3316: config->debugVars[i][0] = 0;
1.77 oster 3317: }
1.48 oster 3318: }
3319:
3320: int
1.169 oster 3321: rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
1.48 oster 3322: {
1.269 jld 3323: RF_ComponentLabel_t *clabel;
1.166 oster 3324: int column;
1.148 oster 3325: int sparecol;
1.48 oster 3326:
1.54 oster 3327: raidPtr->autoconfigure = new_value;
1.166 oster 3328:
3329: for(column=0; column<raidPtr->numCol; column++) {
3330: if (raidPtr->Disks[column].status == rf_ds_optimal) {
1.269 jld 3331: clabel = raidget_component_label(raidPtr, column);
3332: clabel->autoconfigure = new_value;
3333: raidflush_component_label(raidPtr, column);
1.48 oster 3334: }
3335: }
1.148 oster 3336: for(column = 0; column < raidPtr->numSpare ; column++) {
3337: sparecol = raidPtr->numCol + column;
1.166 oster 3338: if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.269 jld 3339: clabel = raidget_component_label(raidPtr, sparecol);
3340: clabel->autoconfigure = new_value;
3341: raidflush_component_label(raidPtr, sparecol);
1.148 oster 3342: }
3343: }
1.48 oster 3344: return(new_value);
3345: }
3346:
3347: int
1.169 oster 3348: rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
1.48 oster 3349: {
1.269 jld 3350: RF_ComponentLabel_t *clabel;
1.166 oster 3351: int column;
1.148 oster 3352: int sparecol;
1.48 oster 3353:
1.54 oster 3354: raidPtr->root_partition = new_value;
1.166 oster 3355: for(column=0; column<raidPtr->numCol; column++) {
3356: if (raidPtr->Disks[column].status == rf_ds_optimal) {
1.269 jld 3357: clabel = raidget_component_label(raidPtr, column);
3358: clabel->root_partition = new_value;
3359: raidflush_component_label(raidPtr, column);
1.148 oster 3360: }
3361: }
3362: for(column = 0; column < raidPtr->numSpare ; column++) {
3363: sparecol = raidPtr->numCol + column;
1.166 oster 3364: if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.269 jld 3365: clabel = raidget_component_label(raidPtr, sparecol);
3366: clabel->root_partition = new_value;
3367: raidflush_component_label(raidPtr, sparecol);
1.48 oster 3368: }
3369: }
3370: return(new_value);
3371: }
3372:
3373: void
1.169 oster 3374: rf_release_all_vps(RF_ConfigSet_t *cset)
1.48 oster 3375: {
3376: RF_AutoConfig_t *ac;
1.186 perry 3377:
1.48 oster 3378: ac = cset->ac;
3379: while(ac!=NULL) {
3380: /* Close the vp, and give it back */
3381: if (ac->vp) {
1.96 oster 3382: vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
1.335 mlelstv 3383: VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
1.48 oster 3384: vput(ac->vp);
1.86 oster 3385: ac->vp = NULL;
1.48 oster 3386: }
3387: ac = ac->next;
3388: }
3389: }
3390:
3391:
3392: void
1.169 oster 3393: rf_cleanup_config_set(RF_ConfigSet_t *cset)
1.48 oster 3394: {
3395: RF_AutoConfig_t *ac;
3396: RF_AutoConfig_t *next_ac;
1.186 perry 3397:
1.48 oster 3398: ac = cset->ac;
3399: while(ac!=NULL) {
3400: next_ac = ac->next;
3401: /* nuke the label */
3402: free(ac->clabel, M_RAIDFRAME);
3403: /* cleanup the config structure */
3404: free(ac, M_RAIDFRAME);
3405: /* "next.." */
3406: ac = next_ac;
3407: }
3408: /* and, finally, nuke the config set */
3409: free(cset, M_RAIDFRAME);
3410: }
3411:
3412:
/*
 * Populate a component label from the current in-core state of the RAID
 * set.  Only the set-wide fields are filled in here; per-component
 * fields (row, column, partition size) are the caller's responsibility.
 */
void
raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/* current version number */
	clabel->version = RF_COMPONENT_LABEL_VERSION;
	clabel->serial_number = raidPtr->serial_number;
	clabel->mod_counter = raidPtr->mod_counter;

	/* geometry and state */
	clabel->num_rows = 1;
	clabel->num_columns = raidPtr->numCol;
	clabel->clean = RF_RAID_DIRTY; /* not clean */
	clabel->status = rf_ds_optimal; /* "It's good!" */

	/* layout parameters */
	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;

	clabel->blockSize = raidPtr->bytesPerSector;
	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);

	/* XXX not portable */
	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
	clabel->maxOutstanding = raidPtr->maxOutstanding;
	clabel->autoconfigure = raidPtr->autoconfigure;
	clabel->root_partition = raidPtr->root_partition;
	clabel->last_unit = raidPtr->raidid;
	clabel->config_order = raidPtr->config_order;

#ifndef RF_NO_PARITY_MAP
	rf_paritymap_init_label(raidPtr->parity_map, clabel);
#endif
}
3445:
/*
 * Autoconfigure one configuration set: build an RF_Config_t from the
 * component labels, find (or create) a softc at the unit the set was
 * last configured on (or the next free unit above it), and run
 * rf_Configure().  On success the set's rootable status is noted and
 * the configured softc is returned; on failure NULL.
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("%s: Out of mem - config!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/* start at the unit recorded in the label; walk upward past any
	   unit that already holds a valid (configured) set */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	     sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	/* no softc exists yet at the chosen unit: create one */
	if (sc == NULL)
		sc = raidget(raidID, true);
	if (sc == NULL) {
		printf("%s: Out of mem - softc!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		free(config, M_RAIDFRAME);
		return NULL;
	}

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* configuration failed: release the softc we claimed */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3529:
3530: void
1.187 christos 3531: rf_pool_init(struct pool *p, size_t size, const char *w_chan,
3532: size_t xmin, size_t xmax)
1.177 oster 3533: {
1.352 christos 3534: int error;
3535:
1.227 ad 3536: pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
1.187 christos 3537: pool_sethiwat(p, xmax);
1.352 christos 3538: if ((error = pool_prime(p, xmin)) != 0)
3539: panic("%s: failed to prime pool: %d", __func__, error);
1.187 christos 3540: pool_setlowat(p, xmin);
1.177 oster 3541: }
1.190 oster 3542:
3543: /*
1.335 mlelstv 3544: * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
3545: * to see if there is IO pending and if that IO could possibly be done
3546: * for a given RAID set. Returns 0 if IO is waiting and can be done, 1
1.190 oster 3547: * otherwise.
3548: *
3549: */
3550: int
1.300 christos 3551: rf_buf_queue_check(RF_Raid_t *raidPtr)
1.190 oster 3552: {
1.335 mlelstv 3553: struct raid_softc *rs;
3554: struct dk_softc *dksc;
3555:
3556: rs = raidPtr->softc;
3557: dksc = &rs->sc_dksc;
3558:
3559: if ((rs->sc_flags & RAIDF_INITED) == 0)
3560: return 1;
3561:
3562: if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
1.190 oster 3563: /* there is work to do */
3564: return 0;
1.335 mlelstv 3565: }
1.190 oster 3566: /* default is nothing to do */
3567: return 1;
3568: }
1.213 christos 3569:
3570: int
1.294 oster 3571: rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
1.213 christos 3572: {
1.275 mrg 3573: uint64_t numsecs;
3574: unsigned secsize;
1.213 christos 3575: int error;
3576:
1.275 mrg 3577: error = getdisksize(vp, &numsecs, &secsize);
1.213 christos 3578: if (error == 0) {
1.275 mrg 3579: diskPtr->blockSize = secsize;
3580: diskPtr->numBlocks = numsecs - rf_protectedSectors;
3581: diskPtr->partitionSize = numsecs;
1.213 christos 3582: return 0;
3583: }
3584: return error;
3585: }
1.217 oster 3586:
/*
 * Autoconf match routine: always matches — there is no underlying
 * hardware to probe for a raid device.
 */
static int
raid_match(device_t self, cfdata_t cfdata, void *aux)
{
	return 1;
}
3592:
/*
 * Autoconf attach routine: intentionally empty — the real setup of a
 * RAID set happens when it is configured (see raidinit() and
 * rf_auto_config_set()), not at device attach time.
 */
static void
raid_attach(device_t parent, device_t self, void *aux)
{
}
3597:
3598:
/*
 * Autoconf detach routine: tear down the RAID set under the softc lock
 * and, if that succeeds, release the softc.  Returns 0 on success,
 * ENXIO if no softc is attached, or the error from raidlock()/
 * raid_detach_unlocked().
 */
static int
raid_detach(device_t self, int flags)
{
	int error;
	struct raid_softc *rs = raidsoftc(self);

	if (rs == NULL)
		return ENXIO;

	if ((error = raidlock(rs)) != 0)
		return (error);

	error = raid_detach_unlocked(rs);

	raidunlock(rs);

	/* XXX raid can be referenced here */

	if (error)
		return error;

	/* Free the softc */
	raidput(rs);

	return 0;
}
3625:
/*
 * Publish a synthetic disk geometry for the configured set via the
 * dk(4) layer.  A RAID set has no physical geometry, so the values are
 * fabricated: one "track" per data stripe and 4 tracks per column.
 */
static void
rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = raidPtr->totalSectors;
	dg->dg_secsize = raidPtr->bytesPerSector;
	dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
	dg->dg_ntracks = 4 * raidPtr->numCol;

	disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
}
1.252 oster 3641:
/*
 * Get cache info for all the components (including spares).
 * Returns intersection of all the cache flags of all disks, or first
 * error if any encountered.
 * XXXfua feature flags can change as spares are added - lock down somehow
 */
static int
rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
{
	int c;
	int error;
	int dkwhole = 0, dkpart;

	for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
		/*
		 * Check any non-dead disk, even when currently being
		 * reconstructed.
		 */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)
		    || raidPtr->Disks[c].status == rf_ds_reconstructing) {
			error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
			    DIOCGCACHE, &dkpart, FREAD, NOCRED);
			if (error) {
				if (error != ENODEV) {
					printf("raid%d: get cache for component %s failed\n",
					    raidPtr->raidid,
					    raidPtr->Disks[c].devname);
				}

				return error;
			}

			/*
			 * NOTE(review): if component 0 is dead, the first
			 * live disk's flags are combined with the initial
			 * dkwhole of 0 rather than seeding it — verify
			 * DKCACHE_COMBINE's semantics make that intended.
			 */
			if (c == 0)
				dkwhole = dkpart;
			else
				dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
		}
	}

	*data = dkwhole;

	return 0;
}
3685:
1.252 oster 3686: /*
3687: * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3688: * We end up returning whatever error was returned by the first cache flush
3689: * that fails.
3690: */
3691:
1.269 jld 3692: int
1.252 oster 3693: rf_sync_component_caches(RF_Raid_t *raidPtr)
3694: {
3695: int c, sparecol;
3696: int e,error;
3697: int force = 1;
3698:
3699: error = 0;
3700: for (c = 0; c < raidPtr->numCol; c++) {
3701: if (raidPtr->Disks[c].status == rf_ds_optimal) {
3702: e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3703: &force, FWRITE, NOCRED);
3704: if (e) {
1.255 oster 3705: if (e != ENODEV)
3706: printf("raid%d: cache flush to component %s failed.\n",
3707: raidPtr->raidid, raidPtr->Disks[c].devname);
1.252 oster 3708: if (error == 0) {
3709: error = e;
3710: }
3711: }
3712: }
3713: }
3714:
3715: for( c = 0; c < raidPtr->numSpare ; c++) {
3716: sparecol = raidPtr->numCol + c;
3717: /* Need to ensure that the reconstruct actually completed! */
3718: if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3719: e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3720: DIOCCACHESYNC, &force, FWRITE, NOCRED);
3721: if (e) {
1.255 oster 3722: if (e != ENODEV)
3723: printf("raid%d: cache flush to component %s failed.\n",
3724: raidPtr->raidid, raidPtr->Disks[sparecol].devname);
1.252 oster 3725: if (error == 0) {
3726: error = e;
3727: }
3728: }
3729: }
3730: }
3731: return error;
3732: }
1.327 pgoyette 3733:
1.353 mrg 3734: /* Fill in info with the current status */
3735: void
3736: rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3737: {
3738:
3739: if (raidPtr->status != rf_rs_reconstructing) {
3740: info->total = 100;
3741: info->completed = 100;
3742: } else {
3743: info->total = raidPtr->reconControl->numRUsTotal;
3744: info->completed = raidPtr->reconControl->numRUsComplete;
3745: }
3746: info->remaining = info->total - info->completed;
3747: }
3748:
3749: /* Fill in info with the current status */
3750: void
3751: rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3752: {
3753:
3754: if (raidPtr->parity_rewrite_in_progress == 1) {
3755: info->total = raidPtr->Layout.numStripe;
3756: info->completed = raidPtr->parity_rewrite_stripes_done;
3757: } else {
3758: info->completed = 100;
3759: info->total = 100;
3760: }
3761: info->remaining = info->total - info->completed;
3762: }
3763:
3764: /* Fill in info with the current status */
3765: void
3766: rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3767: {
3768:
3769: if (raidPtr->copyback_in_progress == 1) {
3770: info->total = raidPtr->Layout.numStripe;
3771: info->completed = raidPtr->copyback_stripes_done;
3772: info->remaining = info->total - info->completed;
3773: } else {
3774: info->remaining = 0;
3775: info->completed = 100;
3776: info->total = 100;
3777: }
3778: }
3779:
3780: /* Fill in config with the current info */
3781: int
3782: rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
3783: {
3784: int d, i, j;
3785:
3786: if (!raidPtr->valid)
3787: return (ENODEV);
3788: config->cols = raidPtr->numCol;
3789: config->ndevs = raidPtr->numCol;
3790: if (config->ndevs >= RF_MAX_DISKS)
3791: return (ENOMEM);
3792: config->nspares = raidPtr->numSpare;
3793: if (config->nspares >= RF_MAX_DISKS)
3794: return (ENOMEM);
3795: config->maxqdepth = raidPtr->maxQueueDepth;
3796: d = 0;
3797: for (j = 0; j < config->cols; j++) {
3798: config->devs[d] = raidPtr->Disks[j];
3799: d++;
3800: }
3801: for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
3802: config->spares[i] = raidPtr->Disks[j];
3803: if (config->spares[i].status == rf_ds_rebuilding_spare) {
3804: /* XXX: raidctl(8) expects to see this as a used spare */
3805: config->spares[i].status = rf_ds_used_spare;
3806: }
3807: }
3808: return 0;
3809: }
3810:
3811: int
3812: rf_get_component_label(RF_Raid_t *raidPtr, void *data)
3813: {
3814: RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
3815: RF_ComponentLabel_t *raid_clabel;
3816: int column = clabel->column;
3817:
3818: if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
3819: return EINVAL;
3820: raid_clabel = raidget_component_label(raidPtr, column);
3821: memcpy(clabel, raid_clabel, sizeof *clabel);
3822:
3823: return 0;
3824: }
3825:
/*
 * Module interface
 */

/* Driver-class module; depends on the dk(4) subr and FCFS bufq strategy. */
MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");

#ifdef _MODULE
/* cfdriver is declared here only for the loadable-module case; the
 * built-in kernel gets it from config(1)-generated code. */
CFDRIVER_DECL(raid, DV_DISK, NULL);
#endif

/* Forward declarations for the modcmd dispatcher and its init/fini legs. */
static int raid_modcmd(modcmd_t, void *);
static int raid_modcmd_init(void);
static int raid_modcmd_fini(void);
3839:
3840: static int
3841: raid_modcmd(modcmd_t cmd, void *data)
3842: {
3843: int error;
3844:
3845: error = 0;
3846: switch (cmd) {
3847: case MODULE_CMD_INIT:
3848: error = raid_modcmd_init();
3849: break;
3850: case MODULE_CMD_FINI:
3851: error = raid_modcmd_fini();
3852: break;
3853: default:
3854: error = ENOTTY;
3855: break;
3856: }
3857: return error;
3858: }
3859:
/*
 * Module initialization: set up global RAIDframe state, attach the
 * devsw entries and autoconf glue, boot the RAIDframe core, and
 * register a finalizer that auto-configures RAID sets once all real
 * hardware has attached.  On any failure the steps already completed
 * are rolled back in reverse order before returning the error.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	/* Hold raid_lock across the whole attach sequence. */
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* -1 asks devsw_attach to pick the majors dynamically. */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	/* EEXIST is tolerated: the devsw may already be built in. */
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		/* Unwind: detach the devsw attached above. */
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* Unwind cfdriver (module case) and devsw. */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	/* NOTE(review): error is always 0 here (all failures returned
	 * above), so this guard is effectively unconditional. */
	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		/* Non-fatal: the module still loads without autoconfig. */
		error = 0;
	}

	return error;
}
3930:
/*
 * Module teardown: refuse to unload while any raid device exists,
 * then detach autoconf glue and devsw, shut down the RAIDframe core,
 * and destroy the global lock.  If a detach step fails, the steps
 * already undone are re-attached so the module stays in a consistent,
 * loaded state and the error is returned.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist. */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: cannot detach cfattach\n",__func__);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: cannot detach cfdriver\n",__func__);
		/* Roll back: re-attach the cfattach removed above. */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
	if (error != 0) {
		aprint_error("%s: cannot detach devsw\n",__func__);
		/* Roll back cfdriver (module case) and cfattach. */
#ifdef _MODULE
		config_cfdriver_attach(&raid_cd);
#endif
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
	/* All glue detached: shut down the RAIDframe core. */
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
CVSweb <webmaster@jp.NetBSD.org>