Annotation of src/sys/dev/raidframe/rf_netbsdkintf.c, Revision 1.376.4.1
1.376.4.1! martin 1: /* $NetBSD: rf_netbsdkintf.c,v 1.376 2019/03/01 11:06:56 pgoyette Exp $ */
1.281 rmind 2:
1.1 oster 3: /*-
1.295 erh 4: * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
1.1 oster 5: * All rights reserved.
6: *
7: * This code is derived from software contributed to The NetBSD Foundation
8: * by Greg Oster; Jason R. Thorpe.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
18: *
19: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29: * POSSIBILITY OF SUCH DAMAGE.
30: */
31:
32: /*
1.281 rmind 33: * Copyright (c) 1988 University of Utah.
1.1 oster 34: * Copyright (c) 1990, 1993
35: * The Regents of the University of California. All rights reserved.
36: *
37: * This code is derived from software contributed to Berkeley by
38: * the Systems Programming Group of the University of Utah Computer
39: * Science Department.
40: *
41: * Redistribution and use in source and binary forms, with or without
42: * modification, are permitted provided that the following conditions
43: * are met:
44: * 1. Redistributions of source code must retain the above copyright
45: * notice, this list of conditions and the following disclaimer.
46: * 2. Redistributions in binary form must reproduce the above copyright
47: * notice, this list of conditions and the following disclaimer in the
48: * documentation and/or other materials provided with the distribution.
1.162 agc 49: * 3. Neither the name of the University nor the names of its contributors
50: * may be used to endorse or promote products derived from this software
51: * without specific prior written permission.
52: *
53: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63: * SUCH DAMAGE.
64: *
1.376.4.1! martin 65: * from: Utah $Hdr$
1.162 agc 66: *
67: * @(#)cd.c 8.2 (Berkeley) 11/16/93
68: */
69:
70: /*
1.1 oster 71: * Copyright (c) 1995 Carnegie-Mellon University.
72: * All rights reserved.
73: *
74: * Authors: Mark Holland, Jim Zelenka
75: *
76: * Permission to use, copy, modify and distribute this software and
77: * its documentation is hereby granted, provided that both the copyright
78: * notice and this permission notice appear in all copies of the
79: * software, derivative works or modified versions, and any portions
80: * thereof, and that both notices appear in supporting documentation.
81: *
82: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85: *
86: * Carnegie Mellon requests users of this software to return to
87: *
88: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
89: * School of Computer Science
90: * Carnegie Mellon University
91: * Pittsburgh PA 15213-3890
92: *
93: * any improvements or extensions that they make and grant Carnegie the
94: * rights to redistribute these changes.
95: */
96:
97: /***********************************************************
98: *
99: * rf_kintf.c -- the kernel interface routines for RAIDframe
100: *
101: ***********************************************************/
1.112 lukem 102:
103: #include <sys/cdefs.h>
1.376.4.1! martin 104: __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.376 2019/03/01 11:06:56 pgoyette Exp $");
1.251 ad 105:
106: #ifdef _KERNEL_OPT
107: #include "opt_raid_autoconfig.h"
1.363 mrg 108: #include "opt_compat_netbsd32.h"
1.251 ad 109: #endif
1.1 oster 110:
1.113 lukem 111: #include <sys/param.h>
1.1 oster 112: #include <sys/errno.h>
113: #include <sys/pool.h>
1.152 thorpej 114: #include <sys/proc.h>
1.1 oster 115: #include <sys/queue.h>
116: #include <sys/disk.h>
117: #include <sys/device.h>
118: #include <sys/stat.h>
119: #include <sys/ioctl.h>
120: #include <sys/fcntl.h>
121: #include <sys/systm.h>
122: #include <sys/vnode.h>
123: #include <sys/disklabel.h>
124: #include <sys/conf.h>
125: #include <sys/buf.h>
1.182 yamt 126: #include <sys/bufq.h>
1.65 oster 127: #include <sys/reboot.h>
1.208 elad 128: #include <sys/kauth.h>
1.327 pgoyette 129: #include <sys/module.h>
1.358 pgoyette 130: #include <sys/compat_stub.h>
1.8 oster 131:
1.234 oster 132: #include <prop/proplib.h>
133:
1.110 oster 134: #include <dev/raidframe/raidframevar.h>
135: #include <dev/raidframe/raidframeio.h>
1.269 jld 136: #include <dev/raidframe/rf_paritymap.h>
1.251 ad 137:
1.1 oster 138: #include "rf_raid.h"
1.44 oster 139: #include "rf_copyback.h"
1.1 oster 140: #include "rf_dag.h"
141: #include "rf_dagflags.h"
1.99 oster 142: #include "rf_desc.h"
1.1 oster 143: #include "rf_diskqueue.h"
144: #include "rf_etimer.h"
145: #include "rf_general.h"
146: #include "rf_kintf.h"
147: #include "rf_options.h"
148: #include "rf_driver.h"
149: #include "rf_parityscan.h"
150: #include "rf_threadstuff.h"
151:
1.325 christos 152: #include "ioconf.h"
153:
1.133 oster 154: #ifdef DEBUG
1.9 oster 155: int rf_kdebug_level = 0;
1.1 oster 156: #define db1_printf(a) if (rf_kdebug_level > 0) printf a
1.9 oster 157: #else /* DEBUG */
1.1 oster 158: #define db1_printf(a) { }
1.9 oster 159: #endif /* DEBUG */
1.1 oster 160:
1.344 christos 161: #ifdef DEBUG_ROOT
162: #define DPRINTF(a, ...) printf(a, __VA_ARGS__)
1.345 christos 163: #else
164: #define DPRINTF(a, ...)
1.344 christos 165: #endif
166:
1.249 oster 167: #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1.289 mrg 168: static rf_declare_mutex2(rf_sparet_wait_mutex);
1.287 mrg 169: static rf_declare_cond2(rf_sparet_wait_cv);
170: static rf_declare_cond2(rf_sparet_resp_cv);
1.1 oster 171:
1.10 oster 172: static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
173: * spare table */
174: static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
175: * installation process */
1.249 oster 176: #endif
1.153 thorpej 177:
178: MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
1.10 oster 179:
1.1 oster 180: /* prototypes */
1.187 christos 181: static void KernelWakeupFunc(struct buf *);
182: static void InitBP(struct buf *, struct vnode *, unsigned,
1.225 christos 183: dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
1.187 christos 184: void *, int, struct proc *);
1.300 christos 185: static void raidinit(struct raid_softc *);
1.335 mlelstv 186: static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
1.348 jdolecek 187: static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);
1.1 oster 188:
1.261 dyoung 189: static int raid_match(device_t, cfdata_t, void *);
190: static void raid_attach(device_t, device_t, void *);
191: static int raid_detach(device_t, int);
1.130 gehenna 192:
1.269 jld 193: static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
194: daddr_t, daddr_t);
195: static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
196: daddr_t, daddr_t, int);
197:
1.276 mrg 198: static int raidwrite_component_label(unsigned,
199: dev_t, struct vnode *, RF_ComponentLabel_t *);
200: static int raidread_component_label(unsigned,
201: dev_t, struct vnode *, RF_ComponentLabel_t *);
1.269 jld 202:
1.335 mlelstv 203: static int raid_diskstart(device_t, struct buf *bp);
204: static int raid_dumpblocks(device_t, void *, daddr_t, int);
205: static int raid_lastclose(device_t);
1.269 jld 206:
1.324 mrg 207: static dev_type_open(raidopen);
208: static dev_type_close(raidclose);
209: static dev_type_read(raidread);
210: static dev_type_write(raidwrite);
211: static dev_type_ioctl(raidioctl);
212: static dev_type_strategy(raidstrategy);
213: static dev_type_dump(raiddump);
214: static dev_type_size(raidsize);
1.130 gehenna 215:
/* Block-device entry points for raid(4); behaves as a standard disk. */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
226:
/* Character (raw) device entry points for raid(4). */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
1.1 oster 241:
/* Hooks handed to the common disk (dk) framework for this driver. */
static struct dkdriver rf_dkdriver = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_diskstart = raid_diskstart,
	.d_dumpblocks = raid_dumpblocks,
	.d_lastclose = raid_lastclose,
	.d_minphys = minphys
};
1.235 oster 251:
1.1 oster 252: #define raidunit(x) DISKUNIT(x)
1.335 mlelstv 253: #define raidsoftc(dev) (((struct raid_softc *)device_private(dev))->sc_r.softc)
1.1 oster 254:
1.202 oster 255: extern struct cfdriver raid_cd;
1.266 dyoung 256: CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
257: raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
258: DVF_DETACH_SHUTDOWN);
1.202 oster 259:
/* Internal representation of a rf_recon_req */
struct rf_recon_req_internal {
	RF_RowCol_t col;		/* component column the request refers to */
	RF_ReconReqFlags_t flags;	/* reconstruction request flags */
	void *raidPtr;			/* the RF_Raid_t this request targets */
};
266:
1.186 perry 267: /*
268: * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
269: * Be aware that large numbers can allow the driver to consume a lot of
1.28 oster 270: * kernel memory, especially on writes, and in degraded mode reads.
1.186 perry 271: *
272: * For example: with a stripe width of 64 blocks (32k) and 5 disks,
273: * a single 64K write will typically require 64K for the old data,
274: * 64K for the old parity, and 64K for the new parity, for a total
1.28 oster 275: * of 192K (if the parity buffer is not re-used immediately).
1.110 oster 276: * Even it if is used immediately, that's still 128K, which when multiplied
1.28 oster 277: * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
1.186 perry 278: *
1.28 oster 279: * Now in degraded mode, for example, a 64K read on the above setup may
1.186 perry 280: * require data reconstruction, which will require *all* of the 4 remaining
1.28 oster 281: * disks to participate -- 4 * 32K/disk == 128K again.
1.20 oster 282: */
283:
284: #ifndef RAIDOUTSTANDING
1.28 oster 285: #define RAIDOUTSTANDING 6
1.20 oster 286: #endif
287:
1.1 oster 288: #define RAIDLABELDEV(dev) \
289: (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
290:
291: /* declared here, and made public, for the benefit of KVM stuff.. */
1.9 oster 292:
1.104 oster 293: static int raidlock(struct raid_softc *);
294: static void raidunlock(struct raid_softc *);
1.1 oster 295:
1.266 dyoung 296: static int raid_detach_unlocked(struct raid_softc *);
297:
1.104 oster 298: static void rf_markalldirty(RF_Raid_t *);
1.304 christos 299: static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
1.48 oster 300:
1.353 mrg 301: void rf_ReconThread(struct rf_recon_req_internal *);
1.104 oster 302: void rf_RewriteParityThread(RF_Raid_t *raidPtr);
303: void rf_CopybackThread(RF_Raid_t *raidPtr);
1.353 mrg 304: void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
1.261 dyoung 305: int rf_autoconfig(device_t);
1.142 thorpej 306: void rf_buildroothack(RF_ConfigSet_t *);
1.104 oster 307:
308: RF_AutoConfig_t *rf_find_raid_components(void);
309: RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
310: static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
1.292 oster 311: int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
1.104 oster 312: void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
313: int rf_set_autoconfig(RF_Raid_t *, int);
314: int rf_set_rootpartition(RF_Raid_t *, int);
315: void rf_release_all_vps(RF_ConfigSet_t *);
316: void rf_cleanup_config_set(RF_ConfigSet_t *);
317: int rf_have_enough_components(RF_ConfigSet_t *);
1.300 christos 318: struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
1.278 mrg 319: static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
1.48 oster 320:
1.295 erh 321: /*
322: * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
323: * Note that this is overridden by having RAID_AUTOCONFIG as an option
324: * in the kernel config file.
325: */
326: #ifdef RAID_AUTOCONFIG
327: int raidautoconfig = 1;
328: #else
329: int raidautoconfig = 0;
330: #endif
331: static bool raidautoconfigdone = false;
1.37 oster 332:
1.177 oster 333: struct RF_Pools_s rf_pools;
334:
1.300 christos 335: static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
336: static kmutex_t raid_lock;
1.1 oster 337:
1.300 christos 338: static struct raid_softc *
339: raidcreate(int unit) {
340: struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
341: sc->sc_unit = unit;
1.327 pgoyette 342: cv_init(&sc->sc_cv, "raidunit");
343: mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
1.300 christos 344: return sc;
345: }
1.1 oster 346:
1.300 christos 347: static void
348: raiddestroy(struct raid_softc *sc) {
1.327 pgoyette 349: cv_destroy(&sc->sc_cv);
350: mutex_destroy(&sc->sc_mutex);
1.300 christos 351: kmem_free(sc, sizeof(*sc));
352: }
1.50 oster 353:
1.300 christos 354: static struct raid_softc *
1.327 pgoyette 355: raidget(int unit, bool create) {
1.300 christos 356: struct raid_softc *sc;
357: if (unit < 0) {
358: #ifdef DIAGNOSTIC
359: panic("%s: unit %d!", __func__, unit);
360: #endif
361: return NULL;
362: }
363: mutex_enter(&raid_lock);
364: LIST_FOREACH(sc, &raids, sc_link) {
365: if (sc->sc_unit == unit) {
366: mutex_exit(&raid_lock);
367: return sc;
368: }
369: }
370: mutex_exit(&raid_lock);
1.327 pgoyette 371: if (!create)
372: return NULL;
1.300 christos 373: if ((sc = raidcreate(unit)) == NULL)
374: return NULL;
375: mutex_enter(&raid_lock);
376: LIST_INSERT_HEAD(&raids, sc, sc_link);
377: mutex_exit(&raid_lock);
378: return sc;
379: }
380:
381: static void
382: raidput(struct raid_softc *sc) {
383: mutex_enter(&raid_lock);
384: LIST_REMOVE(sc, sc_link);
385: mutex_exit(&raid_lock);
386: raiddestroy(sc);
387: }
1.1 oster 388:
void
raidattach(int num)
{

	/*
	 * Deliberately empty: device attachment and the associated
	 * initialization are performed by the module initialization
	 * path instead.
	 */
}
398:
399: int
1.261 dyoung 400: rf_autoconfig(device_t self)
1.142 thorpej 401: {
402: RF_AutoConfig_t *ac_list;
403: RF_ConfigSet_t *config_sets;
404:
1.295 erh 405: if (!raidautoconfig || raidautoconfigdone == true)
1.142 thorpej 406: return (0);
407:
408: /* XXX This code can only be run once. */
1.295 erh 409: raidautoconfigdone = true;
1.142 thorpej 410:
1.307 christos 411: #ifdef __HAVE_CPU_BOOTCONF
412: /*
413: * 0. find the boot device if needed first so we can use it later
414: * this needs to be done before we autoconfigure any raid sets,
415: * because if we use wedges we are not going to be able to open
416: * the boot device later
417: */
418: if (booted_device == NULL)
419: cpu_bootconf();
420: #endif
1.48 oster 421: /* 1. locate all RAID components on the system */
1.258 ad 422: aprint_debug("Searching for RAID components...\n");
1.48 oster 423: ac_list = rf_find_raid_components();
424:
1.142 thorpej 425: /* 2. Sort them into their respective sets. */
1.48 oster 426: config_sets = rf_create_auto_sets(ac_list);
427:
1.142 thorpej 428: /*
1.299 oster 429: * 3. Evaluate each set and configure the valid ones.
1.142 thorpej 430: * This gets done in rf_buildroothack().
431: */
432: rf_buildroothack(config_sets);
1.48 oster 433:
1.213 christos 434: return 1;
1.48 oster 435: }
436:
/* Return non-zero when this set has been configured (RAIDF_INITED set). */
int
rf_inited(const struct raid_softc *rs) {
	return (rs->sc_flags & RAIDF_INITED) != 0;
}
441:
/* Accessor: the RAIDframe state embedded in the softc. */
RF_Raid_t *
rf_get_raid(struct raid_softc *rs) {
	return &rs->sc_r;
}
446:
/* Accessor: the unit number of this raid(4) instance. */
int
rf_get_unit(const struct raid_softc *rs) {
	return rs->sc_unit;
}
451:
1.306 christos 452: static int
1.307 christos 453: rf_containsboot(RF_Raid_t *r, device_t bdv) {
1.359 bad 454: const char *bootname;
455: size_t len;
456:
457: /* if bdv is NULL, the set can't contain it. exit early. */
458: if (bdv == NULL)
459: return 0;
460:
461: bootname = device_xname(bdv);
462: len = strlen(bootname);
1.306 christos 463:
464: for (int col = 0; col < r->numCol; col++) {
1.307 christos 465: const char *devname = r->Disks[col].devname;
1.306 christos 466: devname += sizeof("/dev/") - 1;
1.307 christos 467: if (strncmp(devname, "dk", 2) == 0) {
468: const char *parent =
469: dkwedge_get_parent_name(r->Disks[col].dev);
470: if (parent != NULL)
471: devname = parent;
472: }
1.306 christos 473: if (strncmp(devname, bootname, len) == 0) {
474: struct raid_softc *sc = r->softc;
475: aprint_debug("raid%d includes boot device %s\n",
476: sc->sc_unit, devname);
477: return 1;
478: }
479: }
480: return 0;
481: }
482:
1.48 oster 483: void
1.142 thorpej 484: rf_buildroothack(RF_ConfigSet_t *config_sets)
1.48 oster 485: {
486: RF_ConfigSet_t *cset;
487: RF_ConfigSet_t *next_cset;
1.51 oster 488: int num_root;
1.300 christos 489: struct raid_softc *sc, *rsc;
1.335 mlelstv 490: struct dk_softc *dksc;
1.48 oster 491:
1.300 christos 492: sc = rsc = NULL;
1.51 oster 493: num_root = 0;
1.48 oster 494: cset = config_sets;
1.271 dyoung 495: while (cset != NULL) {
1.48 oster 496: next_cset = cset->next;
1.186 perry 497: if (rf_have_enough_components(cset) &&
1.300 christos 498: cset->ac->clabel->autoconfigure == 1) {
499: sc = rf_auto_config_set(cset);
500: if (sc != NULL) {
1.359 bad 501: aprint_debug("raid%d: configured ok, rootable %d\n",
502: sc->sc_unit, cset->rootable);
1.51 oster 503: if (cset->rootable) {
1.300 christos 504: rsc = sc;
1.51 oster 505: num_root++;
506: }
507: } else {
508: /* The autoconfig didn't work :( */
1.300 christos 509: aprint_debug("Autoconfig failed\n");
1.51 oster 510: rf_release_all_vps(cset);
1.48 oster 511: }
512: } else {
1.186 perry 513: /* we're not autoconfiguring this set...
1.48 oster 514: release the associated resources */
1.49 oster 515: rf_release_all_vps(cset);
1.48 oster 516: }
517: /* cleanup */
1.49 oster 518: rf_cleanup_config_set(cset);
1.48 oster 519: cset = next_cset;
520: }
1.335 mlelstv 521: dksc = &rsc->sc_dksc;
1.122 oster 522:
1.223 oster 523: /* if the user has specified what the root device should be
524: then we don't touch booted_device or boothowto... */
525:
1.359 bad 526: if (rootspec != NULL) {
527: DPRINTF("%s: rootspec %s\n", __func__, rootspec);
1.223 oster 528: return;
1.359 bad 529: }
1.223 oster 530:
1.122 oster 531: /* we found something bootable... */
532:
1.310 christos 533: /*
534: * XXX: The following code assumes that the root raid
535: * is the first ('a') partition. This is about the best
536: * we can do with a BSD disklabel, but we might be able
537: * to do better with a GPT label, by setting a specified
538: * attribute to indicate the root partition. We can then
539: * stash the partition number in the r->root_partition
540: * high bits (the bottom 2 bits are already used). For
541: * now we just set booted_partition to 0 when we override
542: * root.
543: */
1.122 oster 544: if (num_root == 1) {
1.306 christos 545: device_t candidate_root;
1.335 mlelstv 546: if (dksc->sc_dkdev.dk_nwedges != 0) {
1.297 christos 547: char cname[sizeof(cset->ac->devname)];
1.344 christos 548: /* XXX: assume partition 'a' first */
1.297 christos 549: snprintf(cname, sizeof(cname), "%s%c",
1.335 mlelstv 550: device_xname(dksc->sc_dev), 'a');
1.306 christos 551: candidate_root = dkwedge_find_by_wname(cname);
1.344 christos 552: DPRINTF("%s: candidate wedge root=%s\n", __func__,
553: cname);
554: if (candidate_root == NULL) {
555: /*
556: * If that is not found, because we don't use
557: * disklabel, return the first dk child
558: * XXX: we can skip the 'a' check above
559: * and always do this...
560: */
561: size_t i = 0;
562: candidate_root = dkwedge_find_by_parent(
563: device_xname(dksc->sc_dev), &i);
564: }
565: DPRINTF("%s: candidate wedge root=%p\n", __func__,
566: candidate_root);
1.297 christos 567: } else
1.335 mlelstv 568: candidate_root = dksc->sc_dev;
1.344 christos 569: DPRINTF("%s: candidate root=%p\n", __func__, candidate_root);
570: DPRINTF("%s: booted_device=%p root_partition=%d "
1.359 bad 571: "contains_boot=%d",
572: __func__, booted_device, rsc->sc_r.root_partition,
573: rf_containsboot(&rsc->sc_r, booted_device));
574: /* XXX the check for booted_device == NULL can probably be
575: * dropped, now that rf_containsboot handles that case.
576: */
1.308 christos 577: if (booted_device == NULL ||
578: rsc->sc_r.root_partition == 1 ||
1.310 christos 579: rf_containsboot(&rsc->sc_r, booted_device)) {
1.308 christos 580: booted_device = candidate_root;
1.351 christos 581: booted_method = "raidframe/single";
1.310 christos 582: booted_partition = 0; /* XXX assume 'a' */
583: }
1.122 oster 584: } else if (num_root > 1) {
1.344 christos 585: DPRINTF("%s: many roots=%d, %p\n", __func__, num_root,
586: booted_device);
1.226 oster 587:
588: /*
589: * Maybe the MD code can help. If it cannot, then
590: * setroot() will discover that we have no
591: * booted_device and will ask the user if nothing was
592: * hardwired in the kernel config file
593: */
594: if (booted_device == NULL)
595: return;
596:
597: num_root = 0;
1.300 christos 598: mutex_enter(&raid_lock);
599: LIST_FOREACH(sc, &raids, sc_link) {
600: RF_Raid_t *r = &sc->sc_r;
601: if (r->valid == 0)
1.226 oster 602: continue;
603:
1.300 christos 604: if (r->root_partition == 0)
1.226 oster 605: continue;
606:
1.306 christos 607: if (rf_containsboot(r, booted_device)) {
1.226 oster 608: num_root++;
1.300 christos 609: rsc = sc;
1.335 mlelstv 610: dksc = &rsc->sc_dksc;
1.226 oster 611: }
612: }
1.300 christos 613: mutex_exit(&raid_lock);
1.295 erh 614:
1.226 oster 615: if (num_root == 1) {
1.335 mlelstv 616: booted_device = dksc->sc_dev;
1.351 christos 617: booted_method = "raidframe/multi";
1.310 christos 618: booted_partition = 0; /* XXX assume 'a' */
1.226 oster 619: } else {
620: /* we can't guess.. require the user to answer... */
621: boothowto |= RB_ASKNAME;
622: }
1.51 oster 623: }
1.1 oster 624: }
625:
1.324 mrg 626: static int
1.169 oster 627: raidsize(dev_t dev)
1.1 oster 628: {
629: struct raid_softc *rs;
1.335 mlelstv 630: struct dk_softc *dksc;
631: unsigned int unit;
1.1 oster 632:
633: unit = raidunit(dev);
1.327 pgoyette 634: if ((rs = raidget(unit, false)) == NULL)
1.336 mlelstv 635: return -1;
1.335 mlelstv 636: dksc = &rs->sc_dksc;
637:
1.1 oster 638: if ((rs->sc_flags & RAIDF_INITED) == 0)
1.336 mlelstv 639: return -1;
1.1 oster 640:
1.335 mlelstv 641: return dk_size(dksc, dev);
642: }
1.1 oster 643:
1.335 mlelstv 644: static int
645: raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
646: {
647: unsigned int unit;
648: struct raid_softc *rs;
649: struct dk_softc *dksc;
1.1 oster 650:
1.335 mlelstv 651: unit = raidunit(dev);
652: if ((rs = raidget(unit, false)) == NULL)
653: return ENXIO;
654: dksc = &rs->sc_dksc;
1.1 oster 655:
1.335 mlelstv 656: if ((rs->sc_flags & RAIDF_INITED) == 0)
657: return ENODEV;
1.1 oster 658:
1.336 mlelstv 659: /*
660: Note that blkno is relative to this particular partition.
661: By adding adding RF_PROTECTED_SECTORS, we get a value that
662: is relative to the partition used for the underlying component.
663: */
664: blkno += RF_PROTECTED_SECTORS;
665:
1.376.4.1! martin 666: return dk_dump(dksc, dev, blkno, va, size, DK_DUMP_RECURSIVE);
1.1 oster 667: }
668:
/*
 * Write `nblk' blocks of crash-dump data at `va' to block `blkno'
 * of a live component of the set.  Only RAID 1 sets (one data and
 * one parity column) are supported.  Returns 0 on success, EINVAL
 * when the layout is unsupported or no live component exists, or
 * an error from raidlock()/the component's dump routine.
 */
static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int c, sparecol, j, scol, dumpto;
	int error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	   Look for a component to dump to.  The preference for the
	   component to dump to is as follows:
	   1) the master
	   2) a used_spare of the master
	   3) the slave
	   4) a used_spare of the slave
	*/

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	   At this point we have possibly selected a live master or a
	   live slave.  We now check to see if there is a spared
	   master (or a spared slave), if we didn't find a live master
	   or a live slave.
	*/

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one? */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				   We must have found a spared master!
				   We'll take that over anything else
				   found so far.  (We couldn't have
				   found a real master before, since
				   this is a used spare, and it's
				   saying that it's replacing the
				   master.)  On reboot (with
				   autoconfiguration turned on)
				   sparecol will become the 1st
				   component (component0) of this set.
				*/
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				   Must be a spared slave.  We'll dump
				   to that if we havn't found anything
				   else so far.
				*/
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
				blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}
1.324 mrg 774:
1.1 oster 775: /* ARGSUSED */
1.324 mrg 776: static int
1.222 christos 777: raidopen(dev_t dev, int flags, int fmt,
778: struct lwp *l)
1.1 oster 779: {
1.9 oster 780: int unit = raidunit(dev);
1.1 oster 781: struct raid_softc *rs;
1.335 mlelstv 782: struct dk_softc *dksc;
783: int error = 0;
1.9 oster 784: int part, pmask;
785:
1.327 pgoyette 786: if ((rs = raidget(unit, true)) == NULL)
1.300 christos 787: return ENXIO;
1.1 oster 788: if ((error = raidlock(rs)) != 0)
1.9 oster 789: return (error);
1.266 dyoung 790:
791: if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
792: error = EBUSY;
793: goto bad;
794: }
795:
1.335 mlelstv 796: dksc = &rs->sc_dksc;
1.1 oster 797:
798: part = DISKPART(dev);
799: pmask = (1 << part);
800:
1.335 mlelstv 801: if (!DK_BUSY(dksc, pmask) &&
1.13 oster 802: ((rs->sc_flags & RAIDF_INITED) != 0)) {
803: /* First one... mark things as dirty... Note that we *MUST*
804: have done a configure before this. I DO NOT WANT TO BE
805: SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
806: THAT THEY BELONG TOGETHER!!!!! */
807: /* XXX should check to see if we're only open for reading
808: here... If so, we needn't do this, but then need some
809: other way of keeping track of what's happened.. */
810:
1.300 christos 811: rf_markalldirty(&rs->sc_r);
1.13 oster 812: }
813:
1.335 mlelstv 814: if ((rs->sc_flags & RAIDF_INITED) != 0)
815: error = dk_open(dksc, dev, flags, fmt, l);
1.1 oster 816:
1.213 christos 817: bad:
1.1 oster 818: raidunlock(rs);
819:
1.9 oster 820: return (error);
1.1 oster 821:
822:
823: }
1.324 mrg 824:
1.335 mlelstv 825: static int
826: raid_lastclose(device_t self)
827: {
828: struct raid_softc *rs = raidsoftc(self);
829:
830: /* Last one... device is not unconfigured yet.
831: Device shutdown has taken care of setting the
832: clean bits if RAIDF_INITED is not set
833: mark things as clean... */
834:
835: rf_update_component_labels(&rs->sc_r,
836: RF_FINAL_COMPONENT_UPDATE);
837:
838: /* pass to unlocked code */
839: if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
840: rs->sc_flags |= RAIDF_DETACH;
841:
842: return 0;
843: }
844:
1.1 oster 845: /* ARGSUSED */
1.324 mrg 846: static int
1.222 christos 847: raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
1.1 oster 848: {
1.9 oster 849: int unit = raidunit(dev);
1.1 oster 850: struct raid_softc *rs;
1.335 mlelstv 851: struct dk_softc *dksc;
852: cfdata_t cf;
853: int error = 0, do_detach = 0, do_put = 0;
1.1 oster 854:
1.327 pgoyette 855: if ((rs = raidget(unit, false)) == NULL)
1.300 christos 856: return ENXIO;
1.335 mlelstv 857: dksc = &rs->sc_dksc;
1.1 oster 858:
859: if ((error = raidlock(rs)) != 0)
860: return (error);
861:
1.335 mlelstv 862: if ((rs->sc_flags & RAIDF_INITED) != 0) {
863: error = dk_close(dksc, dev, flags, fmt, l);
864: if ((rs->sc_flags & RAIDF_DETACH) != 0)
865: do_detach = 1;
866: } else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
867: do_put = 1;
1.1 oster 868:
1.335 mlelstv 869: raidunlock(rs);
1.1 oster 870:
1.335 mlelstv 871: if (do_detach) {
872: /* free the pseudo device attach bits */
873: cf = device_cfdata(dksc->sc_dev);
874: error = config_detach(dksc->sc_dev, 0);
875: if (error == 0)
876: free(cf, M_RAIDFRAME);
877: } else if (do_put) {
878: raidput(rs);
1.1 oster 879: }
1.186 perry 880:
1.335 mlelstv 881: return (error);
1.147 oster 882:
1.335 mlelstv 883: }
1.327 pgoyette 884:
1.335 mlelstv 885: static void
886: raid_wakeup(RF_Raid_t *raidPtr)
887: {
888: rf_lock_mutex2(raidPtr->iodone_lock);
889: rf_signal_cond2(raidPtr->iodone_cv);
890: rf_unlock_mutex2(raidPtr->iodone_lock);
1.1 oster 891: }
892:
1.324 mrg 893: static void
1.169 oster 894: raidstrategy(struct buf *bp)
1.1 oster 895: {
1.335 mlelstv 896: unsigned int unit;
897: struct raid_softc *rs;
898: struct dk_softc *dksc;
1.1 oster 899: RF_Raid_t *raidPtr;
900:
1.335 mlelstv 901: unit = raidunit(bp->b_dev);
1.327 pgoyette 902: if ((rs = raidget(unit, false)) == NULL) {
1.30 oster 903: bp->b_error = ENXIO;
1.335 mlelstv 904: goto fail;
1.30 oster 905: }
1.300 christos 906: if ((rs->sc_flags & RAIDF_INITED) == 0) {
907: bp->b_error = ENXIO;
1.335 mlelstv 908: goto fail;
1.1 oster 909: }
1.335 mlelstv 910: dksc = &rs->sc_dksc;
1.300 christos 911: raidPtr = &rs->sc_r;
1.335 mlelstv 912:
913: /* Queue IO only */
914: if (dk_strategy_defer(dksc, bp))
1.196 yamt 915: goto done;
1.1 oster 916:
1.335 mlelstv 917: /* schedule the IO to happen at the next convenient time */
918: raid_wakeup(raidPtr);
919:
920: done:
921: return;
922:
923: fail:
924: bp->b_resid = bp->b_bcount;
925: biodone(bp);
926: }
927:
928: static int
929: raid_diskstart(device_t dev, struct buf *bp)
930: {
931: struct raid_softc *rs = raidsoftc(dev);
932: RF_Raid_t *raidPtr;
1.1 oster 933:
1.335 mlelstv 934: raidPtr = &rs->sc_r;
935: if (!raidPtr->valid) {
936: db1_printf(("raid is not valid..\n"));
937: return ENODEV;
1.196 yamt 938: }
1.285 mrg 939:
1.335 mlelstv 940: /* XXX */
941: bp->b_resid = 0;
942:
943: return raiddoaccess(raidPtr, bp);
944: }
1.1 oster 945:
1.335 mlelstv 946: void
947: raiddone(RF_Raid_t *raidPtr, struct buf *bp)
948: {
949: struct raid_softc *rs;
950: struct dk_softc *dksc;
1.34 oster 951:
1.335 mlelstv 952: rs = raidPtr->softc;
953: dksc = &rs->sc_dksc;
1.34 oster 954:
1.335 mlelstv 955: dk_done(dksc, bp);
1.34 oster 956:
1.335 mlelstv 957: rf_lock_mutex2(raidPtr->mutex);
958: raidPtr->openings++;
959: rf_unlock_mutex2(raidPtr->mutex);
1.196 yamt 960:
1.335 mlelstv 961: /* schedule more IO */
962: raid_wakeup(raidPtr);
1.1 oster 963: }
1.324 mrg 964:
1.1 oster 965: /* ARGSUSED */
1.324 mrg 966: static int
1.222 christos 967: raidread(dev_t dev, struct uio *uio, int flags)
1.1 oster 968: {
1.9 oster 969: int unit = raidunit(dev);
1.1 oster 970: struct raid_softc *rs;
971:
1.327 pgoyette 972: if ((rs = raidget(unit, false)) == NULL)
1.300 christos 973: return ENXIO;
1.1 oster 974:
975: if ((rs->sc_flags & RAIDF_INITED) == 0)
976: return (ENXIO);
977:
978: return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
979:
980: }
1.324 mrg 981:
1.1 oster 982: /* ARGSUSED */
1.324 mrg 983: static int
1.222 christos 984: raidwrite(dev_t dev, struct uio *uio, int flags)
1.1 oster 985: {
1.9 oster 986: int unit = raidunit(dev);
1.1 oster 987: struct raid_softc *rs;
988:
1.327 pgoyette 989: if ((rs = raidget(unit, false)) == NULL)
1.300 christos 990: return ENXIO;
1.1 oster 991:
992: if ((rs->sc_flags & RAIDF_INITED) == 0)
993: return (ENXIO);
1.147 oster 994:
1.1 oster 995: return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
996:
997: }
998:
1.266 dyoung 999: static int
1000: raid_detach_unlocked(struct raid_softc *rs)
1001: {
1.335 mlelstv 1002: struct dk_softc *dksc = &rs->sc_dksc;
1003: RF_Raid_t *raidPtr;
1.266 dyoung 1004: int error;
1005:
1.300 christos 1006: raidPtr = &rs->sc_r;
1.266 dyoung 1007:
1.337 mlelstv 1008: if (DK_BUSY(dksc, 0) ||
1009: raidPtr->recon_in_progress != 0 ||
1010: raidPtr->parity_rewrite_in_progress != 0 ||
1011: raidPtr->copyback_in_progress != 0)
1.266 dyoung 1012: return EBUSY;
1013:
1014: if ((rs->sc_flags & RAIDF_INITED) == 0)
1.333 mlelstv 1015: return 0;
1016:
1017: rs->sc_flags &= ~RAIDF_SHUTDOWN;
1018:
1019: if ((error = rf_Shutdown(raidPtr)) != 0)
1.266 dyoung 1020: return error;
1021:
1.335 mlelstv 1022: rs->sc_flags &= ~RAIDF_INITED;
1023:
1024: /* Kill off any queued buffers */
1025: dk_drain(dksc);
1026: bufq_free(dksc->sc_bufq);
1027:
1.266 dyoung 1028: /* Detach the disk. */
1.335 mlelstv 1029: dkwedge_delall(&dksc->sc_dkdev);
1030: disk_detach(&dksc->sc_dkdev);
1031: disk_destroy(&dksc->sc_dkdev);
1032: dk_detach(dksc);
1.333 mlelstv 1033:
1.266 dyoung 1034: return 0;
1035: }
1036:
1.366 christos 1037: static bool
1038: rf_must_be_initialized(const struct raid_softc *rs, u_long cmd)
1039: {
1040: switch (cmd) {
1041: case RAIDFRAME_ADD_HOT_SPARE:
1042: case RAIDFRAME_CHECK_COPYBACK_STATUS:
1043: case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1044: case RAIDFRAME_CHECK_PARITY:
1045: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1046: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1047: case RAIDFRAME_CHECK_RECON_STATUS:
1048: case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1049: case RAIDFRAME_COPYBACK:
1050: case RAIDFRAME_DELETE_COMPONENT:
1051: case RAIDFRAME_FAIL_DISK:
1052: case RAIDFRAME_GET_ACCTOTALS:
1053: case RAIDFRAME_GET_COMPONENT_LABEL:
1054: case RAIDFRAME_GET_INFO:
1055: case RAIDFRAME_GET_SIZE:
1056: case RAIDFRAME_INCORPORATE_HOT_SPARE:
1057: case RAIDFRAME_INIT_LABELS:
1058: case RAIDFRAME_KEEP_ACCTOTALS:
1059: case RAIDFRAME_PARITYMAP_GET_DISABLE:
1060: case RAIDFRAME_PARITYMAP_SET_DISABLE:
1061: case RAIDFRAME_PARITYMAP_SET_PARAMS:
1062: case RAIDFRAME_PARITYMAP_STATUS:
1063: case RAIDFRAME_REBUILD_IN_PLACE:
1064: case RAIDFRAME_REMOVE_HOT_SPARE:
1065: case RAIDFRAME_RESET_ACCTOTALS:
1066: case RAIDFRAME_REWRITEPARITY:
1067: case RAIDFRAME_SET_AUTOCONFIG:
1068: case RAIDFRAME_SET_COMPONENT_LABEL:
1069: case RAIDFRAME_SET_ROOT:
1.369 oster 1070: return (rs->sc_flags & RAIDF_INITED) == 0;
1.366 christos 1071: }
1072: return false;
1073: }
1074:
/*
 * Mark a component as failed and kick off a reconstruction thread.
 * Validates the request under raidPtr->mutex; all rejection paths
 * inside the locked region funnel through "out" and return EINVAL.
 */
int
rf_fail_disk(RF_Raid_t *raidPtr, struct rf_recon_req *rr)
{
	struct rf_recon_req_internal *rrint;

	if (raidPtr->Layout.map->faultsTolerated == 0) {
		/* Can't do this on a RAID 0!! */
		return EINVAL;
	}

	if (rr->col < 0 || rr->col >= raidPtr->numCol) {
		/* bad column */
		return EINVAL;
	}

	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->status == rf_rs_reconstructing) {
		/* you can't fail a disk while we're reconstructing! */
		/* XXX wrong for RAID6 */
		goto out;
	}
	if ((raidPtr->Disks[rr->col].status == rf_ds_optimal) &&
	    (raidPtr->numFailures > 0)) {
		/* some other component has failed.  Let's not make
		   things worse. XXX wrong for RAID6 */
		goto out;
	}
	if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
		/* Can't fail a spared disk! */
		goto out;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	/* make a copy of the recon request so that we don't rely on
	 * the user's buffer */
	/* NOTE(review): rrint ownership passes to the recon thread on
	 * success; if RF_CREATE_THREAD fails, rrint appears to leak —
	 * confirm against RF_CREATE_THREAD semantics. */
	rrint = RF_Malloc(sizeof(*rrint));
	if (rrint == NULL)
		return(ENOMEM);
	rrint->col = rr->col;
	rrint->flags = rr->flags;
	rrint->raidPtr = raidPtr;

	return RF_CREATE_THREAD(raidPtr->recon_thread, rf_ReconThread,
	    rrint, "raid_recon");
out:
	rf_unlock_mutex2(raidPtr->mutex);
	return EINVAL;
}
1123:
1.324 mrg 1124: static int
1.367 christos 1125: rf_copyinspecificbuf(RF_Config_t *k_cfg)
1126: {
1127: /* allocate a buffer for the layout-specific data, and copy it in */
1128: if (k_cfg->layoutSpecificSize == 0)
1129: return 0;
1130:
1131: if (k_cfg->layoutSpecificSize > 10000) {
1132: /* sanity check */
1133: return EINVAL;
1134: }
1135:
1136: u_char *specific_buf;
1.374 christos 1137: specific_buf = RF_Malloc(k_cfg->layoutSpecificSize);
1.367 christos 1138: if (specific_buf == NULL)
1139: return ENOMEM;
1140:
1141: int retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1142: k_cfg->layoutSpecificSize);
1143: if (retcode) {
1144: RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1145: db1_printf(("%s: retcode=%d copyin.2\n", __func__, retcode));
1146: return retcode;
1147: }
1148:
1149: k_cfg->layoutSpecific = specific_buf;
1150: return 0;
1151: }
1152:
/*
 * Copy a RF_Config_t in from user space.  data is a pointer to a user
 * pointer to the configuration structure.  On success *k_cfg holds a
 * freshly allocated kernel copy (caller frees); on failure it is
 * released here.
 */
static int
rf_getConfiguration(struct raid_softc *rs, void *data, RF_Config_t **k_cfg)
{
	RF_Config_t *u_cfg = *((RF_Config_t **) data);

	if (rs->sc_r.valid) {
		/* There is a valid RAID set running on this unit! */
		printf("raid%d: Device already configured!\n", rs->sc_unit);
		return EINVAL;
	}

	/* copy-in the configuration information */
	/* data points to a pointer to the configuration structure */
	*k_cfg = RF_Malloc(sizeof(**k_cfg));
	if (*k_cfg == NULL) {
		return ENOMEM;
	}
	int retcode = copyin(u_cfg, *k_cfg, sizeof(RF_Config_t));
	if (retcode == 0)
		return 0;
	RF_Free(*k_cfg, sizeof(RF_Config_t));
	db1_printf(("%s: retcode=%d copyin.1\n", __func__, retcode));
	/* Request detach-on-close after a failed configure attempt. */
	rs->sc_flags |= RAIDF_SHUTDOWN;
	return retcode;
}
1178:
/*
 * Configure a RAID set from the kernel copy of the configuration.
 * Consumes k_cfg (and its layout-specific buffer) in all cases.  On
 * failure, RAIDF_SHUTDOWN is set so the unit is detached on close.
 */
int
rf_construct(struct raid_softc *rs, RF_Config_t *k_cfg)
{
	int retcode;
	RF_Raid_t *raidPtr = &rs->sc_r;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((retcode = rf_copyinspecificbuf(k_cfg)) != 0)
		goto out;

	/* should do some kind of sanity check on the configuration.
	 * Store the sum of all the bytes in the last byte? */

	/* configure the system */

	/*
	 * Clear the entire RAID descriptor, just to make sure
	 * there is no stale data left in the case of a
	 * reconfiguration
	 */
	memset(raidPtr, 0, sizeof(*raidPtr));
	raidPtr->softc = rs;
	raidPtr->raidid = rs->sc_unit;

	retcode = rf_Configure(raidPtr, k_cfg, NULL);

	if (retcode == 0) {
		/* allow this many simultaneous IO's to
		   this RAID device */
		raidPtr->openings = RAIDOUTSTANDING;

		raidinit(rs);
		raid_wakeup(raidPtr);
		rf_markalldirty(raidPtr);
	}

	/* free the buffers. No return code here. */
	if (k_cfg->layoutSpecificSize) {
		RF_Free(k_cfg->layoutSpecific, k_cfg->layoutSpecificSize);
	}
out:
	RF_Free(k_cfg, sizeof(RF_Config_t));
	if (retcode) {
		/*
		 * If configuration failed, set sc_flags so that we
		 * will detach the device when we close it.
		 */
		rs->sc_flags |= RAIDF_SHUTDOWN;
	}
	return retcode;
}
1231:
#if RF_DISABLED
/*
 * Set a component label from user data.  Currently compiled out
 * (RF_DISABLED) and deliberately inert beyond validation + copy:
 * users should re-init labels rather than patch them.
 */
static int
rf_set_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{

	/* XXX check the label for valid stuff... */
	/* Note that some things *should not* get modified --
	   the user should be re-initing the labels instead of
	   trying to patch things.
	   */
#ifdef DEBUG
	int raidid = raidPtr->raidid;
	printf("raid%d: Got component label:\n", raidid);
	printf("raid%d: Version: %d\n", raidid, clabel->version);
	printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
	printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
	printf("raid%d: Column: %d\n", raidid, clabel->column);
	printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
	printf("raid%d: Clean: %d\n", raidid, clabel->clean);
	printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif /* DEBUG */
	clabel->row = 0;
	int column = clabel->column;

	if ((column < 0) || (column >= raidPtr->numCol)) {
		return(EINVAL);
	}

	/* XXX this isn't allowed to do anything for now :-) */

	/* XXX and before it is, we need to fill in the rest
	   of the fields!?!?!?! */
	memcpy(raidget_component_label(raidPtr, column),
	    clabel, sizeof(*clabel));
	raidflush_component_label(raidPtr, column);
	return 0;
}
#endif
1270:
/*
 * (Re)write the component labels for every live disk in the set.
 * Only the serial number is taken from the user-supplied label; the
 * rest is regenerated from the running configuration.
 */
static int
rf_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{
	/*
	   we only want the serial number from
	   the above.  We get all the rest of the information
	   from the config that was used to create this RAID
	   set.
	   */

	raidPtr->serial_number = clabel->serial_number;

	for (int column = 0; column < raidPtr->numCol; column++) {
		RF_RaidDisk_t *diskPtr = &raidPtr->Disks[column];
		/* Skip failed/absent components. */
		if (RF_DEAD_DISK(diskPtr->status))
			continue;
		RF_ComponentLabel_t *ci_label = raidget_component_label(
		    raidPtr, column);
		/* Zeroing this is important. */
		memset(ci_label, 0, sizeof(*ci_label));
		raid_init_component_label(raidPtr, ci_label);
		ci_label->serial_number = raidPtr->serial_number;
		ci_label->row = 0; /* we dont' pretend to support more */
		rf_component_label_set_partitionsize(ci_label,
		    diskPtr->partitionSize);
		ci_label->column = column;
		raidflush_component_label(raidPtr, column);
		/* XXXjld what about the spares? */
	}

	return 0;
}
1303:
/*
 * Rebuild a failed component in place (onto the same disk) by
 * starting a reconstruct-in-place thread.  Validation of the target
 * column's state is done under raidPtr->mutex.
 */
static int
rf_rebuild_in_place(RF_Raid_t *raidPtr, RF_SingleComponent_t *componentPtr)
{

	if (raidPtr->Layout.map->faultsTolerated == 0) {
		/* Can't do this on a RAID 0!! */
		return EINVAL;
	}

	if (raidPtr->recon_in_progress == 1) {
		/* a reconstruct is already in progress! */
		return EINVAL;
	}

	/* Copy the request so we don't depend on the user's buffer. */
	RF_SingleComponent_t component;
	memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
	component.row = 0; /* we don't support any more */
	int column = component.column;

	if ((column < 0) || (column >= raidPtr->numCol)) {
		return EINVAL;
	}

	rf_lock_mutex2(raidPtr->mutex);
	if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
	    (raidPtr->numFailures > 0)) {
		/* XXX 0 above shouldn't be constant!!! */
		/* some component other than this has failed.
		   Let's not make things worse than they already
		   are... */
		printf("raid%d: Unable to reconstruct to disk at:\n",
		       raidPtr->raidid);
		printf("raid%d:     Col: %d   Too many failures.\n",
		       raidPtr->raidid, column);
		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	if (raidPtr->Disks[column].status == rf_ds_reconstructing) {
		printf("raid%d: Unable to reconstruct to disk at:\n",
		       raidPtr->raidid);
		printf("raid%d:    Col: %d   "
		       "Reconstruction already occurring!\n",
		       raidPtr->raidid, column);

		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	if (raidPtr->Disks[column].status == rf_ds_spared) {
		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	rf_unlock_mutex2(raidPtr->mutex);

	/* Ownership of rrint passes to the recon-in-place thread. */
	struct rf_recon_req_internal *rrint;
	rrint = RF_Malloc(sizeof(*rrint));
	if (rrint == NULL)
		return ENOMEM;

	rrint->col = column;
	rrint->raidPtr = raidPtr;

	return RF_CREATE_THREAD(raidPtr->recon_thread,
	    rf_ReconstructInPlaceThread, rrint, "raid_reconip");
}
1371:
1372: static int
1373: rf_check_recon_status(RF_Raid_t *raidPtr, int *data)
1374: {
1375: /*
1376: * This makes no sense on a RAID 0, or if we are not reconstructing
1377: * so tell the user it's done.
1378: */
1379: if (raidPtr->Layout.map->faultsTolerated == 0 ||
1380: raidPtr->status != rf_rs_reconstructing) {
1381: *data = 100;
1382: return 0;
1383: }
1384: if (raidPtr->reconControl->numRUsTotal == 0) {
1385: *data = 0;
1386: return 0;
1387: }
1388: *data = (raidPtr->reconControl->numRUsComplete * 100
1389: / raidPtr->reconControl->numRUsTotal);
1390: return 0;
1391: }
1392:
1393: static int
1.225 christos 1394: raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1.1 oster 1395: {
1.9 oster 1396: int unit = raidunit(dev);
1.335 mlelstv 1397: int part, pmask;
1.1 oster 1398: struct raid_softc *rs;
1.335 mlelstv 1399: struct dk_softc *dksc;
1.367 christos 1400: RF_Config_t *k_cfg;
1.42 oster 1401: RF_Raid_t *raidPtr;
1.41 oster 1402: RF_AccTotals_t *totals;
1.367 christos 1403: RF_SingleComponent_t component;
1.371 oster 1404: RF_DeviceConfig_t *d_cfg, *ucfgp;
1.11 oster 1405: int retcode = 0;
1406: int column;
1.48 oster 1407: RF_ComponentLabel_t *clabel;
1.12 oster 1408: RF_SingleComponent_t *sparePtr,*componentPtr;
1.353 mrg 1409: int d;
1.1 oster 1410:
1.327 pgoyette 1411: if ((rs = raidget(unit, false)) == NULL)
1.300 christos 1412: return ENXIO;
1.366 christos 1413:
1.335 mlelstv 1414: dksc = &rs->sc_dksc;
1.300 christos 1415: raidPtr = &rs->sc_r;
1.1 oster 1416:
1.276 mrg 1417: db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1.366 christos 1418: (int) DISKPART(dev), (int) unit, cmd));
1.1 oster 1419:
1420: /* Must be initialized for these... */
1.366 christos 1421: if (rf_must_be_initialized(rs, cmd))
1422: return ENXIO;
1.9 oster 1423:
1.358 pgoyette 1424: switch (cmd) {
1.1 oster 1425: /* configure the system */
1426: case RAIDFRAME_CONFIGURE:
1.367 christos 1427: if ((retcode = rf_getConfiguration(rs, data, &k_cfg)) != 0)
1428: return retcode;
1429: return rf_construct(rs, k_cfg);
1.9 oster 1430:
1431: /* shutdown the system */
1.1 oster 1432: case RAIDFRAME_SHUTDOWN:
1.9 oster 1433:
1.266 dyoung 1434: part = DISKPART(dev);
1435: pmask = (1 << part);
1436:
1.367 christos 1437: if ((retcode = raidlock(rs)) != 0)
1438: return retcode;
1.1 oster 1439:
1.337 mlelstv 1440: if (DK_BUSY(dksc, pmask) ||
1441: raidPtr->recon_in_progress != 0 ||
1442: raidPtr->parity_rewrite_in_progress != 0 ||
1443: raidPtr->copyback_in_progress != 0)
1.266 dyoung 1444: retcode = EBUSY;
1445: else {
1.335 mlelstv 1446: /* detach and free on close */
1.266 dyoung 1447: rs->sc_flags |= RAIDF_SHUTDOWN;
1448: retcode = 0;
1.9 oster 1449: }
1.11 oster 1450:
1.266 dyoung 1451: raidunlock(rs);
1.1 oster 1452:
1.367 christos 1453: return retcode;
1.11 oster 1454: case RAIDFRAME_GET_COMPONENT_LABEL:
1.353 mrg 1455: return rf_get_component_label(raidPtr, data);
1.11 oster 1456:
1.367 christos 1457: #if RF_DISABLED
1.11 oster 1458: case RAIDFRAME_SET_COMPONENT_LABEL:
1.367 christos 1459: return rf_set_component_label(raidPtr, data);
1460: #endif
1.11 oster 1461:
1.367 christos 1462: case RAIDFRAME_INIT_LABELS:
1463: return rf_init_component_label(raidPtr, data);
1.12 oster 1464:
1.48 oster 1465: case RAIDFRAME_SET_AUTOCONFIG:
1.78 minoura 1466: d = rf_set_autoconfig(raidPtr, *(int *) data);
1.186 perry 1467: printf("raid%d: New autoconfig value is: %d\n",
1.123 oster 1468: raidPtr->raidid, d);
1.78 minoura 1469: *(int *) data = d;
1.367 christos 1470: return retcode;
1.48 oster 1471:
1472: case RAIDFRAME_SET_ROOT:
1.78 minoura 1473: d = rf_set_rootpartition(raidPtr, *(int *) data);
1.186 perry 1474: printf("raid%d: New rootpartition value is: %d\n",
1.123 oster 1475: raidPtr->raidid, d);
1.78 minoura 1476: *(int *) data = d;
1.367 christos 1477: return retcode;
1.9 oster 1478:
1.1 oster 1479: /* initialize all parity */
1480: case RAIDFRAME_REWRITEPARITY:
1481:
1.42 oster 1482: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.17 oster 1483: /* Parity for RAID 0 is trivially correct */
1.42 oster 1484: raidPtr->parity_good = RF_RAID_CLEAN;
1.367 christos 1485: return 0;
1.17 oster 1486: }
1.186 perry 1487:
1.42 oster 1488: if (raidPtr->parity_rewrite_in_progress == 1) {
1.37 oster 1489: /* Re-write is already in progress! */
1.367 christos 1490: return EINVAL;
1.37 oster 1491: }
1.27 oster 1492:
1.367 christos 1493: return RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1494: rf_RewriteParityThread, raidPtr,"raid_parity");
1.11 oster 1495:
1496: case RAIDFRAME_ADD_HOT_SPARE:
1.12 oster 1497: sparePtr = (RF_SingleComponent_t *) data;
1.367 christos 1498: memcpy(&component, sparePtr, sizeof(RF_SingleComponent_t));
1499: return rf_add_hot_spare(raidPtr, &component);
1.11 oster 1500:
1501: case RAIDFRAME_REMOVE_HOT_SPARE:
1.367 christos 1502: return retcode;
1.73 oster 1503:
1504: case RAIDFRAME_DELETE_COMPONENT:
1505: componentPtr = (RF_SingleComponent_t *)data;
1.367 christos 1506: memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
1507: return rf_delete_component(raidPtr, &component);
1.73 oster 1508:
1509: case RAIDFRAME_INCORPORATE_HOT_SPARE:
1510: componentPtr = (RF_SingleComponent_t *)data;
1.367 christos 1511: memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
1512: return rf_incorporate_hot_spare(raidPtr, &component);
1.11 oster 1513:
1.12 oster 1514: case RAIDFRAME_REBUILD_IN_PLACE:
1.367 christos 1515: return rf_rebuild_in_place(raidPtr, data);
1.24 oster 1516:
1.366 christos 1517: case RAIDFRAME_GET_INFO:
1.371 oster 1518: ucfgp = *(RF_DeviceConfig_t **)data;
1.374 christos 1519: d_cfg = RF_Malloc(sizeof(*d_cfg));
1.41 oster 1520: if (d_cfg == NULL)
1.366 christos 1521: return ENOMEM;
1.353 mrg 1522: retcode = rf_get_info(raidPtr, d_cfg);
1523: if (retcode == 0) {
1.371 oster 1524: retcode = copyout(d_cfg, ucfgp, sizeof(*d_cfg));
1.41 oster 1525: }
1526: RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1.366 christos 1527: return retcode;
1.9 oster 1528:
1.22 oster 1529: case RAIDFRAME_CHECK_PARITY:
1.42 oster 1530: *(int *) data = raidPtr->parity_good;
1.367 christos 1531: return 0;
1.41 oster 1532:
1.269 jld 1533: case RAIDFRAME_PARITYMAP_STATUS:
1.273 jld 1534: if (rf_paritymap_ineligible(raidPtr))
1535: return EINVAL;
1.367 christos 1536: rf_paritymap_status(raidPtr->parity_map, data);
1.269 jld 1537: return 0;
1538:
1539: case RAIDFRAME_PARITYMAP_SET_PARAMS:
1.273 jld 1540: if (rf_paritymap_ineligible(raidPtr))
1541: return EINVAL;
1.269 jld 1542: if (raidPtr->parity_map == NULL)
1543: return ENOENT; /* ??? */
1.367 christos 1544: if (rf_paritymap_set_params(raidPtr->parity_map, data, 1) != 0)
1.269 jld 1545: return EINVAL;
1546: return 0;
1547:
1548: case RAIDFRAME_PARITYMAP_GET_DISABLE:
1.273 jld 1549: if (rf_paritymap_ineligible(raidPtr))
1550: return EINVAL;
1.269 jld 1551: *(int *) data = rf_paritymap_get_disable(raidPtr);
1552: return 0;
1553:
1554: case RAIDFRAME_PARITYMAP_SET_DISABLE:
1.273 jld 1555: if (rf_paritymap_ineligible(raidPtr))
1556: return EINVAL;
1.269 jld 1557: rf_paritymap_set_disable(raidPtr, *(int *)data);
1558: /* XXX should errors be passed up? */
1559: return 0;
1560:
1.1 oster 1561: case RAIDFRAME_RESET_ACCTOTALS:
1.108 thorpej 1562: memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1.367 christos 1563: return 0;
1.9 oster 1564:
1.1 oster 1565: case RAIDFRAME_GET_ACCTOTALS:
1.41 oster 1566: totals = (RF_AccTotals_t *) data;
1.42 oster 1567: *totals = raidPtr->acc_totals;
1.366 christos 1568: return 0;
1.9 oster 1569:
1.1 oster 1570: case RAIDFRAME_KEEP_ACCTOTALS:
1.42 oster 1571: raidPtr->keep_acc_totals = *(int *)data;
1.366 christos 1572: return 0;
1.9 oster 1573:
1.1 oster 1574: case RAIDFRAME_GET_SIZE:
1.42 oster 1575: *(int *) data = raidPtr->totalSectors;
1.366 christos 1576: return 0;
1.1 oster 1577:
1578: case RAIDFRAME_FAIL_DISK:
1.366 christos 1579: return rf_fail_disk(raidPtr, data);
1.9 oster 1580:
1581: /* invoke a copyback operation after recon on whatever disk
1582: * needs it, if any */
1583: case RAIDFRAME_COPYBACK:
1.24 oster 1584:
1.42 oster 1585: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.24 oster 1586: /* This makes no sense on a RAID 0!! */
1.367 christos 1587: return EINVAL;
1.24 oster 1588: }
1589:
1.42 oster 1590: if (raidPtr->copyback_in_progress == 1) {
1.37 oster 1591: /* Copyback is already in progress! */
1.367 christos 1592: return EINVAL;
1.37 oster 1593: }
1.27 oster 1594:
1.367 christos 1595: return RF_CREATE_THREAD(raidPtr->copyback_thread,
1596: rf_CopybackThread, raidPtr, "raid_copyback");
1.9 oster 1597:
1.1 oster 1598: /* return the percentage completion of reconstruction */
1.37 oster 1599: case RAIDFRAME_CHECK_RECON_STATUS:
1.367 christos 1600: return rf_check_recon_status(raidPtr, data);
1601:
1.83 oster 1602: case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1.353 mrg 1603: rf_check_recon_status_ext(raidPtr, data);
1.367 christos 1604: return 0;
1.9 oster 1605:
1.37 oster 1606: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1.42 oster 1607: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.80 oster 1608: /* This makes no sense on a RAID 0, so tell the
1609: user it's done. */
1610: *(int *) data = 100;
1.367 christos 1611: return 0;
1.37 oster 1612: }
1.42 oster 1613: if (raidPtr->parity_rewrite_in_progress == 1) {
1.186 perry 1614: *(int *) data = 100 *
1615: raidPtr->parity_rewrite_stripes_done /
1.83 oster 1616: raidPtr->Layout.numStripe;
1.37 oster 1617: } else {
1618: *(int *) data = 100;
1619: }
1.367 christos 1620: return 0;
1.37 oster 1621:
1.83 oster 1622: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1.353 mrg 1623: rf_check_parityrewrite_status_ext(raidPtr, data);
1.367 christos 1624: return 0;
1.83 oster 1625:
1.37 oster 1626: case RAIDFRAME_CHECK_COPYBACK_STATUS:
1.42 oster 1627: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.37 oster 1628: /* This makes no sense on a RAID 0 */
1.83 oster 1629: *(int *) data = 100;
1.367 christos 1630: return 0;
1.37 oster 1631: }
1.42 oster 1632: if (raidPtr->copyback_in_progress == 1) {
1633: *(int *) data = 100 * raidPtr->copyback_stripes_done /
1634: raidPtr->Layout.numStripe;
1.37 oster 1635: } else {
1636: *(int *) data = 100;
1637: }
1.367 christos 1638: return 0;
1.37 oster 1639:
1.83 oster 1640: case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1.353 mrg 1641: rf_check_copyback_status_ext(raidPtr, data);
1642: return 0;
1.37 oster 1643:
1.341 christos 1644: case RAIDFRAME_SET_LAST_UNIT:
1645: for (column = 0; column < raidPtr->numCol; column++)
1646: if (raidPtr->Disks[column].status != rf_ds_optimal)
1647: return EBUSY;
1648:
1649: for (column = 0; column < raidPtr->numCol; column++) {
1650: clabel = raidget_component_label(raidPtr, column);
1651: clabel->last_unit = *(int *)data;
1652: raidflush_component_label(raidPtr, column);
1653: }
1654: rs->sc_cflags |= RAIDF_UNIT_CHANGED;
1655: return 0;
1656:
1.9 oster 1657: /* the sparetable daemon calls this to wait for the kernel to
1658: * need a spare table. this ioctl does not return until a
1659: * spare table is needed. XXX -- calling mpsleep here in the
1660: * ioctl code is almost certainly wrong and evil. -- XXX XXX
1661: * -- I should either compute the spare table in the kernel,
1662: * or have a different -- XXX XXX -- interface (a different
1.42 oster 1663: * character device) for delivering the table -- XXX */
1.367 christos 1664: #if RF_DISABLED
1.1 oster 1665: case RAIDFRAME_SPARET_WAIT:
1.287 mrg 1666: rf_lock_mutex2(rf_sparet_wait_mutex);
1.9 oster 1667: while (!rf_sparet_wait_queue)
1.287 mrg 1668: rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1.367 christos 1669: RF_SparetWait_t *waitreq = rf_sparet_wait_queue;
1.1 oster 1670: rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1.287 mrg 1671: rf_unlock_mutex2(rf_sparet_wait_mutex);
1.9 oster 1672:
1.42 oster 1673: /* structure assignment */
1.186 perry 1674: *((RF_SparetWait_t *) data) = *waitreq;
1.9 oster 1675:
1.1 oster 1676: RF_Free(waitreq, sizeof(*waitreq));
1.367 christos 1677: return 0;
1.9 oster 1678:
1679: /* wakes up a process waiting on SPARET_WAIT and puts an error
1680: * code in it that will cause the dameon to exit */
1.1 oster 1681: case RAIDFRAME_ABORT_SPARET_WAIT:
1.374 christos 1682: waitreq = RF_Malloc(sizeof(*waitreq));
1.1 oster 1683: waitreq->fcol = -1;
1.287 mrg 1684: rf_lock_mutex2(rf_sparet_wait_mutex);
1.1 oster 1685: waitreq->next = rf_sparet_wait_queue;
1686: rf_sparet_wait_queue = waitreq;
1.367 christos 1687: rf_broadcast_cond2(rf_sparet_wait_cv);
1.287 mrg 1688: rf_unlock_mutex2(rf_sparet_wait_mutex);
1.367 christos 1689: return 0;
1.1 oster 1690:
1.9 oster 1691: /* used by the spare table daemon to deliver a spare table
1692: * into the kernel */
1.1 oster 1693: case RAIDFRAME_SEND_SPARET:
1.9 oster 1694:
1.1 oster 1695: /* install the spare table */
1.42 oster 1696: retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1.9 oster 1697:
1698: /* respond to the requestor. the return status of the spare
1699: * table installation is passed in the "fcol" field */
1.374 christos 1700: waitred = RF_Malloc(sizeof(*waitreq));
1.1 oster 1701: waitreq->fcol = retcode;
1.287 mrg 1702: rf_lock_mutex2(rf_sparet_wait_mutex);
1.1 oster 1703: waitreq->next = rf_sparet_resp_queue;
1704: rf_sparet_resp_queue = waitreq;
1.287 mrg 1705: rf_broadcast_cond2(rf_sparet_resp_cv);
1706: rf_unlock_mutex2(rf_sparet_wait_mutex);
1.9 oster 1707:
1.367 christos 1708: return retcode;
1709: #endif
1710: default:
1.372 christos 1711: /*
1712: * Don't bother trying to load compat modules
1713: * if it is not our ioctl. This is more efficient
1714: * and makes rump tests not depend on compat code
1715: */
1716: if (IOCGROUP(cmd) != 'r')
1717: break;
1.367 christos 1718: #ifdef _LP64
1719: if ((l->l_proc->p_flag & PK_32) != 0) {
1720: module_autoload("compat_netbsd32_raid",
1721: MODULE_CLASS_EXEC);
1.376 pgoyette 1722: MODULE_HOOK_CALL(raidframe_netbsd32_ioctl_hook,
1.367 christos 1723: (rs, cmd, data), enosys(), retcode);
1724: if (retcode != EPASSTHROUGH)
1725: return retcode;
1726: }
1.1 oster 1727: #endif
1.367 christos 1728: module_autoload("compat_raid_80", MODULE_CLASS_EXEC);
1.376 pgoyette 1729: MODULE_HOOK_CALL(raidframe_ioctl_80_hook,
1.367 christos 1730: (rs, cmd, data), enosys(), retcode);
1731: if (retcode != EPASSTHROUGH)
1732: return retcode;
1.1 oster 1733:
1.367 christos 1734: module_autoload("compat_raid_50", MODULE_CLASS_EXEC);
1.376 pgoyette 1735: MODULE_HOOK_CALL(raidframe_ioctl_50_hook,
1.367 christos 1736: (rs, cmd, data), enosys(), retcode);
1737: if (retcode != EPASSTHROUGH)
1738: return retcode;
1.36 oster 1739: break; /* fall through to the os-specific code below */
1.1 oster 1740:
1741: }
1.9 oster 1742:
1.42 oster 1743: if (!raidPtr->valid)
1.9 oster 1744: return (EINVAL);
1745:
1.1 oster 1746: /*
1747: * Add support for "regular" device ioctls here.
1748: */
1.263 haad 1749:
1.1 oster 1750: switch (cmd) {
1.348 jdolecek 1751: case DIOCGCACHE:
1752: retcode = rf_get_component_caches(raidPtr, (int *)data);
1753: break;
1754:
1.252 oster 1755: case DIOCCACHESYNC:
1.346 jdolecek 1756: retcode = rf_sync_component_caches(raidPtr);
1.347 jdolecek 1757: break;
1.298 buhrow 1758:
1.1 oster 1759: default:
1.346 jdolecek 1760: retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
1.347 jdolecek 1761: break;
1.1 oster 1762: }
1.346 jdolecek 1763:
1.9 oster 1764: return (retcode);
1.1 oster 1765:
1766: }
1767:
1768:
1.9 oster 1769: /* raidinit -- complete the rest of the initialization for the
1.1 oster 1770: RAIDframe device. */
1771:
1772:
1.59 oster 1773: static void
1.300 christos 1774: raidinit(struct raid_softc *rs)
1.1 oster 1775: {
1.262 cegger 1776: cfdata_t cf;
1.335 mlelstv 1777: unsigned int unit;
1778: struct dk_softc *dksc = &rs->sc_dksc;
1.300 christos 1779: RF_Raid_t *raidPtr = &rs->sc_r;
1.335 mlelstv 1780: device_t dev;
1.1 oster 1781:
1.59 oster 1782: unit = raidPtr->raidid;
1.1 oster 1783:
1.179 itojun 1784: /* XXX doesn't check bounds. */
1.335 mlelstv 1785: snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);
1.1 oster 1786:
1.217 oster 1787: /* attach the pseudo device */
1788: cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1789: cf->cf_name = raid_cd.cd_name;
1790: cf->cf_atname = raid_cd.cd_name;
1791: cf->cf_unit = unit;
1792: cf->cf_fstate = FSTATE_STAR;
1793:
1.335 mlelstv 1794: dev = config_attach_pseudo(cf);
1795: if (dev == NULL) {
1.217 oster 1796: printf("raid%d: config_attach_pseudo failed\n",
1.270 christos 1797: raidPtr->raidid);
1.265 pooka 1798: free(cf, M_RAIDFRAME);
1799: return;
1.217 oster 1800: }
1801:
1.335 mlelstv 1802: /* provide a backpointer to the real softc */
1803: raidsoftc(dev) = rs;
1804:
1.1 oster 1805: /* disk_attach actually creates space for the CPU disklabel, among
1.9 oster 1806: * other things, so it's critical to call this *BEFORE* we try putzing
1807: * with disklabels. */
1.335 mlelstv 1808: dk_init(dksc, dev, DKTYPE_RAID);
1809: disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);
1.1 oster 1810:
1811: /* XXX There may be a weird interaction here between this, and
1.9 oster 1812: * protectedSectors, as used in RAIDframe. */
1.11 oster 1813:
1.9 oster 1814: rs->sc_size = raidPtr->totalSectors;
1.234 oster 1815:
1.335 mlelstv 1816: /* Attach dk and disk subsystems */
1817: dk_attach(dksc);
1818: disk_attach(&dksc->sc_dkdev);
1.318 mlelstv 1819: rf_set_geometry(rs, raidPtr);
1820:
1.335 mlelstv 1821: bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);
1822:
1823: /* mark unit as usuable */
1824: rs->sc_flags |= RAIDF_INITED;
1.234 oster 1825:
1.335 mlelstv 1826: dkwedge_discover(&dksc->sc_dkdev);
1.1 oster 1827: }
1.335 mlelstv 1828:
1.150 oster 1829: #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1.1 oster 1830: /* wake up the daemon & tell it to get us a spare table
1831: * XXX
1.9 oster 1832: * the entries in the queues should be tagged with the raidPtr
1.186 perry 1833: * so that in the extremely rare case that two recons happen at once,
1.11 oster 1834: * we know for which device were requesting a spare table
1.1 oster 1835: * XXX
1.186 perry 1836: *
1.39 oster 1837: * XXX This code is not currently used. GO
1.1 oster 1838: */
/*
 * Wake up the daemon and ask it for a spare table: enqueue the request
 * on rf_sparet_wait_queue, wait for a response to appear on
 * rf_sparet_resp_queue, and return the fcol from the response.
 * (XXX this code is not currently used -- see the note above.)
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	int retcode;

	rf_lock_mutex2(rf_sparet_wait_mutex);
	/* push the request and wake the daemon */
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/* mpsleep unlocks the mutex */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}
	/* NOTE(review): pops the head response; assumes it corresponds to
	   this request -- verify if this path is ever re-enabled */
	req = rf_sparet_resp_queue;
	rf_sparet_resp_queue = req->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	retcode = req->fcol;
	RF_Free(req, sizeof(*req));	/* this is not the same req as we
					 * alloc'd */
	return (retcode);
}
1.150 oster 1862: #endif
1.39 oster 1863:
1.186 perry 1864: /* a wrapper around rf_DoAccess that extracts appropriate info from the
1.11 oster 1865: * bp & passes it down.
1.1 oster 1866: * any calls originating in the kernel must use non-blocking I/O
1867: * do some extra sanity checking to return "appropriate" error values for
1868: * certain conditions (to make some standard utilities work)
1.186 perry 1869: *
1.34 oster 1870: * Formerly known as: rf_DoAccessKernel
1.1 oster 1871: */
/*
 * A wrapper around rf_DoAccess that extracts appropriate info from the
 * bp and passes it down (formerly rf_DoAccessKernel).  Any calls
 * originating in the kernel must use non-blocking I/O.  Flushes any
 * newly-noticed failures to the component labels, then lets the dk(4)
 * layer drain the buffer queue.
 */
void
raidstart(RF_Raid_t *raidPtr)
{
	struct raid_softc *rs;
	struct dk_softc *dksc;

	rs = raidPtr->softc;
	dksc = &rs->sc_dksc;
	/* quick check to see if anything has died recently */
	rf_lock_mutex2(raidPtr->mutex);
	if (raidPtr->numNewFailures > 0) {
		/* drop the lock: the label update takes its own locks */
		rf_unlock_mutex2(raidPtr->mutex);
		rf_update_component_labels(raidPtr,
		    RF_NORMAL_COMPONENT_UPDATE);
		rf_lock_mutex2(raidPtr->mutex);
		raidPtr->numNewFailures--;
	}
	rf_unlock_mutex2(raidPtr->mutex);

	if ((rs->sc_flags & RAIDF_INITED) == 0) {
		printf("raid%d: raidstart not ready\n", raidPtr->raidid);
		return;
	}

	/* hand the queued bufs to raiddoaccess() via the dk layer */
	dk_start(dksc, NULL);
}
1.34 oster 1898:
1.335 mlelstv 1899: static int
1900: raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
1901: {
1902: RF_SectorCount_t num_blocks, pb, sum;
1903: RF_RaidAddr_t raid_addr;
1904: daddr_t blocknum;
1905: int do_async;
1906: int rc;
1.186 perry 1907:
1.335 mlelstv 1908: rf_lock_mutex2(raidPtr->mutex);
1909: if (raidPtr->openings == 0) {
1910: rf_unlock_mutex2(raidPtr->mutex);
1911: return EAGAIN;
1912: }
1913: rf_unlock_mutex2(raidPtr->mutex);
1.186 perry 1914:
1.335 mlelstv 1915: blocknum = bp->b_rawblkno;
1.186 perry 1916:
1.335 mlelstv 1917: db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1918: (int) blocknum));
1.1 oster 1919:
1.335 mlelstv 1920: db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1921: db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1.1 oster 1922:
1.335 mlelstv 1923: /* *THIS* is where we adjust what block we're going to...
1924: * but DO NOT TOUCH bp->b_blkno!!! */
1925: raid_addr = blocknum;
1926:
1927: num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1928: pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1929: sum = raid_addr + num_blocks + pb;
1930: if (1 || rf_debugKernelAccess) {
1931: db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1932: (int) raid_addr, (int) sum, (int) num_blocks,
1933: (int) pb, (int) bp->b_resid));
1934: }
1935: if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1936: || (sum < num_blocks) || (sum < pb)) {
1937: rc = ENOSPC;
1938: goto done;
1939: }
1940: /*
1941: * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1942: */
1.186 perry 1943:
1.335 mlelstv 1944: if (bp->b_bcount & raidPtr->sectorMask) {
1945: rc = ENOSPC;
1946: goto done;
1947: }
1948: db1_printf(("Calling DoAccess..\n"));
1.99 oster 1949:
1.20 oster 1950:
1.335 mlelstv 1951: rf_lock_mutex2(raidPtr->mutex);
1952: raidPtr->openings--;
1.291 mrg 1953: rf_unlock_mutex2(raidPtr->mutex);
1.20 oster 1954:
1.335 mlelstv 1955: /*
1956: * Everything is async.
1957: */
1958: do_async = 1;
1.20 oster 1959:
1.335 mlelstv 1960: /* don't ever condition on bp->b_flags & B_WRITE.
1961: * always condition on B_READ instead */
1.7 explorer 1962:
1.335 mlelstv 1963: rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1964: RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1965: do_async, raid_addr, num_blocks,
1966: bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1967:
1968: done:
1969: return rc;
1970: }
1.7 explorer 1971:
1.1 oster 1972: /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1973:
/*
 * Invoke an I/O from kernel mode.  Disk queue should be locked upon
 * entry.  Issues the request on the backing component via
 * bdev_strategy(); completion arrives asynchronously through
 * KernelWakeupFunc().  Always returns 0.
 */
int
rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
{
	int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
	struct buf *bp;

	req->queue = queue;
	bp = req->bp;

	switch (req->type) {
	case RF_IO_TYPE_NOP:	/* used primarily to unlock a locked queue */
		/* XXX need to do something extra here.. */
		/* I'm leaving this in, as I've never actually seen it used,
		 * and I'd like folks to report it... GO */
		printf(("WAKEUP CALLED\n"));
		queue->numOutstanding++;

		bp->b_flags = 0;
		bp->b_private = req;

		/* no real I/O: fake an immediate completion */
		KernelWakeupFunc(bp);
		break;

	case RF_IO_TYPE_READ:
	case RF_IO_TYPE_WRITE:
#if RF_ACC_TRACE > 0
		if (req->tracerec) {
			RF_ETIMER_START(req->tracerec->timer);
		}
#endif
		InitBP(bp, queue->rf_cinfo->ci_vp,
		    op, queue->rf_cinfo->ci_dev,
		    req->sectorOffset, req->numSector,
		    req->buf, KernelWakeupFunc, (void *) req,
		    queue->raidPtr->logBytesPerSector, req->b_proc);

		if (rf_debugKernelAccess) {
			db1_printf(("dispatch: bp->b_blkno = %ld\n",
				(long) bp->b_blkno));
		}
		queue->numOutstanding++;
		queue->last_deq_sector = req->sectorOffset;
		/* acc wouldn't have been let in if there were any pending
		 * reqs at any other priority */
		queue->curPriority = req->priority;

		db1_printf(("Going for %c to unit %d col %d\n",
			    req->type, queue->raidPtr->raidid,
			    queue->col));
		db1_printf(("sector %d count %d (%d bytes) %d\n",
			(int) req->sectorOffset, (int) req->numSector,
			(int) (req->numSector <<
			    queue->raidPtr->logBytesPerSector),
			(int) queue->raidPtr->logBytesPerSector));

		/*
		 * XXX: drop lock here since this can block at
		 * least with backing SCSI devices.  Retake it
		 * to minimize fuss with calling interfaces.
		 */

		RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
		bdev_strategy(bp);
		RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
		break;

	default:
		panic("bad req->type in rf_DispatchKernelIO");
	}
	db1_printf(("Exiting from DispatchKernelIO\n"));

	return (0);
}
1.9 oster 2047: /* this is the callback function associated with a I/O invoked from
1.1 oster 2048: kernel code.
2049: */
/*
 * Completion callback for I/O invoked from kernel code (installed by
 * InitBP).  On error, marks the component failed -- but only once, and
 * only if that would not exceed the set's fault tolerance.  Records the
 * error in the request, appends it to the iodone queue and signals the
 * raidio thread.  All of this happens under iodone_lock.
 */
static void
KernelWakeupFunc(struct buf *bp)
{
	RF_DiskQueueData_t *req = NULL;
	RF_DiskQueue_t *queue;

	db1_printf(("recovering the request queue:\n"));

	req = bp->b_private;

	queue = (RF_DiskQueue_t *) req->queue;

	rf_lock_mutex2(queue->raidPtr->iodone_lock);

#if RF_ACC_TRACE > 0
	if (req->tracerec) {
		RF_ETIMER_STOP(req->tracerec->timer);
		RF_ETIMER_EVAL(req->tracerec->timer);
		rf_lock_mutex2(rf_tracing_mutex);
		req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
		req->tracerec->num_phys_ios++;
		rf_unlock_mutex2(rf_tracing_mutex);
	}
#endif

	/* XXX Ok, let's get aggressive... If b_error is set, let's go
	 * ballistic, and mark the component as hosed... */

	if (bp->b_error != 0) {
		/* Mark the disk as dead */
		/* but only mark it once... */
		/* and only if it wouldn't leave this RAID set
		   completely broken */
		if (((queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_optimal) ||
		     (queue->raidPtr->Disks[queue->col].status ==
		      rf_ds_used_spare)) &&
		     (queue->raidPtr->numFailures <
		      queue->raidPtr->Layout.map->faultsTolerated)) {
			printf("raid%d: IO Error (%d). Marking %s as failed.\n",
			       queue->raidPtr->raidid,
			       bp->b_error,
			       queue->raidPtr->Disks[queue->col].devname);
			queue->raidPtr->Disks[queue->col].status =
			    rf_ds_failed;
			queue->raidPtr->status = rf_rs_degraded;
			queue->raidPtr->numFailures++;
			queue->raidPtr->numNewFailures++;
		} else {	/* Disk is already dead... */
			/* printf("Disk already marked as dead!\n"); */
		}

	}

	/* Fill in the error value */
	req->error = bp->b_error;

	/* Drop this one on the "finished" queue... */
	TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);

	/* Let the raidio thread know there is work to be done. */
	rf_signal_cond2(queue->raidPtr->iodone_cv);

	rf_unlock_mutex2(queue->raidPtr->iodone_lock);
}
2116:
2117:
2118: /*
2119: * initialize a buf structure for doing an I/O in the kernel.
2120: */
1.186 perry 2121: static void
1.169 oster 2122: InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
1.225 christos 2123: RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
1.169 oster 2124: void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2125: struct proc *b_proc)
1.9 oster 2126: {
2127: /* bp->b_flags = B_PHYS | rw_flag; */
1.242 ad 2128: bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2129: bp->b_oflags = 0;
2130: bp->b_cflags = 0;
1.9 oster 2131: bp->b_bcount = numSect << logBytesPerSector;
2132: bp->b_bufsize = bp->b_bcount;
2133: bp->b_error = 0;
2134: bp->b_dev = dev;
1.187 christos 2135: bp->b_data = bf;
1.275 mrg 2136: bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
1.9 oster 2137: bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1.1 oster 2138: if (bp->b_bcount == 0) {
1.141 provos 2139: panic("bp->b_bcount is zero in InitBP!!");
1.1 oster 2140: }
1.161 fvdl 2141: bp->b_proc = b_proc;
1.9 oster 2142: bp->b_iodone = cbFunc;
1.207 simonb 2143: bp->b_private = cbArg;
1.1 oster 2144: }
2145:
2146: /*
2147: * Wait interruptibly for an exclusive lock.
2148: *
2149: * XXX
2150: * Several drivers do this; it should be abstracted and made MP-safe.
2151: * (Hmm... where have we seen this warning before :-> GO )
2152: */
2153: static int
1.169 oster 2154: raidlock(struct raid_softc *rs)
1.1 oster 2155: {
1.9 oster 2156: int error;
1.1 oster 2157:
1.335 mlelstv 2158: error = 0;
1.327 pgoyette 2159: mutex_enter(&rs->sc_mutex);
1.1 oster 2160: while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2161: rs->sc_flags |= RAIDF_WANTED;
1.327 pgoyette 2162: error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2163: if (error != 0)
1.335 mlelstv 2164: goto done;
1.1 oster 2165: }
2166: rs->sc_flags |= RAIDF_LOCKED;
1.335 mlelstv 2167: done:
1.327 pgoyette 2168: mutex_exit(&rs->sc_mutex);
1.335 mlelstv 2169: return (error);
1.1 oster 2170: }
2171: /*
2172: * Unlock and wake up any waiters.
2173: */
2174: static void
1.169 oster 2175: raidunlock(struct raid_softc *rs)
1.1 oster 2176: {
2177:
1.327 pgoyette 2178: mutex_enter(&rs->sc_mutex);
1.1 oster 2179: rs->sc_flags &= ~RAIDF_LOCKED;
2180: if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2181: rs->sc_flags &= ~RAIDF_WANTED;
1.327 pgoyette 2182: cv_broadcast(&rs->sc_cv);
1.1 oster 2183: }
1.327 pgoyette 2184: mutex_exit(&rs->sc_mutex);
1.11 oster 2185: }
1.186 perry 2186:
1.11 oster 2187:
2188: #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2189: #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
1.269 jld 2190: #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
1.11 oster 2191:
1.276 mrg 2192: static daddr_t
2193: rf_component_info_offset(void)
2194: {
2195:
2196: return RF_COMPONENT_INFO_OFFSET;
2197: }
2198:
2199: static daddr_t
2200: rf_component_info_size(unsigned secsize)
2201: {
2202: daddr_t info_size;
2203:
2204: KASSERT(secsize);
2205: if (secsize > RF_COMPONENT_INFO_SIZE)
2206: info_size = secsize;
2207: else
2208: info_size = RF_COMPONENT_INFO_SIZE;
2209:
2210: return info_size;
2211: }
2212:
2213: static daddr_t
2214: rf_parity_map_offset(RF_Raid_t *raidPtr)
2215: {
2216: daddr_t map_offset;
2217:
2218: KASSERT(raidPtr->bytesPerSector);
2219: if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2220: map_offset = raidPtr->bytesPerSector;
2221: else
2222: map_offset = RF_COMPONENT_INFO_SIZE;
2223: map_offset += rf_component_info_offset();
2224:
2225: return map_offset;
2226: }
2227:
2228: static daddr_t
2229: rf_parity_map_size(RF_Raid_t *raidPtr)
2230: {
2231: daddr_t map_size;
2232:
2233: if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2234: map_size = raidPtr->bytesPerSector;
2235: else
2236: map_size = RF_PARITY_MAP_SIZE;
2237:
2238: return map_size;
2239: }
2240:
1.186 perry 2241: int
1.269 jld 2242: raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
1.12 oster 2243: {
1.269 jld 2244: RF_ComponentLabel_t *clabel;
2245:
2246: clabel = raidget_component_label(raidPtr, col);
2247: clabel->clean = RF_RAID_CLEAN;
2248: raidflush_component_label(raidPtr, col);
1.12 oster 2249: return(0);
2250: }
2251:
2252:
1.186 perry 2253: int
1.269 jld 2254: raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
1.11 oster 2255: {
1.269 jld 2256: RF_ComponentLabel_t *clabel;
2257:
2258: clabel = raidget_component_label(raidPtr, col);
2259: clabel->clean = RF_RAID_DIRTY;
2260: raidflush_component_label(raidPtr, col);
1.11 oster 2261: return(0);
2262: }
2263:
2264: int
1.269 jld 2265: raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2266: {
1.276 mrg 2267: KASSERT(raidPtr->bytesPerSector);
2268: return raidread_component_label(raidPtr->bytesPerSector,
2269: raidPtr->Disks[col].dev,
1.269 jld 2270: raidPtr->raid_cinfo[col].ci_vp,
2271: &raidPtr->raid_cinfo[col].ci_label);
2272: }
2273:
2274: RF_ComponentLabel_t *
2275: raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2276: {
2277: return &raidPtr->raid_cinfo[col].ci_label;
2278: }
2279:
2280: int
2281: raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2282: {
2283: RF_ComponentLabel_t *label;
2284:
2285: label = &raidPtr->raid_cinfo[col].ci_label;
2286: label->mod_counter = raidPtr->mod_counter;
2287: #ifndef RF_NO_PARITY_MAP
2288: label->parity_map_modcount = label->mod_counter;
2289: #endif
1.276 mrg 2290: return raidwrite_component_label(raidPtr->bytesPerSector,
2291: raidPtr->Disks[col].dev,
1.269 jld 2292: raidPtr->raid_cinfo[col].ci_vp, label);
2293: }
2294:
2295:
2296: static int
1.276 mrg 2297: raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
1.269 jld 2298: RF_ComponentLabel_t *clabel)
2299: {
2300: return raidread_component_area(dev, b_vp, clabel,
2301: sizeof(RF_ComponentLabel_t),
1.276 mrg 2302: rf_component_info_offset(),
2303: rf_component_info_size(secsize));
1.269 jld 2304: }
2305:
2306: /* ARGSUSED */
/*
 * Read a component area (label or parity map): dsize bytes at byte
 * offset from the raw component device, copying the first msize bytes
 * into data.  Returns 0 on success or an errno (EINVAL if the component
 * has no vnode, otherwise the I/O error).
 */
static int
raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize)
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	if (b_vp == NULL) {
		/* For whatever reason, this component is not valid.
		   Don't try to read a component label from it. */
		return(EINVAL);
	}

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_READ;
	bp->b_resid = dsize;

	/* synchronous read straight from the block device */
	bdev_strategy(bp);
	error = biowait(bp);

	if (!error) {
		memcpy(data, bp->b_data, msize);
	}

	brelse(bp, 0);
	return(error);
}
1.269 jld 2343:
2344:
2345: static int
1.276 mrg 2346: raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2347: RF_ComponentLabel_t *clabel)
1.269 jld 2348: {
2349: return raidwrite_component_area(dev, b_vp, clabel,
2350: sizeof(RF_ComponentLabel_t),
1.276 mrg 2351: rf_component_info_offset(),
2352: rf_component_info_size(secsize), 0);
1.269 jld 2353: }
2354:
1.11 oster 2355: /* ARGSUSED */
1.269 jld 2356: static int
2357: raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2358: size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
1.11 oster 2359: {
2360: struct buf *bp;
2361: int error;
2362:
2363: /* get a block of the appropriate size... */
1.269 jld 2364: bp = geteblk((int)dsize);
1.11 oster 2365: bp->b_dev = dev;
2366:
2367: /* get our ducks in a row for the write */
1.269 jld 2368: bp->b_blkno = offset / DEV_BSIZE;
2369: bp->b_bcount = dsize;
2370: bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2371: bp->b_resid = dsize;
1.11 oster 2372:
1.269 jld 2373: memset(bp->b_data, 0, dsize);
2374: memcpy(bp->b_data, data, msize);
1.11 oster 2375:
1.331 mlelstv 2376: bdev_strategy(bp);
1.269 jld 2377: if (asyncp)
2378: return 0;
1.340 christos 2379: error = biowait(bp);
1.233 ad 2380: brelse(bp, 0);
1.11 oster 2381: if (error) {
1.48 oster 2382: #if 1
1.11 oster 2383: printf("Failed to write RAID component info!\n");
1.48 oster 2384: #endif
1.11 oster 2385: }
2386:
2387: return(error);
1.1 oster 2388: }
1.12 oster 2389:
1.186 perry 2390: void
1.269 jld 2391: rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2392: {
2393: int c;
2394:
2395: for (c = 0; c < raidPtr->numCol; c++) {
2396: /* Skip dead disks. */
2397: if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2398: continue;
2399: /* XXXjld: what if an error occurs here? */
2400: raidwrite_component_area(raidPtr->Disks[c].dev,
2401: raidPtr->raid_cinfo[c].ci_vp, map,
2402: RF_PARITYMAP_NBYTE,
1.276 mrg 2403: rf_parity_map_offset(raidPtr),
2404: rf_parity_map_size(raidPtr), 0);
1.269 jld 2405: }
2406: }
2407:
2408: void
2409: rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2410: {
2411: struct rf_paritymap_ondisk tmp;
1.272 oster 2412: int c,first;
1.269 jld 2413:
1.272 oster 2414: first=1;
1.269 jld 2415: for (c = 0; c < raidPtr->numCol; c++) {
2416: /* Skip dead disks. */
2417: if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2418: continue;
2419: raidread_component_area(raidPtr->Disks[c].dev,
2420: raidPtr->raid_cinfo[c].ci_vp, &tmp,
2421: RF_PARITYMAP_NBYTE,
1.276 mrg 2422: rf_parity_map_offset(raidPtr),
2423: rf_parity_map_size(raidPtr));
1.272 oster 2424: if (first) {
1.269 jld 2425: memcpy(map, &tmp, sizeof(*map));
1.272 oster 2426: first = 0;
1.269 jld 2427: } else {
2428: rf_paritymap_merge(map, &tmp);
2429: }
2430: }
2431: }
2432:
/*
 * Bump the modification counter and mark the component label of every
 * live component (and every used spare) dirty, so an unclean shutdown
 * can be detected at the next configuration.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare stands in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2492:
1.13 oster 2493:
/*
 * Write updated component labels (with a bumped modification counter)
 * to every optimal component and every used spare.  When final is
 * RF_FINAL_COMPONENT_UPDATE and parity is known good, the clean bit is
 * set as well.
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;
	struct raid_softc *rs = raidPtr->softc;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* find the column this spare stands in for */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2571:
2572: void
1.169 oster 2573: rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
1.69 oster 2574: {
2575:
2576: if (vp != NULL) {
2577: if (auto_configured == 1) {
1.96 oster 2578: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.238 pooka 2579: VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
1.69 oster 2580: vput(vp);
1.186 perry 2581:
2582: } else {
1.244 ad 2583: (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
1.69 oster 2584: }
1.186 perry 2585: }
1.69 oster 2586: }
2587:
2588:
2589: void
1.169 oster 2590: rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
1.68 oster 2591: {
1.186 perry 2592: int r,c;
1.69 oster 2593: struct vnode *vp;
2594: int acd;
1.68 oster 2595:
2596:
2597: /* We take this opportunity to close the vnodes like we should.. */
2598:
1.166 oster 2599: for (c = 0; c < raidPtr->numCol; c++) {
2600: vp = raidPtr->raid_cinfo[c].ci_vp;
2601: acd = raidPtr->Disks[c].auto_configured;
2602: rf_close_component(raidPtr, vp, acd);
2603: raidPtr->raid_cinfo[c].ci_vp = NULL;
2604: raidPtr->Disks[c].auto_configured = 0;
1.68 oster 2605: }
1.166 oster 2606:
1.68 oster 2607: for (r = 0; r < raidPtr->numSpare; r++) {
1.166 oster 2608: vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2609: acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
1.69 oster 2610: rf_close_component(raidPtr, vp, acd);
1.166 oster 2611: raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2612: raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
1.68 oster 2613: }
1.37 oster 2614: }
1.63 oster 2615:
1.37 oster 2616:
1.186 perry 2617: void
1.353 mrg 2618: rf_ReconThread(struct rf_recon_req_internal *req)
1.37 oster 2619: {
2620: int s;
2621: RF_Raid_t *raidPtr;
2622:
2623: s = splbio();
2624: raidPtr = (RF_Raid_t *) req->raidPtr;
2625: raidPtr->recon_in_progress = 1;
2626:
1.166 oster 2627: rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
1.37 oster 2628: ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2629:
2630: RF_Free(req, sizeof(*req));
2631:
2632: raidPtr->recon_in_progress = 0;
2633: splx(s);
2634:
2635: /* That's all... */
1.204 simonb 2636: kthread_exit(0); /* does not return */
1.37 oster 2637: }
2638:
/*
 * Kernel thread body: rewrite all parity on the set.  On success, marks
 * parity clean; always clears parity_rewrite_in_progress and wakes any
 * shutdown waiter before exiting.
 */
void
rf_RewriteParityThread(RF_Raid_t *raidPtr)
{
	int retcode;
	int s;

	raidPtr->parity_rewrite_stripes_done = 0;
	raidPtr->parity_rewrite_in_progress = 1;
	s = splbio();
	retcode = rf_RewriteParity(raidPtr);
	splx(s);
	if (retcode) {
		printf("raid%d: Error re-writing parity (%d)!\n",
		    raidPtr->raidid, retcode);
	} else {
		/* set the clean bit!  If we shutdown correctly,
		   the clean bit on each component label will get
		   set */
		raidPtr->parity_good = RF_RAID_CLEAN;
	}
	raidPtr->parity_rewrite_in_progress = 0;

	/* Anyone waiting for us to stop?  If so, inform them... */
	if (raidPtr->waitShutdown) {
		rf_lock_mutex2(raidPtr->rad_lock);
		cv_broadcast(&raidPtr->parity_rewrite_cv);
		rf_unlock_mutex2(raidPtr->rad_lock);
	}

	/* That's all... */
	kthread_exit(0);	/* does not return */
}
2671:
2672:
2673: void
1.169 oster 2674: rf_CopybackThread(RF_Raid_t *raidPtr)
1.37 oster 2675: {
2676: int s;
2677:
2678: raidPtr->copyback_in_progress = 1;
2679: s = splbio();
2680: rf_CopybackReconstructedData(raidPtr);
2681: splx(s);
2682: raidPtr->copyback_in_progress = 0;
2683:
2684: /* That's all... */
1.204 simonb 2685: kthread_exit(0); /* does not return */
1.37 oster 2686: }
2687:
2688:
2689: void
1.353 mrg 2690: rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
1.37 oster 2691: {
2692: int s;
2693: RF_Raid_t *raidPtr;
1.186 perry 2694:
1.37 oster 2695: s = splbio();
2696: raidPtr = req->raidPtr;
2697: raidPtr->recon_in_progress = 1;
1.166 oster 2698: rf_ReconstructInPlace(raidPtr, req->col);
1.37 oster 2699: RF_Free(req, sizeof(*req));
2700: raidPtr->recon_in_progress = 0;
2701: splx(s);
2702:
2703: /* That's all... */
1.204 simonb 2704: kthread_exit(0); /* does not return */
1.48 oster 2705: }
2706:
/*
 * Try to read a component label from (dev, vp).  If the label looks
 * reasonable and fits within the component, prepend a new
 * RF_AutoConfig_t (which takes ownership of vp and the label) to
 * ac_list and return the new list head.  Otherwise close/release the
 * vnode and return the list unchanged.  On memory exhaustion the whole
 * list is torn down and NULL is returned.
 */
static RF_AutoConfig_t *
rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
    const char *cname, RF_SectorCount_t size, uint64_t numsecs,
    unsigned secsize)
{
	int good_one = 0;
	RF_ComponentLabel_t *clabel;
	RF_AutoConfig_t *ac;

	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
	if (clabel == NULL) {
oomem:
		/* out of memory: free everything collected so far */
		while(ac_list) {
			ac = ac_list;
			if (ac->clabel)
				free(ac->clabel, M_RAIDFRAME);
			ac_list = ac_list->next;
			free(ac, M_RAIDFRAME);
		}
		printf("RAID auto config: out of memory!\n");
		return NULL; /* XXX probably should panic? */
	}

	if (!raidread_component_label(secsize, dev, vp, clabel)) {
		/* Got the label.  Does it look reasonable? */
		if (rf_reasonable_label(clabel, numsecs) &&
		    (rf_component_label_partitionsize(clabel) <= size)) {
#ifdef DEBUG
			printf("Component on: %s: %llu\n",
			    cname, (unsigned long long)size);
			rf_print_component_label(clabel);
#endif
			/* if it's reasonable, add it, else ignore it. */
			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
			    M_NOWAIT);
			if (ac == NULL) {
				/* clabel freed here; oomem frees the list */
				free(clabel, M_RAIDFRAME);
				goto oomem;
			}
			strlcpy(ac->devname, cname, sizeof(ac->devname));
			ac->dev = dev;
			ac->vp = vp;
			ac->clabel = clabel;
			ac->next = ac_list;
			ac_list = ac;
			good_one = 1;
		}
	}
	if (!good_one) {
		/* cleanup: unusable component, release label and vnode */
		free(clabel, M_RAIDFRAME);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
		vput(vp);
	}
	return ac_list;
}
2764:
/*
 * Scan every disk-class device in the system for RAIDframe components
 * and return a list of RF_AutoConfig_t records describing the ones
 * found (or NULL if none).  Each accepted component's vnode is left
 * open and owned by its list entry; see rf_get_component().
 *
 * The scan runs twice: first over dk(4) wedges, then over everything
 * else, so that a wedge covering a whole disk is preferred over that
 * disk's raw partition.
 */
RF_AutoConfig_t *
rf_find_raid_components(void)
{
	struct vnode *vp;
	struct disklabel label;
	device_t dv;
	deviter_t di;
	dev_t dev;
	int bmajor, bminor, wedge, rf_part_found;
	int error;
	int i;
	RF_AutoConfig_t *ac_list;
	uint64_t numsecs;
	unsigned secsize;
	int dowedges;

	/* initialize the AutoConfig list */
	ac_list = NULL;

	/*
	 * we begin by trolling through *all* the devices on the system *twice*
	 * first we scan for wedges, second for other devices. This avoids
	 * using a raw partition instead of a wedge that covers the whole disk
	 */

	for (dowedges=1; dowedges>=0; --dowedges) {
		for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
		    dv = deviter_next(&di)) {

			/* we are only interested in disks... */
			if (device_class(dv) != DV_DISK)
				continue;

			/* we don't care about floppies... */
			if (device_is_a(dv, "fd")) {
				continue;
			}

			/* we don't care about CD's... */
			if (device_is_a(dv, "cd")) {
				continue;
			}

			/* we don't care about md's... */
			if (device_is_a(dv, "md")) {
				continue;
			}

			/* hdfd is the Atari/Hades floppy driver */
			if (device_is_a(dv, "hdfd")) {
				continue;
			}

			/* fdisa is the Atari/Milan floppy driver */
			if (device_is_a(dv, "fdisa")) {
				continue;
			}

			/* are we in the wedges pass ? */
			wedge = device_is_a(dv, "dk");
			if (wedge != dowedges) {
				continue;
			}

			/* need to find the device_name_to_block_device_major stuff */
			bmajor = devsw_name2blk(device_xname(dv), NULL, 0);

			rf_part_found = 0; /*No raid partition as yet*/

			/* get a vnode for the raw partition of this disk */
			bminor = minor(device_unit(dv));
			dev = wedge ? makedev(bmajor, bminor) :
			    MAKEDISKDEV(bmajor, bminor, RAW_PART);
			if (bdevvp(dev, &vp))
				panic("RAID can't alloc vnode");

			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);

			if (error) {
				/* "Who cares."  Continue looking
				   for something that exists*/
				vput(vp);
				continue;
			}

			error = getdisksize(vp, &numsecs, &secsize);
			if (error) {
				/*
				 * Pseudo devices like vnd and cgd can be
				 * opened but may still need some configuration.
				 * Ignore these quietly.
				 */
				if (error != ENXIO)
					printf("RAIDframe: can't get disk size"
					    " for dev %s (%d)\n",
					    device_xname(dv), error);
				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
				vput(vp);
				continue;
			}
			if (wedge) {
				struct dkwedge_info dkw;
				error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
				    NOCRED);
				if (error) {
					printf("RAIDframe: can't get wedge info for "
					    "dev %s (%d)\n", device_xname(dv), error);
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
					vput(vp);
					continue;
				}

				/* unlock; rf_get_component takes over vp */
				VOP_UNLOCK(vp);
				ac_list = rf_get_component(ac_list, dev, vp,
				    device_xname(dv), dkw.dkw_size, numsecs, secsize);
				rf_part_found = 1; /*There is a raid component on this disk*/
				continue;
			}

			/* Ok, the disk exists.  Go get the disklabel. */
			error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
			if (error) {
				/*
				 * XXX can't happen - open() would
				 * have errored out (or faked up one)
				 */
				if (error != ENOTTY)
					printf("RAIDframe: can't get label for dev "
					    "%s (%d)\n", device_xname(dv), error);
			}

			/* don't need this any more.  We'll allocate it again
			   a little later if we really do... */
			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
			vput(vp);

			if (error)
				continue;

			rf_part_found = 0; /*No raid partitions yet*/
			for (i = 0; i < label.d_npartitions; i++) {
				char cname[sizeof(ac_list->devname)];

				/* We only support partitions marked as RAID */
				if (label.d_partitions[i].p_fstype != FS_RAID)
					continue;

				dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				VOP_UNLOCK(vp);
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + i);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
				    label.d_partitions[i].p_size, numsecs, secsize);
				rf_part_found = 1; /*There is at least one raid partition on this disk*/
			}

			/*
			 *If there is no raid component on this disk, either in a
			 *disklabel or inside a wedge, check the raw partition as well,
			 *as it is possible to configure raid components on raw disk
			 *devices.
			 */

			if (!rf_part_found) {
				char cname[sizeof(ac_list->devname)];

				dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
				if (bdevvp(dev, &vp))
					panic("RAID can't alloc vnode");

				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

				error = VOP_OPEN(vp, FREAD, NOCRED);
				if (error) {
					/* Whatever... */
					vput(vp);
					continue;
				}
				VOP_UNLOCK(vp);
				snprintf(cname, sizeof(cname), "%s%c",
				    device_xname(dv), 'a' + RAW_PART);
				ac_list = rf_get_component(ac_list, dev, vp, cname,
				    label.d_partitions[RAW_PART].p_size, numsecs, secsize);
			}
		}
		deviter_release(&di);
	}
	return ac_list;
}
1.186 perry 2971:
1.213 christos 2972:
1.292 oster 2973: int
1.284 mrg 2974: rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
1.48 oster 2975: {
1.186 perry 2976:
1.48 oster 2977: if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
2978: (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
2979: ((clabel->clean == RF_RAID_CLEAN) ||
2980: (clabel->clean == RF_RAID_DIRTY)) &&
1.186 perry 2981: clabel->row >=0 &&
2982: clabel->column >= 0 &&
1.48 oster 2983: clabel->num_rows > 0 &&
2984: clabel->num_columns > 0 &&
1.186 perry 2985: clabel->row < clabel->num_rows &&
1.48 oster 2986: clabel->column < clabel->num_columns &&
2987: clabel->blockSize > 0 &&
1.282 enami 2988: /*
2989: * numBlocksHi may contain garbage, but it is ok since
2990: * the type is unsigned. If it is really garbage,
2991: * rf_fix_old_label_size() will fix it.
2992: */
2993: rf_component_label_numblocks(clabel) > 0) {
1.284 mrg 2994: /*
2995: * label looks reasonable enough...
2996: * let's make sure it has no old garbage.
2997: */
1.292 oster 2998: if (numsecs)
2999: rf_fix_old_label_size(clabel, numsecs);
1.48 oster 3000: return(1);
3001: }
3002: return(0);
3003: }
3004:
3005:
1.278 mrg 3006: /*
3007: * For reasons yet unknown, some old component labels have garbage in
3008: * the newer numBlocksHi region, and this causes lossage. Since those
3009: * disks will also have numsecs set to less than 32 bits of sectors,
1.299 oster 3010: * we can determine when this corruption has occurred, and fix it.
1.284 mrg 3011: *
3012: * The exact same problem, with the same unknown reason, happens to
3013: * the partitionSizeHi member as well.
1.278 mrg 3014: */
3015: static void
3016: rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3017: {
3018:
1.284 mrg 3019: if (numsecs < ((uint64_t)1 << 32)) {
3020: if (clabel->numBlocksHi) {
3021: printf("WARNING: total sectors < 32 bits, yet "
3022: "numBlocksHi set\n"
3023: "WARNING: resetting numBlocksHi to zero.\n");
3024: clabel->numBlocksHi = 0;
3025: }
3026:
3027: if (clabel->partitionSizeHi) {
3028: printf("WARNING: total sectors < 32 bits, yet "
3029: "partitionSizeHi set\n"
3030: "WARNING: resetting partitionSizeHi to zero.\n");
3031: clabel->partitionSizeHi = 0;
3032: }
1.278 mrg 3033: }
3034: }
3035:
3036:
#ifdef DEBUG
/*
 * Dump the interesting fields of a component label to the console.
 * Debug-only helper; used e.g. by rf_get_component() when a candidate
 * component is accepted.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	uint64_t numBlocks;
	static const char *rp[] = {
	    "No", "Force", "Soft", "*invalid*"
	};


	numBlocks = rf_component_label_numblocks(clabel);

	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	       clabel->row, clabel->column,
	       clabel->num_rows, clabel->num_columns);
	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
	       clabel->version, clabel->serial_number,
	       clabel->mod_counter);
	printf("   Clean: %s Status: %d\n",
	       clabel->clean ? "Yes" : "No", clabel->status);
	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	       clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
	       (char) clabel->parityConfig, clabel->blockSize, numBlocks);
	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	/* mask with 3 so an out-of-range value prints "*invalid*" */
	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
	printf("   Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf("   Config order: %d\n", clabel->config_order);
#endif

}
#endif
1.48 oster 3070:
3071: RF_ConfigSet_t *
1.169 oster 3072: rf_create_auto_sets(RF_AutoConfig_t *ac_list)
1.48 oster 3073: {
3074: RF_AutoConfig_t *ac;
3075: RF_ConfigSet_t *config_sets;
3076: RF_ConfigSet_t *cset;
3077: RF_AutoConfig_t *ac_next;
3078:
3079:
3080: config_sets = NULL;
3081:
3082: /* Go through the AutoConfig list, and figure out which components
3083: belong to what sets. */
3084: ac = ac_list;
3085: while(ac!=NULL) {
3086: /* we're going to putz with ac->next, so save it here
3087: for use at the end of the loop */
3088: ac_next = ac->next;
3089:
3090: if (config_sets == NULL) {
3091: /* will need at least this one... */
3092: config_sets = (RF_ConfigSet_t *)
1.186 perry 3093: malloc(sizeof(RF_ConfigSet_t),
1.48 oster 3094: M_RAIDFRAME, M_NOWAIT);
3095: if (config_sets == NULL) {
1.141 provos 3096: panic("rf_create_auto_sets: No memory!");
1.48 oster 3097: }
3098: /* this one is easy :) */
3099: config_sets->ac = ac;
3100: config_sets->next = NULL;
1.51 oster 3101: config_sets->rootable = 0;
1.48 oster 3102: ac->next = NULL;
3103: } else {
3104: /* which set does this component fit into? */
3105: cset = config_sets;
3106: while(cset!=NULL) {
1.49 oster 3107: if (rf_does_it_fit(cset, ac)) {
1.86 oster 3108: /* looks like it matches... */
3109: ac->next = cset->ac;
3110: cset->ac = ac;
1.48 oster 3111: break;
3112: }
3113: cset = cset->next;
3114: }
3115: if (cset==NULL) {
3116: /* didn't find a match above... new set..*/
3117: cset = (RF_ConfigSet_t *)
1.186 perry 3118: malloc(sizeof(RF_ConfigSet_t),
1.48 oster 3119: M_RAIDFRAME, M_NOWAIT);
3120: if (cset == NULL) {
1.141 provos 3121: panic("rf_create_auto_sets: No memory!");
1.48 oster 3122: }
3123: cset->ac = ac;
3124: ac->next = NULL;
3125: cset->next = config_sets;
1.51 oster 3126: cset->rootable = 0;
1.48 oster 3127: config_sets = cset;
3128: }
3129: }
3130: ac = ac_next;
3131: }
3132:
3133:
3134: return(config_sets);
3135: }
3136:
3137: static int
1.169 oster 3138: rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
1.48 oster 3139: {
3140: RF_ComponentLabel_t *clabel1, *clabel2;
3141:
3142: /* If this one matches the *first* one in the set, that's good
3143: enough, since the other members of the set would have been
3144: through here too... */
1.60 oster 3145: /* note that we are not checking partitionSize here..
3146:
3147: Note that we are also not checking the mod_counters here.
1.299 oster 3148: If everything else matches except the mod_counter, that's
1.60 oster 3149: good enough for this test. We will deal with the mod_counters
1.186 perry 3150: a little later in the autoconfiguration process.
1.60 oster 3151:
3152: (clabel1->mod_counter == clabel2->mod_counter) &&
1.81 oster 3153:
3154: The reason we don't check for this is that failed disks
3155: will have lower modification counts. If those disks are
3156: not added to the set they used to belong to, then they will
3157: form their own set, which may result in 2 different sets,
3158: for example, competing to be configured at raid0, and
3159: perhaps competing to be the root filesystem set. If the
3160: wrong ones get configured, or both attempt to become /,
3161: weird behaviour and or serious lossage will occur. Thus we
3162: need to bring them into the fold here, and kick them out at
3163: a later point.
1.60 oster 3164:
3165: */
1.48 oster 3166:
3167: clabel1 = cset->ac->clabel;
3168: clabel2 = ac->clabel;
3169: if ((clabel1->version == clabel2->version) &&
3170: (clabel1->serial_number == clabel2->serial_number) &&
3171: (clabel1->num_rows == clabel2->num_rows) &&
3172: (clabel1->num_columns == clabel2->num_columns) &&
3173: (clabel1->sectPerSU == clabel2->sectPerSU) &&
3174: (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3175: (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3176: (clabel1->parityConfig == clabel2->parityConfig) &&
3177: (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3178: (clabel1->blockSize == clabel2->blockSize) &&
1.282 enami 3179: rf_component_label_numblocks(clabel1) ==
3180: rf_component_label_numblocks(clabel2) &&
1.48 oster 3181: (clabel1->autoconfigure == clabel2->autoconfigure) &&
3182: (clabel1->root_partition == clabel2->root_partition) &&
3183: (clabel1->last_unit == clabel2->last_unit) &&
3184: (clabel1->config_order == clabel2->config_order)) {
3185: /* if it get's here, it almost *has* to be a match */
3186: } else {
1.186 perry 3187: /* it's not consistent with somebody in the set..
1.48 oster 3188: punt */
3189: return(0);
3190: }
3191: /* all was fine.. it must fit... */
3192: return(1);
3193: }
3194:
3195: int
1.169 oster 3196: rf_have_enough_components(RF_ConfigSet_t *cset)
1.48 oster 3197: {
1.51 oster 3198: RF_AutoConfig_t *ac;
3199: RF_AutoConfig_t *auto_config;
3200: RF_ComponentLabel_t *clabel;
1.166 oster 3201: int c;
1.51 oster 3202: int num_cols;
3203: int num_missing;
1.86 oster 3204: int mod_counter;
1.87 oster 3205: int mod_counter_found;
1.88 oster 3206: int even_pair_failed;
3207: char parity_type;
1.186 perry 3208:
1.51 oster 3209:
1.48 oster 3210: /* check to see that we have enough 'live' components
3211: of this set. If so, we can configure it if necessary */
3212:
1.51 oster 3213: num_cols = cset->ac->clabel->num_columns;
1.88 oster 3214: parity_type = cset->ac->clabel->parityConfig;
1.51 oster 3215:
3216: /* XXX Check for duplicate components!?!?!? */
3217:
1.86 oster 3218: /* Determine what the mod_counter is supposed to be for this set. */
3219:
1.87 oster 3220: mod_counter_found = 0;
1.101 oster 3221: mod_counter = 0;
1.86 oster 3222: ac = cset->ac;
3223: while(ac!=NULL) {
1.87 oster 3224: if (mod_counter_found==0) {
1.86 oster 3225: mod_counter = ac->clabel->mod_counter;
1.87 oster 3226: mod_counter_found = 1;
3227: } else {
3228: if (ac->clabel->mod_counter > mod_counter) {
3229: mod_counter = ac->clabel->mod_counter;
3230: }
1.86 oster 3231: }
3232: ac = ac->next;
3233: }
3234:
1.51 oster 3235: num_missing = 0;
3236: auto_config = cset->ac;
3237:
1.166 oster 3238: even_pair_failed = 0;
3239: for(c=0; c<num_cols; c++) {
3240: ac = auto_config;
3241: while(ac!=NULL) {
1.186 perry 3242: if ((ac->clabel->column == c) &&
1.166 oster 3243: (ac->clabel->mod_counter == mod_counter)) {
3244: /* it's this one... */
1.224 oster 3245: #ifdef DEBUG
1.166 oster 3246: printf("Found: %s at %d\n",
3247: ac->devname,c);
1.51 oster 3248: #endif
1.166 oster 3249: break;
1.51 oster 3250: }
1.166 oster 3251: ac=ac->next;
3252: }
3253: if (ac==NULL) {
1.51 oster 3254: /* Didn't find one here! */
1.88 oster 3255: /* special case for RAID 1, especially
3256: where there are more than 2
3257: components (where RAIDframe treats
3258: things a little differently :( ) */
1.166 oster 3259: if (parity_type == '1') {
3260: if (c%2 == 0) { /* even component */
3261: even_pair_failed = 1;
3262: } else { /* odd component. If
3263: we're failed, and
3264: so is the even
3265: component, it's
3266: "Good Night, Charlie" */
3267: if (even_pair_failed == 1) {
3268: return(0);
1.88 oster 3269: }
3270: }
1.166 oster 3271: } else {
3272: /* normal accounting */
3273: num_missing++;
1.88 oster 3274: }
1.166 oster 3275: }
3276: if ((parity_type == '1') && (c%2 == 1)) {
1.88 oster 3277: /* Just did an even component, and we didn't
1.186 perry 3278: bail.. reset the even_pair_failed flag,
1.88 oster 3279: and go on to the next component.... */
1.166 oster 3280: even_pair_failed = 0;
1.51 oster 3281: }
3282: }
3283:
3284: clabel = cset->ac->clabel;
3285:
3286: if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3287: ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3288: ((clabel->parityConfig == '5') && (num_missing > 1))) {
3289: /* XXX this needs to be made *much* more general */
3290: /* Too many failures */
3291: return(0);
3292: }
3293: /* otherwise, all is well, and we've got enough to take a kick
3294: at autoconfiguring this set */
3295: return(1);
1.48 oster 3296: }
3297:
3298: void
1.169 oster 3299: rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
1.222 christos 3300: RF_Raid_t *raidPtr)
1.48 oster 3301: {
3302: RF_ComponentLabel_t *clabel;
1.77 oster 3303: int i;
1.48 oster 3304:
3305: clabel = ac->clabel;
3306:
3307: /* 1. Fill in the common stuff */
3308: config->numCol = clabel->num_columns;
3309: config->numSpare = 0; /* XXX should this be set here? */
3310: config->sectPerSU = clabel->sectPerSU;
3311: config->SUsPerPU = clabel->SUsPerPU;
3312: config->SUsPerRU = clabel->SUsPerRU;
3313: config->parityConfig = clabel->parityConfig;
3314: /* XXX... */
3315: strcpy(config->diskQueueType,"fifo");
3316: config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3317: config->layoutSpecificSize = 0; /* XXX ?? */
3318:
3319: while(ac!=NULL) {
3320: /* row/col values will be in range due to the checks
3321: in reasonable_label() */
1.166 oster 3322: strcpy(config->devnames[0][ac->clabel->column],
1.48 oster 3323: ac->devname);
3324: ac = ac->next;
3325: }
3326:
1.77 oster 3327: for(i=0;i<RF_MAXDBGV;i++) {
1.163 fvdl 3328: config->debugVars[i][0] = 0;
1.77 oster 3329: }
1.48 oster 3330: }
3331:
3332: int
1.169 oster 3333: rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
1.48 oster 3334: {
1.269 jld 3335: RF_ComponentLabel_t *clabel;
1.166 oster 3336: int column;
1.148 oster 3337: int sparecol;
1.48 oster 3338:
1.54 oster 3339: raidPtr->autoconfigure = new_value;
1.166 oster 3340:
3341: for(column=0; column<raidPtr->numCol; column++) {
3342: if (raidPtr->Disks[column].status == rf_ds_optimal) {
1.269 jld 3343: clabel = raidget_component_label(raidPtr, column);
3344: clabel->autoconfigure = new_value;
3345: raidflush_component_label(raidPtr, column);
1.48 oster 3346: }
3347: }
1.148 oster 3348: for(column = 0; column < raidPtr->numSpare ; column++) {
3349: sparecol = raidPtr->numCol + column;
1.166 oster 3350: if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.269 jld 3351: clabel = raidget_component_label(raidPtr, sparecol);
3352: clabel->autoconfigure = new_value;
3353: raidflush_component_label(raidPtr, sparecol);
1.148 oster 3354: }
3355: }
1.48 oster 3356: return(new_value);
3357: }
3358:
3359: int
1.169 oster 3360: rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
1.48 oster 3361: {
1.269 jld 3362: RF_ComponentLabel_t *clabel;
1.166 oster 3363: int column;
1.148 oster 3364: int sparecol;
1.48 oster 3365:
1.54 oster 3366: raidPtr->root_partition = new_value;
1.166 oster 3367: for(column=0; column<raidPtr->numCol; column++) {
3368: if (raidPtr->Disks[column].status == rf_ds_optimal) {
1.269 jld 3369: clabel = raidget_component_label(raidPtr, column);
3370: clabel->root_partition = new_value;
3371: raidflush_component_label(raidPtr, column);
1.148 oster 3372: }
3373: }
3374: for(column = 0; column < raidPtr->numSpare ; column++) {
3375: sparecol = raidPtr->numCol + column;
1.166 oster 3376: if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.269 jld 3377: clabel = raidget_component_label(raidPtr, sparecol);
3378: clabel->root_partition = new_value;
3379: raidflush_component_label(raidPtr, sparecol);
1.48 oster 3380: }
3381: }
3382: return(new_value);
3383: }
3384:
3385: void
1.169 oster 3386: rf_release_all_vps(RF_ConfigSet_t *cset)
1.48 oster 3387: {
3388: RF_AutoConfig_t *ac;
1.186 perry 3389:
1.48 oster 3390: ac = cset->ac;
3391: while(ac!=NULL) {
3392: /* Close the vp, and give it back */
3393: if (ac->vp) {
1.96 oster 3394: vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
1.335 mlelstv 3395: VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
1.48 oster 3396: vput(ac->vp);
1.86 oster 3397: ac->vp = NULL;
1.48 oster 3398: }
3399: ac = ac->next;
3400: }
3401: }
3402:
3403:
3404: void
1.169 oster 3405: rf_cleanup_config_set(RF_ConfigSet_t *cset)
1.48 oster 3406: {
3407: RF_AutoConfig_t *ac;
3408: RF_AutoConfig_t *next_ac;
1.186 perry 3409:
1.48 oster 3410: ac = cset->ac;
3411: while(ac!=NULL) {
3412: next_ac = ac->next;
3413: /* nuke the label */
3414: free(ac->clabel, M_RAIDFRAME);
3415: /* cleanup the config structure */
3416: free(ac, M_RAIDFRAME);
3417: /* "next.." */
3418: ac = next_ac;
3419: }
3420: /* and, finally, nuke the config set */
3421: free(cset, M_RAIDFRAME);
3422: }
3423:
3424:
3425: void
1.169 oster 3426: raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
1.48 oster 3427: {
3428: /* current version number */
1.186 perry 3429: clabel->version = RF_COMPONENT_LABEL_VERSION;
1.57 oster 3430: clabel->serial_number = raidPtr->serial_number;
1.48 oster 3431: clabel->mod_counter = raidPtr->mod_counter;
1.269 jld 3432:
1.166 oster 3433: clabel->num_rows = 1;
1.48 oster 3434: clabel->num_columns = raidPtr->numCol;
3435: clabel->clean = RF_RAID_DIRTY; /* not clean */
3436: clabel->status = rf_ds_optimal; /* "It's good!" */
1.186 perry 3437:
1.48 oster 3438: clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3439: clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3440: clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
1.54 oster 3441:
3442: clabel->blockSize = raidPtr->bytesPerSector;
1.282 enami 3443: rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);
1.54 oster 3444:
1.48 oster 3445: /* XXX not portable */
3446: clabel->parityConfig = raidPtr->Layout.map->parityConfig;
1.54 oster 3447: clabel->maxOutstanding = raidPtr->maxOutstanding;
3448: clabel->autoconfigure = raidPtr->autoconfigure;
3449: clabel->root_partition = raidPtr->root_partition;
1.48 oster 3450: clabel->last_unit = raidPtr->raidid;
1.54 oster 3451: clabel->config_order = raidPtr->config_order;
1.269 jld 3452:
3453: #ifndef RF_NO_PARITY_MAP
3454: rf_paritymap_init_label(raidPtr->parity_map, clabel);
3455: #endif
1.51 oster 3456: }
3457:
/*
 * Autoconfigure one config set: allocate a unit (preferring the unit
 * recorded in the label, falling back to the next free one), build an
 * RF_Config_t from the labels, and configure the RAID set.  Returns
 * the softc on success, NULL on failure (with the softc released).
 */
struct raid_softc *
rf_auto_config_set(RF_ConfigSet_t *cset)
{
	RF_Raid_t *raidPtr;
	RF_Config_t *config;
	int raidID;
	struct raid_softc *sc;

#ifdef DEBUG
	printf("RAID autoconfigure\n");
#endif

	/* 1. Create a config structure */
	config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
	if (config == NULL) {
		printf("%s: Out of mem - config!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		return NULL;
	}

	/*
	   2. Figure out what RAID ID this one is supposed to live at
	   See if we can get the same RAID dev that it was configured
	   on last time..
	*/

	/* walk upward from the label's preferred unit until we find a
	   softc that is not already a valid (configured) set */
	raidID = cset->ac->clabel->last_unit;
	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
	     sc = raidget(++raidID, false))
		continue;
#ifdef DEBUG
	printf("Configuring raid%d:\n",raidID);
#endif

	if (sc == NULL)
		sc = raidget(raidID, true);
	if (sc == NULL) {
		printf("%s: Out of mem - softc!?!?\n", __func__);
		/* XXX do something more intelligent here. */
		free(config, M_RAIDFRAME);
		return NULL;
	}

	raidPtr = &sc->sc_r;

	/* XXX all this stuff should be done SOMEWHERE ELSE! */
	raidPtr->softc = sc;
	raidPtr->raidid = raidID;
	raidPtr->openings = RAIDOUTSTANDING;

	/* 3. Build the configuration structure */
	rf_create_configuration(cset->ac, config, raidPtr);

	/* 4. Do the configuration */
	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
		raidinit(sc);

		rf_markalldirty(raidPtr);
		raidPtr->autoconfigure = 1; /* XXX do this here? */
		switch (cset->ac->clabel->root_partition) {
		case 1:	/* Force Root */
		case 2:	/* Soft Root: root when boot partition part of raid */
			/*
			 * everything configured just fine.  Make a note
			 * that this set is eligible to be root,
			 * or forced to be root
			 */
			cset->rootable = cset->ac->clabel->root_partition;
			/* XXX do this here? */
			raidPtr->root_partition = cset->rootable;
			break;
		default:
			break;
		}
	} else {
		/* configuration failed; release the unit we claimed */
		raidput(sc);
		sc = NULL;
	}

	/* 5. Cleanup */
	free(config, M_RAIDFRAME);
	return sc;
}
3541:
3542: void
1.187 christos 3543: rf_pool_init(struct pool *p, size_t size, const char *w_chan,
3544: size_t xmin, size_t xmax)
1.177 oster 3545: {
1.352 christos 3546: int error;
3547:
1.227 ad 3548: pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
1.187 christos 3549: pool_sethiwat(p, xmax);
1.352 christos 3550: if ((error = pool_prime(p, xmin)) != 0)
3551: panic("%s: failed to prime pool: %d", __func__, error);
1.187 christos 3552: pool_setlowat(p, xmin);
1.177 oster 3553: }
1.190 oster 3554:
3555: /*
1.335 mlelstv 3556: * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
3557: * to see if there is IO pending and if that IO could possibly be done
3558: * for a given RAID set. Returns 0 if IO is waiting and can be done, 1
1.190 oster 3559: * otherwise.
3560: *
3561: */
3562: int
1.300 christos 3563: rf_buf_queue_check(RF_Raid_t *raidPtr)
1.190 oster 3564: {
1.335 mlelstv 3565: struct raid_softc *rs;
3566: struct dk_softc *dksc;
3567:
3568: rs = raidPtr->softc;
3569: dksc = &rs->sc_dksc;
3570:
3571: if ((rs->sc_flags & RAIDF_INITED) == 0)
3572: return 1;
3573:
3574: if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
1.190 oster 3575: /* there is work to do */
3576: return 0;
1.335 mlelstv 3577: }
1.190 oster 3578: /* default is nothing to do */
3579: return 1;
3580: }
1.213 christos 3581:
3582: int
1.294 oster 3583: rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
1.213 christos 3584: {
1.275 mrg 3585: uint64_t numsecs;
3586: unsigned secsize;
1.213 christos 3587: int error;
3588:
1.275 mrg 3589: error = getdisksize(vp, &numsecs, &secsize);
1.213 christos 3590: if (error == 0) {
1.275 mrg 3591: diskPtr->blockSize = secsize;
3592: diskPtr->numBlocks = numsecs - rf_protectedSectors;
3593: diskPtr->partitionSize = numsecs;
1.213 christos 3594: return 0;
3595: }
3596: return error;
3597: }
1.217 oster 3598:
3599: static int
1.261 dyoung 3600: raid_match(device_t self, cfdata_t cfdata, void *aux)
1.217 oster 3601: {
3602: return 1;
3603: }
3604:
3605: static void
1.261 dyoung 3606: raid_attach(device_t parent, device_t self, void *aux)
1.217 oster 3607: {
3608: }
3609:
3610:
3611: static int
1.261 dyoung 3612: raid_detach(device_t self, int flags)
1.217 oster 3613: {
1.266 dyoung 3614: int error;
1.335 mlelstv 3615: struct raid_softc *rs = raidsoftc(self);
1.303 christos 3616:
3617: if (rs == NULL)
3618: return ENXIO;
1.266 dyoung 3619:
3620: if ((error = raidlock(rs)) != 0)
3621: return (error);
1.217 oster 3622:
1.266 dyoung 3623: error = raid_detach_unlocked(rs);
3624:
1.332 mlelstv 3625: raidunlock(rs);
3626:
3627: /* XXX raid can be referenced here */
3628:
3629: if (error)
3630: return error;
3631:
3632: /* Free the softc */
3633: raidput(rs);
3634:
3635: return 0;
1.217 oster 3636: }
3637:
1.234 oster 3638: static void
1.304 christos 3639: rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
1.234 oster 3640: {
1.335 mlelstv 3641: struct dk_softc *dksc = &rs->sc_dksc;
3642: struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;
1.304 christos 3643:
3644: memset(dg, 0, sizeof(*dg));
3645:
3646: dg->dg_secperunit = raidPtr->totalSectors;
3647: dg->dg_secsize = raidPtr->bytesPerSector;
3648: dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
3649: dg->dg_ntracks = 4 * raidPtr->numCol;
3650:
1.335 mlelstv 3651: disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
1.234 oster 3652: }
1.252 oster 3653:
1.348 jdolecek 3654: /*
3655: * Get cache info for all the components (including spares).
3656: * Returns intersection of all the cache flags of all disks, or first
3657: * error if any encountered.
3658: * XXXfua feature flags can change as spares are added - lock down somehow
3659: */
3660: static int
3661: rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
3662: {
3663: int c;
3664: int error;
3665: int dkwhole = 0, dkpart;
3666:
3667: for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
3668: /*
3669: * Check any non-dead disk, even when currently being
3670: * reconstructed.
3671: */
3672: if (!RF_DEAD_DISK(raidPtr->Disks[c].status)
3673: || raidPtr->Disks[c].status == rf_ds_reconstructing) {
3674: error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
3675: DIOCGCACHE, &dkpart, FREAD, NOCRED);
3676: if (error) {
3677: if (error != ENODEV) {
3678: printf("raid%d: get cache for component %s failed\n",
3679: raidPtr->raidid,
3680: raidPtr->Disks[c].devname);
3681: }
3682:
3683: return error;
3684: }
3685:
3686: if (c == 0)
3687: dkwhole = dkpart;
3688: else
3689: dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
3690: }
3691: }
3692:
1.349 jdolecek 3693: *data = dkwhole;
1.348 jdolecek 3694:
3695: return 0;
3696: }
3697:
1.252 oster 3698: /*
3699: * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3700: * We end up returning whatever error was returned by the first cache flush
3701: * that fails.
3702: */
3703:
1.269 jld 3704: int
1.252 oster 3705: rf_sync_component_caches(RF_Raid_t *raidPtr)
3706: {
3707: int c, sparecol;
3708: int e,error;
3709: int force = 1;
3710:
3711: error = 0;
3712: for (c = 0; c < raidPtr->numCol; c++) {
3713: if (raidPtr->Disks[c].status == rf_ds_optimal) {
3714: e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3715: &force, FWRITE, NOCRED);
3716: if (e) {
1.255 oster 3717: if (e != ENODEV)
3718: printf("raid%d: cache flush to component %s failed.\n",
3719: raidPtr->raidid, raidPtr->Disks[c].devname);
1.252 oster 3720: if (error == 0) {
3721: error = e;
3722: }
3723: }
3724: }
3725: }
3726:
3727: for( c = 0; c < raidPtr->numSpare ; c++) {
3728: sparecol = raidPtr->numCol + c;
3729: /* Need to ensure that the reconstruct actually completed! */
3730: if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3731: e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3732: DIOCCACHESYNC, &force, FWRITE, NOCRED);
3733: if (e) {
1.255 oster 3734: if (e != ENODEV)
3735: printf("raid%d: cache flush to component %s failed.\n",
3736: raidPtr->raidid, raidPtr->Disks[sparecol].devname);
1.252 oster 3737: if (error == 0) {
3738: error = e;
3739: }
3740: }
3741: }
3742: }
3743: return error;
3744: }
1.327 pgoyette 3745:
1.353 mrg 3746: /* Fill in info with the current status */
3747: void
3748: rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3749: {
3750:
3751: if (raidPtr->status != rf_rs_reconstructing) {
3752: info->total = 100;
3753: info->completed = 100;
3754: } else {
3755: info->total = raidPtr->reconControl->numRUsTotal;
3756: info->completed = raidPtr->reconControl->numRUsComplete;
3757: }
3758: info->remaining = info->total - info->completed;
3759: }
3760:
3761: /* Fill in info with the current status */
3762: void
3763: rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3764: {
3765:
3766: if (raidPtr->parity_rewrite_in_progress == 1) {
3767: info->total = raidPtr->Layout.numStripe;
3768: info->completed = raidPtr->parity_rewrite_stripes_done;
3769: } else {
3770: info->completed = 100;
3771: info->total = 100;
3772: }
3773: info->remaining = info->total - info->completed;
3774: }
3775:
3776: /* Fill in info with the current status */
3777: void
3778: rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3779: {
3780:
3781: if (raidPtr->copyback_in_progress == 1) {
3782: info->total = raidPtr->Layout.numStripe;
3783: info->completed = raidPtr->copyback_stripes_done;
3784: info->remaining = info->total - info->completed;
3785: } else {
3786: info->remaining = 0;
3787: info->completed = 100;
3788: info->total = 100;
3789: }
3790: }
3791:
3792: /* Fill in config with the current info */
3793: int
3794: rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
3795: {
3796: int d, i, j;
3797:
3798: if (!raidPtr->valid)
3799: return (ENODEV);
3800: config->cols = raidPtr->numCol;
3801: config->ndevs = raidPtr->numCol;
3802: if (config->ndevs >= RF_MAX_DISKS)
3803: return (ENOMEM);
3804: config->nspares = raidPtr->numSpare;
3805: if (config->nspares >= RF_MAX_DISKS)
3806: return (ENOMEM);
3807: config->maxqdepth = raidPtr->maxQueueDepth;
3808: d = 0;
3809: for (j = 0; j < config->cols; j++) {
3810: config->devs[d] = raidPtr->Disks[j];
3811: d++;
3812: }
3813: for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
3814: config->spares[i] = raidPtr->Disks[j];
3815: if (config->spares[i].status == rf_ds_rebuilding_spare) {
3816: /* XXX: raidctl(8) expects to see this as a used spare */
3817: config->spares[i].status = rf_ds_used_spare;
3818: }
3819: }
3820: return 0;
3821: }
3822:
3823: int
3824: rf_get_component_label(RF_Raid_t *raidPtr, void *data)
3825: {
3826: RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
3827: RF_ComponentLabel_t *raid_clabel;
3828: int column = clabel->column;
3829:
3830: if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
3831: return EINVAL;
3832: raid_clabel = raidget_component_label(raidPtr, column);
3833: memcpy(clabel, raid_clabel, sizeof *clabel);
3834:
3835: return 0;
3836: }
3837:
/*
 * Module interface
 */

/* raid requires the dk(9) helpers and the FCFS bufq strategy. */
MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");

#ifdef _MODULE
/* Built as a module: declare the cfdriver here ourselves. */
CFDRIVER_DECL(raid, DV_DISK, NULL);
#endif

static int raid_modcmd(modcmd_t, void *);
static int raid_modcmd_init(void);
static int raid_modcmd_fini(void);
3851:
3852: static int
3853: raid_modcmd(modcmd_t cmd, void *data)
3854: {
3855: int error;
3856:
3857: error = 0;
3858: switch (cmd) {
3859: case MODULE_CMD_INIT:
3860: error = raid_modcmd_init();
3861: break;
3862: case MODULE_CMD_FINI:
3863: error = raid_modcmd_fini();
3864: break;
3865: default:
3866: error = ENOTTY;
3867: break;
3868: }
3869: return error;
3870: }
3871:
/*
 * One-time module initialization: create raid_lock, attach the
 * block/character devsw, the cfdriver (modular case) and the
 * cfattach, boot the RAIDframe core, and register a finalizer that
 * auto-configures RAID sets after device discovery.  Each failure
 * path unwinds the steps already performed, in reverse order.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* -1 asks devsw_attach to allocate the major numbers for us. */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	/* EEXIST is tolerated: the devsw may already be present. */
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		/* Undo the devsw attach from above. */
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* Undo the cfdriver and devsw attaches from above. */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	/* All failure paths returned above, so error is 0 here. */
	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		/* Non-fatal: proceed without autoconfiguration. */
		error = 0;
	}

	return error;
}
3942:
/*
 * Module teardown: refuse to unload while any raid devices exist;
 * otherwise detach the cfattach, cfdriver (modular case) and devsw,
 * shut down the RAIDframe core, and destroy the module-global locks.
 * On a partial failure the pieces already detached are re-attached
 * so the module remains in a consistent, usable state.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist. */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: cannot detach cfattach\n",__func__);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: cannot detach cfdriver\n",__func__);
		/* Roll back: re-attach the cfattach detached above. */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
	if (error != 0) {
		aprint_error("%s: cannot detach devsw\n",__func__);
		/* Roll back the cfdriver and cfattach detaches above. */
#ifdef _MODULE
		config_cfdriver_attach(&raid_cd);
#endif
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
	/* Shut down the RAIDframe engine. */
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
CVSweb <webmaster@jp.NetBSD.org>