Annotation of src/sys/dev/raidframe/rf_netbsdkintf.c, Revision 1.394
1.394 ! mrg 1: /* $NetBSD: rf_netbsdkintf.c,v 1.393 2021/05/24 07:43:15 mrg Exp $ */
1.281 rmind 2:
1.1 oster 3: /*-
1.295 erh 4: * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
1.1 oster 5: * All rights reserved.
6: *
7: * This code is derived from software contributed to The NetBSD Foundation
8: * by Greg Oster; Jason R. Thorpe.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
18: *
19: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29: * POSSIBILITY OF SUCH DAMAGE.
30: */
31:
32: /*
1.281 rmind 33: * Copyright (c) 1988 University of Utah.
1.1 oster 34: * Copyright (c) 1990, 1993
35: * The Regents of the University of California. All rights reserved.
36: *
37: * This code is derived from software contributed to Berkeley by
38: * the Systems Programming Group of the University of Utah Computer
39: * Science Department.
40: *
41: * Redistribution and use in source and binary forms, with or without
42: * modification, are permitted provided that the following conditions
43: * are met:
44: * 1. Redistributions of source code must retain the above copyright
45: * notice, this list of conditions and the following disclaimer.
46: * 2. Redistributions in binary form must reproduce the above copyright
47: * notice, this list of conditions and the following disclaimer in the
48: * documentation and/or other materials provided with the distribution.
1.162 agc 49: * 3. Neither the name of the University nor the names of its contributors
50: * may be used to endorse or promote products derived from this software
51: * without specific prior written permission.
52: *
53: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63: * SUCH DAMAGE.
64: *
1.381 riastrad 65: * from: Utah $Hdr: cd.c 1.6 90/11/28$
1.162 agc 66: *
67: * @(#)cd.c 8.2 (Berkeley) 11/16/93
68: */
69:
70: /*
1.1 oster 71: * Copyright (c) 1995 Carnegie-Mellon University.
72: * All rights reserved.
73: *
74: * Authors: Mark Holland, Jim Zelenka
75: *
76: * Permission to use, copy, modify and distribute this software and
77: * its documentation is hereby granted, provided that both the copyright
78: * notice and this permission notice appear in all copies of the
79: * software, derivative works or modified versions, and any portions
80: * thereof, and that both notices appear in supporting documentation.
81: *
82: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85: *
86: * Carnegie Mellon requests users of this software to return to
87: *
88: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
89: * School of Computer Science
90: * Carnegie Mellon University
91: * Pittsburgh PA 15213-3890
92: *
93: * any improvements or extensions that they make and grant Carnegie the
94: * rights to redistribute these changes.
95: */
96:
97: /***********************************************************
98: *
99: * rf_kintf.c -- the kernel interface routines for RAIDframe
100: *
101: ***********************************************************/
1.112 lukem 102:
103: #include <sys/cdefs.h>
1.394 ! mrg 104: __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.393 2021/05/24 07:43:15 mrg Exp $");
1.251 ad 105:
106: #ifdef _KERNEL_OPT
107: #include "opt_raid_autoconfig.h"
1.363 mrg 108: #include "opt_compat_netbsd32.h"
1.251 ad 109: #endif
1.1 oster 110:
1.113 lukem 111: #include <sys/param.h>
1.1 oster 112: #include <sys/errno.h>
113: #include <sys/pool.h>
1.152 thorpej 114: #include <sys/proc.h>
1.1 oster 115: #include <sys/queue.h>
116: #include <sys/disk.h>
117: #include <sys/device.h>
118: #include <sys/stat.h>
119: #include <sys/ioctl.h>
120: #include <sys/fcntl.h>
121: #include <sys/systm.h>
122: #include <sys/vnode.h>
123: #include <sys/disklabel.h>
124: #include <sys/conf.h>
125: #include <sys/buf.h>
1.182 yamt 126: #include <sys/bufq.h>
1.65 oster 127: #include <sys/reboot.h>
1.208 elad 128: #include <sys/kauth.h>
1.327 pgoyette 129: #include <sys/module.h>
1.358 pgoyette 130: #include <sys/compat_stub.h>
1.8 oster 131:
1.234 oster 132: #include <prop/proplib.h>
133:
1.110 oster 134: #include <dev/raidframe/raidframevar.h>
135: #include <dev/raidframe/raidframeio.h>
1.269 jld 136: #include <dev/raidframe/rf_paritymap.h>
1.251 ad 137:
1.1 oster 138: #include "rf_raid.h"
1.44 oster 139: #include "rf_copyback.h"
1.1 oster 140: #include "rf_dag.h"
141: #include "rf_dagflags.h"
1.99 oster 142: #include "rf_desc.h"
1.1 oster 143: #include "rf_diskqueue.h"
144: #include "rf_etimer.h"
145: #include "rf_general.h"
146: #include "rf_kintf.h"
147: #include "rf_options.h"
148: #include "rf_driver.h"
149: #include "rf_parityscan.h"
150: #include "rf_threadstuff.h"
151:
1.325 christos 152: #include "ioconf.h"
153:
1.133 oster 154: #ifdef DEBUG
1.9 oster 155: int rf_kdebug_level = 0;
1.1 oster 156: #define db1_printf(a) if (rf_kdebug_level > 0) printf a
1.9 oster 157: #else /* DEBUG */
1.1 oster 158: #define db1_printf(a) { }
1.9 oster 159: #endif /* DEBUG */
1.1 oster 160:
1.344 christos 161: #ifdef DEBUG_ROOT
162: #define DPRINTF(a, ...) printf(a, __VA_ARGS__)
1.345 christos 163: #else
164: #define DPRINTF(a, ...)
1.344 christos 165: #endif
166:
1.249 oster 167: #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1.289 mrg 168: static rf_declare_mutex2(rf_sparet_wait_mutex);
1.287 mrg 169: static rf_declare_cond2(rf_sparet_wait_cv);
170: static rf_declare_cond2(rf_sparet_resp_cv);
1.1 oster 171:
1.10 oster 172: static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
173: * spare table */
174: static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
175: * installation process */
1.249 oster 176: #endif
1.153 thorpej 177:
1.384 jdolecek 178: const int rf_b_pass = (B_PHYS|B_RAW|B_MEDIA_FLAGS);
179:
1.153 thorpej 180: MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
1.10 oster 181:
1.1 oster 182: /* prototypes */
1.187 christos 183: static void KernelWakeupFunc(struct buf *);
184: static void InitBP(struct buf *, struct vnode *, unsigned,
1.225 christos 185: dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
1.384 jdolecek 186: void *, int);
1.300 christos 187: static void raidinit(struct raid_softc *);
1.335 mlelstv 188: static int raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp);
1.348 jdolecek 189: static int rf_get_component_caches(RF_Raid_t *raidPtr, int *);
1.1 oster 190:
1.261 dyoung 191: static int raid_match(device_t, cfdata_t, void *);
192: static void raid_attach(device_t, device_t, void *);
193: static int raid_detach(device_t, int);
1.130 gehenna 194:
1.385 riastrad 195: static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
1.269 jld 196: daddr_t, daddr_t);
197: static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
198: daddr_t, daddr_t, int);
199:
1.276 mrg 200: static int raidwrite_component_label(unsigned,
201: dev_t, struct vnode *, RF_ComponentLabel_t *);
202: static int raidread_component_label(unsigned,
203: dev_t, struct vnode *, RF_ComponentLabel_t *);
1.269 jld 204:
1.335 mlelstv 205: static int raid_diskstart(device_t, struct buf *bp);
206: static int raid_dumpblocks(device_t, void *, daddr_t, int);
207: static int raid_lastclose(device_t);
1.269 jld 208:
1.324 mrg 209: static dev_type_open(raidopen);
210: static dev_type_close(raidclose);
211: static dev_type_read(raidread);
212: static dev_type_write(raidwrite);
213: static dev_type_ioctl(raidioctl);
214: static dev_type_strategy(raidstrategy);
215: static dev_type_dump(raiddump);
216: static dev_type_size(raidsize);
1.130 gehenna 217:
/*
 * Block device switch for raid(4): standard disk block device entry
 * points; discard is not supported.
 */
const struct bdevsw raid_bdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_ioctl = raidioctl,
	.d_dump = raiddump,
	.d_psize = raidsize,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
228:
/*
 * Character (raw) device switch for raid(4).  Only open/close/read/
 * write/ioctl are meaningful; the tty/poll/mmap/kqfilter/discard slots
 * are stubbed out.
 */
const struct cdevsw raid_cdevsw = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_read = raidread,
	.d_write = raidwrite,
	.d_ioctl = raidioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_DISK
};
1.1 oster 243:
/*
 * dk(9) driver glue: hooks the generic disk framework into the
 * raid-specific start/dump/lastclose handlers below.
 */
static struct dkdriver rf_dkdriver = {
	.d_open = raidopen,
	.d_close = raidclose,
	.d_strategy = raidstrategy,
	.d_diskstart = raid_diskstart,
	.d_dumpblocks = raid_dumpblocks,
	.d_lastclose = raid_lastclose,
	.d_minphys = minphys
};
1.235 oster 253:
1.1 oster 254: #define raidunit(x) DISKUNIT(x)
1.335 mlelstv 255: #define raidsoftc(dev) (((struct raid_softc *)device_private(dev))->sc_r.softc)
1.1 oster 256:
1.202 oster 257: extern struct cfdriver raid_cd;
1.266 dyoung 258: CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
259: raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
260: DVF_DETACH_SHUTDOWN);
1.202 oster 261:
/* Internal representation of a rf_recon_req */
struct rf_recon_req_internal {
	RF_RowCol_t col;		/* column being reconstructed/failed */
	RF_ReconReqFlags_t flags;	/* reconstruction request flags */
	void *raidPtr;			/* the RF_Raid_t this request targets */
};
268:
1.186 perry 269: /*
270: * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
271: * Be aware that large numbers can allow the driver to consume a lot of
1.28 oster 272: * kernel memory, especially on writes, and in degraded mode reads.
1.186 perry 273: *
274: * For example: with a stripe width of 64 blocks (32k) and 5 disks,
275: * a single 64K write will typically require 64K for the old data,
276: * 64K for the old parity, and 64K for the new parity, for a total
1.28 oster 277: * of 192K (if the parity buffer is not re-used immediately).
1.110 oster 278: * Even it if is used immediately, that's still 128K, which when multiplied
1.28 oster 279: * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
1.186 perry 280: *
1.28 oster 281: * Now in degraded mode, for example, a 64K read on the above setup may
1.186 perry 282: * require data reconstruction, which will require *all* of the 4 remaining
1.28 oster 283: * disks to participate -- 4 * 32K/disk == 128K again.
1.20 oster 284: */
285:
286: #ifndef RAIDOUTSTANDING
1.28 oster 287: #define RAIDOUTSTANDING 6
1.20 oster 288: #endif
289:
1.1 oster 290: #define RAIDLABELDEV(dev) \
291: (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
292:
293: /* declared here, and made public, for the benefit of KVM stuff.. */
1.9 oster 294:
1.104 oster 295: static int raidlock(struct raid_softc *);
296: static void raidunlock(struct raid_softc *);
1.1 oster 297:
1.266 dyoung 298: static int raid_detach_unlocked(struct raid_softc *);
299:
1.104 oster 300: static void rf_markalldirty(RF_Raid_t *);
1.304 christos 301: static void rf_set_geometry(struct raid_softc *, RF_Raid_t *);
1.48 oster 302:
1.393 mrg 303: static void rf_ReconThread(struct rf_recon_req_internal *);
304: static void rf_RewriteParityThread(RF_Raid_t *raidPtr);
305: static void rf_CopybackThread(RF_Raid_t *raidPtr);
306: static void rf_ReconstructInPlaceThread(struct rf_recon_req_internal *);
307: static int rf_autoconfig(device_t);
308: static void rf_buildroothack(RF_ConfigSet_t *);
1.104 oster 309:
1.393 mrg 310: static RF_AutoConfig_t *rf_find_raid_components(void);
311: static RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
1.104 oster 312: static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
1.393 mrg 313: static void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
314: static int rf_set_autoconfig(RF_Raid_t *, int);
315: static int rf_set_rootpartition(RF_Raid_t *, int);
316: static void rf_release_all_vps(RF_ConfigSet_t *);
317: static void rf_cleanup_config_set(RF_ConfigSet_t *);
318: static int rf_have_enough_components(RF_ConfigSet_t *);
319: static struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
1.278 mrg 320: static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
1.48 oster 321:
1.295 erh 322: /*
323: * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
324: * Note that this is overridden by having RAID_AUTOCONFIG as an option
325: * in the kernel config file.
326: */
327: #ifdef RAID_AUTOCONFIG
328: int raidautoconfig = 1;
329: #else
330: int raidautoconfig = 0;
331: #endif
332: static bool raidautoconfigdone = false;
1.37 oster 333:
1.177 oster 334: struct RF_Pools_s rf_pools;
335:
1.300 christos 336: static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
337: static kmutex_t raid_lock;
1.1 oster 338:
1.300 christos 339: static struct raid_softc *
340: raidcreate(int unit) {
341: struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
342: sc->sc_unit = unit;
1.327 pgoyette 343: cv_init(&sc->sc_cv, "raidunit");
344: mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_NONE);
1.300 christos 345: return sc;
346: }
1.1 oster 347:
1.300 christos 348: static void
349: raiddestroy(struct raid_softc *sc) {
1.327 pgoyette 350: cv_destroy(&sc->sc_cv);
351: mutex_destroy(&sc->sc_mutex);
1.300 christos 352: kmem_free(sc, sizeof(*sc));
353: }
1.50 oster 354:
1.300 christos 355: static struct raid_softc *
1.327 pgoyette 356: raidget(int unit, bool create) {
1.300 christos 357: struct raid_softc *sc;
358: if (unit < 0) {
359: #ifdef DIAGNOSTIC
360: panic("%s: unit %d!", __func__, unit);
361: #endif
362: return NULL;
363: }
364: mutex_enter(&raid_lock);
365: LIST_FOREACH(sc, &raids, sc_link) {
366: if (sc->sc_unit == unit) {
367: mutex_exit(&raid_lock);
368: return sc;
369: }
370: }
371: mutex_exit(&raid_lock);
1.327 pgoyette 372: if (!create)
373: return NULL;
1.379 chs 374: sc = raidcreate(unit);
1.300 christos 375: mutex_enter(&raid_lock);
376: LIST_INSERT_HEAD(&raids, sc, sc_link);
377: mutex_exit(&raid_lock);
378: return sc;
379: }
380:
1.385 riastrad 381: static void
1.300 christos 382: raidput(struct raid_softc *sc) {
383: mutex_enter(&raid_lock);
384: LIST_REMOVE(sc, sc_link);
385: mutex_exit(&raid_lock);
386: raiddestroy(sc);
387: }
1.1 oster 388:
/*
 * Pseudo-device attach entry point.  Intentionally empty: device
 * attachment and the associated initialization are performed as part
 * of module initialization instead.
 */
void
raidattach(int num)
{

	/*
	 * Device attachment and associated initialization now occurs
	 * as part of the module initialization.
	 */
}
398:
/*
 * Perform RAID autoconfiguration, once: locate all RAID components on
 * the system, sort them into configuration sets, and configure/select
 * a root from the valid ones via rf_buildroothack().  Returns 0 if
 * autoconfiguration is disabled or has already run, 1 after a scan.
 */
static int
rf_autoconfig(device_t self)
{
	RF_AutoConfig_t *ac_list;
	RF_ConfigSet_t *config_sets;

	if (!raidautoconfig || raidautoconfigdone == true)
		return 0;

	/* XXX This code can only be run once. */
	raidautoconfigdone = true;

#ifdef __HAVE_CPU_BOOTCONF
	/*
	 * 0. find the boot device if needed first so we can use it later
	 * this needs to be done before we autoconfigure any raid sets,
	 * because if we use wedges we are not going to be able to open
	 * the boot device later
	 */
	if (booted_device == NULL)
		cpu_bootconf();
#endif
	/* 1. locate all RAID components on the system */
	aprint_debug("Searching for RAID components...\n");
	ac_list = rf_find_raid_components();

	/* 2. Sort them into their respective sets. */
	config_sets = rf_create_auto_sets(ac_list);

	/*
	 * 3. Evaluate each set and configure the valid ones.
	 * This gets done in rf_buildroothack().
	 */
	rf_buildroothack(config_sets);

	return 1;
}
436:
1.367 christos 437: int
438: rf_inited(const struct raid_softc *rs) {
439: return (rs->sc_flags & RAIDF_INITED) != 0;
440: }
441:
/* Accessor: return the RF_Raid_t embedded in the softc. */
RF_Raid_t *
rf_get_raid(struct raid_softc *rs) {
	return &rs->sc_r;
}
446:
/* Accessor: return the unit number of the softc. */
int
rf_get_unit(const struct raid_softc *rs) {
	return rs->sc_unit;
}
451:
/*
 * Return 1 if RAID set `r' has the boot device `bdv' among its
 * components, 0 otherwise.  Wedge components ("dkN") are matched via
 * their parent disk's name.
 */
static int
rf_containsboot(RF_Raid_t *r, device_t bdv) {
	const char *bootname;
	size_t len;

	/* if bdv is NULL, the set can't contain it. exit early. */
	if (bdv == NULL)
		return 0;

	bootname = device_xname(bdv);
	len = strlen(bootname);

	for (int col = 0; col < r->numCol; col++) {
		const char *devname = r->Disks[col].devname;
		/* skip the leading "/dev/" of the stored component path */
		devname += sizeof("/dev/") - 1;
		if (strncmp(devname, "dk", 2) == 0) {
			/* wedge component: compare against its parent disk */
			const char *parent =
			    dkwedge_get_parent_name(r->Disks[col].dev);
			if (parent != NULL)
				devname = parent;
		}
		/*
		 * NOTE(review): this is a prefix match bounded by
		 * strlen(bootname), so e.g. boot device "wd1" would also
		 * match component "wd10" — presumably acceptable in
		 * practice; confirm.
		 */
		if (strncmp(devname, bootname, len) == 0) {
			struct raid_softc *sc = r->softc;
			aprint_debug("raid%d includes boot device %s\n",
			    sc->sc_unit, devname);
			return 1;
		}
	}
	return 0;
}
482:
/*
 * Walk the autoconfiguration sets: configure each set that has enough
 * components and is marked autoconfigure, releasing resources for the
 * rest.  Then, unless the user hardwired a root (rootspec), try to
 * select the booted_device: with exactly one rootable set, use it
 * (preferring a wedge named "<dev>a", else the first dk child, else the
 * raid device itself); with several candidates, narrow by which set
 * contains the MD-determined boot device, and if that fails fall back
 * to asking the user (RB_ASKNAME).
 */
static void
rf_buildroothack(RF_ConfigSet_t *config_sets)
{
	RF_ConfigSet_t *cset;
	RF_ConfigSet_t *next_cset;
	int num_root;
	struct raid_softc *sc, *rsc;
	struct dk_softc *dksc = NULL;	/* XXX gcc -Os: may be used uninit. */

	sc = rsc = NULL;
	num_root = 0;
	cset = config_sets;
	while (cset != NULL) {
		/* save the link now: rf_cleanup_config_set frees cset */
		next_cset = cset->next;
		if (rf_have_enough_components(cset) &&
		    cset->ac->clabel->autoconfigure == 1) {
			sc = rf_auto_config_set(cset);
			if (sc != NULL) {
				aprint_debug("raid%d: configured ok, rootable %d\n",
				    sc->sc_unit, cset->rootable);
				/* remember the last rootable set we saw */
				if (cset->rootable) {
					rsc = sc;
					num_root++;
				}
			} else {
				/* The autoconfig didn't work :( */
				aprint_debug("Autoconfig failed\n");
				rf_release_all_vps(cset);
			}
		} else {
			/* we're not autoconfiguring this set...
			   release the associated resources */
			rf_release_all_vps(cset);
		}
		/* cleanup */
		rf_cleanup_config_set(cset);
		cset = next_cset;
	}

	/* if the user has specified what the root device should be
	   then we don't touch booted_device or boothowto... */

	if (rootspec != NULL) {
		DPRINTF("%s: rootspec %s\n", __func__, rootspec);
		return;
	}

	/* we found something bootable... */

	/*
	 * XXX: The following code assumes that the root raid
	 * is the first ('a') partition. This is about the best
	 * we can do with a BSD disklabel, but we might be able
	 * to do better with a GPT label, by setting a specified
	 * attribute to indicate the root partition. We can then
	 * stash the partition number in the r->root_partition
	 * high bits (the bottom 2 bits are already used). For
	 * now we just set booted_partition to 0 when we override
	 * root.
	 */
	if (num_root == 1) {
		device_t candidate_root;
		dksc = &rsc->sc_dksc;
		if (dksc->sc_dkdev.dk_nwedges != 0) {
			char cname[sizeof(cset->ac->devname)];
			/* XXX: assume partition 'a' first */
			snprintf(cname, sizeof(cname), "%s%c",
			    device_xname(dksc->sc_dev), 'a');
			candidate_root = dkwedge_find_by_wname(cname);
			DPRINTF("%s: candidate wedge root=%s\n", __func__,
			    cname);
			if (candidate_root == NULL) {
				/*
				 * If that is not found, because we don't use
				 * disklabel, return the first dk child
				 * XXX: we can skip the 'a' check above
				 * and always do this...
				 */
				size_t i = 0;
				candidate_root = dkwedge_find_by_parent(
				    device_xname(dksc->sc_dev), &i);
			}
			DPRINTF("%s: candidate wedge root=%p\n", __func__,
			    candidate_root);
		} else
			candidate_root = dksc->sc_dev;
		DPRINTF("%s: candidate root=%p\n", __func__, candidate_root);
		DPRINTF("%s: booted_device=%p root_partition=%d "
		    "contains_boot=%d",
		    __func__, booted_device, rsc->sc_r.root_partition,
		    rf_containsboot(&rsc->sc_r, booted_device));
		/* XXX the check for booted_device == NULL can probably be
		 * dropped, now that rf_containsboot handles that case.
		 */
		if (booted_device == NULL ||
		    rsc->sc_r.root_partition == 1 ||
		    rf_containsboot(&rsc->sc_r, booted_device)) {
			booted_device = candidate_root;
			booted_method = "raidframe/single";
			booted_partition = 0;	/* XXX assume 'a' */
			DPRINTF("%s: set booted_device=%s(%p)\n", __func__,
			    device_xname(booted_device), booted_device);
		}
	} else if (num_root > 1) {
		DPRINTF("%s: many roots=%d, %p\n", __func__, num_root,
		    booted_device);

		/*
		 * Maybe the MD code can help. If it cannot, then
		 * setroot() will discover that we have no
		 * booted_device and will ask the user if nothing was
		 * hardwired in the kernel config file
		 */
		if (booted_device == NULL)
			return;

		/* re-count, keeping only sets that contain the boot device */
		num_root = 0;
		mutex_enter(&raid_lock);
		LIST_FOREACH(sc, &raids, sc_link) {
			RF_Raid_t *r = &sc->sc_r;
			if (r->valid == 0)
				continue;

			if (r->root_partition == 0)
				continue;

			if (rf_containsboot(r, booted_device)) {
				num_root++;
				rsc = sc;
				dksc = &rsc->sc_dksc;
			}
		}
		mutex_exit(&raid_lock);

		if (num_root == 1) {
			booted_device = dksc->sc_dev;
			booted_method = "raidframe/multi";
			booted_partition = 0;	/* XXX assume 'a' */
		} else {
			/* we can't guess.. require the user to answer... */
			boothowto |= RB_ASKNAME;
		}
	}
}
627:
1.324 mrg 628: static int
1.169 oster 629: raidsize(dev_t dev)
1.1 oster 630: {
631: struct raid_softc *rs;
1.335 mlelstv 632: struct dk_softc *dksc;
633: unsigned int unit;
1.1 oster 634:
635: unit = raidunit(dev);
1.327 pgoyette 636: if ((rs = raidget(unit, false)) == NULL)
1.336 mlelstv 637: return -1;
1.335 mlelstv 638: dksc = &rs->sc_dksc;
639:
1.1 oster 640: if ((rs->sc_flags & RAIDF_INITED) == 0)
1.336 mlelstv 641: return -1;
1.1 oster 642:
1.335 mlelstv 643: return dk_size(dksc, dev);
644: }
1.1 oster 645:
1.335 mlelstv 646: static int
647: raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
648: {
649: unsigned int unit;
650: struct raid_softc *rs;
651: struct dk_softc *dksc;
1.1 oster 652:
1.335 mlelstv 653: unit = raidunit(dev);
654: if ((rs = raidget(unit, false)) == NULL)
655: return ENXIO;
656: dksc = &rs->sc_dksc;
1.1 oster 657:
1.335 mlelstv 658: if ((rs->sc_flags & RAIDF_INITED) == 0)
659: return ENODEV;
1.1 oster 660:
1.336 mlelstv 661: /*
662: Note that blkno is relative to this particular partition.
663: By adding adding RF_PROTECTED_SECTORS, we get a value that
664: is relative to the partition used for the underlying component.
665: */
666: blkno += RF_PROTECTED_SECTORS;
667:
1.380 riastrad 668: return dk_dump(dksc, dev, blkno, va, size, DK_DUMP_RECURSIVE);
1.1 oster 669: }
670:
/*
 * Write `nblk' blocks at `blkno' to a single live component of the set
 * during a crash dump.  Only RAID 1 sets (one data + one parity column)
 * are supported.  Target preference order:
 *   1) the first component
 *   2) a used_spare of the first component
 *   3) the second component
 *   4) a used_spare of the second component
 */
static int
raid_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct raid_softc *rs = raidsoftc(dev);
	const struct bdevsw *bdev;
	RF_Raid_t *raidPtr;
	int c, sparecol, j, scol, dumpto;
	int error = 0;

	raidPtr = &rs->sc_r;

	/* we only support dumping to RAID 1 sets */
	if (raidPtr->Layout.numDataCol != 1 ||
	    raidPtr->Layout.numParityCol != 1)
		return EINVAL;

	if ((error = raidlock(rs)) != 0)
		return error;

	/* figure out what device is alive.. */

	/*
	  Look for a component to dump to.  The preference for the
	  component to dump to is as follows:
	  1) the first component
	  2) a used_spare of the first component
	  3) the second component
	  4) a used_spare of the second component
	*/

	dumpto = -1;
	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			/* this might be the one */
			dumpto = c;
			break;
		}
	}

	/*
	  At this point we have possibly selected a live component.
	  If we didn't find a live ocmponent, we now check to see
	  if there is a relevant spared component.
	*/

	for (c = 0; c < raidPtr->numSpare; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/* How about this one?  Find which column (if any)
			   this spare is standing in for. */
			scol = -1;
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}
			if (scol == 0) {
				/*
				  We must have found a spared first
				  component!  We'll take that over
				  anything else found so far.  (We
				  couldn't have found a real first
				  component before, since this is a
				  used spare, and it's saying that
				  it's replacing the first
				  component.)  On reboot (with
				  autoconfiguration turned on)
				  sparecol will become the first
				  component (component0) of this set.
				*/
				dumpto = sparecol;
				break;
			} else if (scol != -1) {
				/*
				  Must be a spared second component.
				  We'll dump to that if we havn't found
				  anything else so far.
				*/
				if (dumpto == -1)
					dumpto = sparecol;
			}
		}
	}

	if (dumpto == -1) {
		/* we couldn't find any live components to dump to!?!?
		 */
		error = EINVAL;
		goto out;
	}

	bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
	if (bdev == NULL) {
		error = ENXIO;
		goto out;
	}

	/* dump straight through to the chosen component's block device */
	error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
	    blkno, va, nblk * raidPtr->bytesPerSector);

out:
	raidunlock(rs);

	return error;
}
1.324 mrg 776:
/*
 * Open entry point (block and character).  Creates the softc on first
 * reference, refuses opens while a shutdown is pending, marks all
 * components dirty on the first open of a configured set, and defers
 * the rest to dk_open().
 */
/* ARGSUSED */
static int
raidopen(dev_t dev, int flags, int fmt,
    struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	int error = 0;
	int part, pmask;

	if ((rs = raidget(unit, true)) == NULL)
		return ENXIO;
	if ((error = raidlock(rs)) != 0)
		return error;

	/* a shutdown/detach is in progress: reject new opens */
	if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
		error = EBUSY;
		goto bad;
	}

	dksc = &rs->sc_dksc;

	part = DISKPART(dev);
	pmask = (1 << part);

	if (!DK_BUSY(dksc, pmask) &&
	    ((rs->sc_flags & RAIDF_INITED) != 0)) {
		/* First one... mark things as dirty... Note that we *MUST*
		   have done a configure before this.  I DO NOT WANT TO BE
		   SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
		   THAT THEY BELONG TOGETHER!!!!! */
		/* XXX should check to see if we're only open for reading
		   here... If so, we needn't do this, but then need some
		   other way of keeping track of what's happened.. */

		rf_markalldirty(&rs->sc_r);
	}

	if ((rs->sc_flags & RAIDF_INITED) != 0)
		error = dk_open(dksc, dev, flags, fmt, l);

bad:
	raidunlock(rs);

	return error;
}
1.324 mrg 826:
1.335 mlelstv 827: static int
828: raid_lastclose(device_t self)
829: {
830: struct raid_softc *rs = raidsoftc(self);
831:
832: /* Last one... device is not unconfigured yet.
833: Device shutdown has taken care of setting the
834: clean bits if RAIDF_INITED is not set
835: mark things as clean... */
836:
837: rf_update_component_labels(&rs->sc_r,
838: RF_FINAL_COMPONENT_UPDATE);
839:
840: /* pass to unlocked code */
841: if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
842: rs->sc_flags |= RAIDF_DETACH;
843:
844: return 0;
845: }
846:
/*
 * Close entry point.  Hands the close to dk_close(); when a shutdown
 * was requested, either detaches the pseudo-device (configured set,
 * RAIDF_DETACH set by raid_lastclose()) or destroys the never-
 * configured softc (RAIDF_SHUTDOWN only).  Both are done after the
 * softc lock is dropped.
 */
/* ARGSUSED */
static int
raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
{
	int unit = raidunit(dev);
	struct raid_softc *rs;
	struct dk_softc *dksc;
	cfdata_t cf;
	int error = 0, do_detach = 0, do_put = 0;

	if ((rs = raidget(unit, false)) == NULL)
		return ENXIO;
	dksc = &rs->sc_dksc;

	if ((error = raidlock(rs)) != 0)
		return error;

	if ((rs->sc_flags & RAIDF_INITED) != 0) {
		error = dk_close(dksc, dev, flags, fmt, l);
		if ((rs->sc_flags & RAIDF_DETACH) != 0)
			do_detach = 1;
	} else if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0)
		do_put = 1;

	raidunlock(rs);

	if (do_detach) {
		/* free the pseudo device attach bits */
		cf = device_cfdata(dksc->sc_dev);
		error = config_detach(dksc->sc_dev, 0);
		if (error == 0)
			free(cf, M_RAIDFRAME);
	} else if (do_put) {
		raidput(rs);
	}

	return error;
}
1.327 pgoyette 886:
/*
 * Wake the RAIDframe I/O thread: signal iodone_cv under iodone_lock
 * so the worker re-examines its queues for deferred work.
 */
static void
raid_wakeup(RF_Raid_t *raidPtr)
{
	rf_lock_mutex2(raidPtr->iodone_lock);
	rf_signal_cond2(raidPtr->iodone_cv);
	rf_unlock_mutex2(raidPtr->iodone_lock);
}
894:
1.324 mrg 895: static void
1.169 oster 896: raidstrategy(struct buf *bp)
1.1 oster 897: {
1.335 mlelstv 898: unsigned int unit;
899: struct raid_softc *rs;
900: struct dk_softc *dksc;
1.1 oster 901: RF_Raid_t *raidPtr;
902:
1.335 mlelstv 903: unit = raidunit(bp->b_dev);
1.327 pgoyette 904: if ((rs = raidget(unit, false)) == NULL) {
1.30 oster 905: bp->b_error = ENXIO;
1.335 mlelstv 906: goto fail;
1.30 oster 907: }
1.300 christos 908: if ((rs->sc_flags & RAIDF_INITED) == 0) {
909: bp->b_error = ENXIO;
1.335 mlelstv 910: goto fail;
1.1 oster 911: }
1.335 mlelstv 912: dksc = &rs->sc_dksc;
1.300 christos 913: raidPtr = &rs->sc_r;
1.335 mlelstv 914:
915: /* Queue IO only */
916: if (dk_strategy_defer(dksc, bp))
1.196 yamt 917: goto done;
1.1 oster 918:
1.335 mlelstv 919: /* schedule the IO to happen at the next convenient time */
920: raid_wakeup(raidPtr);
921:
922: done:
923: return;
924:
925: fail:
926: bp->b_resid = bp->b_bcount;
927: biodone(bp);
928: }
929:
930: static int
931: raid_diskstart(device_t dev, struct buf *bp)
932: {
933: struct raid_softc *rs = raidsoftc(dev);
934: RF_Raid_t *raidPtr;
1.1 oster 935:
1.335 mlelstv 936: raidPtr = &rs->sc_r;
937: if (!raidPtr->valid) {
938: db1_printf(("raid is not valid..\n"));
939: return ENODEV;
1.196 yamt 940: }
1.285 mrg 941:
1.335 mlelstv 942: /* XXX */
943: bp->b_resid = 0;
944:
945: return raiddoaccess(raidPtr, bp);
946: }
1.1 oster 947:
1.335 mlelstv 948: void
949: raiddone(RF_Raid_t *raidPtr, struct buf *bp)
950: {
951: struct raid_softc *rs;
952: struct dk_softc *dksc;
1.34 oster 953:
1.335 mlelstv 954: rs = raidPtr->softc;
955: dksc = &rs->sc_dksc;
1.34 oster 956:
1.335 mlelstv 957: dk_done(dksc, bp);
1.34 oster 958:
1.335 mlelstv 959: rf_lock_mutex2(raidPtr->mutex);
960: raidPtr->openings++;
961: rf_unlock_mutex2(raidPtr->mutex);
1.196 yamt 962:
1.335 mlelstv 963: /* schedule more IO */
964: raid_wakeup(raidPtr);
1.1 oster 965: }
1.324 mrg 966:
1.1 oster 967: /* ARGSUSED */
1.324 mrg 968: static int
1.222 christos 969: raidread(dev_t dev, struct uio *uio, int flags)
1.1 oster 970: {
1.9 oster 971: int unit = raidunit(dev);
1.1 oster 972: struct raid_softc *rs;
973:
1.327 pgoyette 974: if ((rs = raidget(unit, false)) == NULL)
1.300 christos 975: return ENXIO;
1.1 oster 976:
977: if ((rs->sc_flags & RAIDF_INITED) == 0)
1.389 skrll 978: return ENXIO;
1.1 oster 979:
1.389 skrll 980: return physio(raidstrategy, NULL, dev, B_READ, minphys, uio);
1.1 oster 981:
982: }
1.324 mrg 983:
1.1 oster 984: /* ARGSUSED */
1.324 mrg 985: static int
1.222 christos 986: raidwrite(dev_t dev, struct uio *uio, int flags)
1.1 oster 987: {
1.9 oster 988: int unit = raidunit(dev);
1.1 oster 989: struct raid_softc *rs;
990:
1.327 pgoyette 991: if ((rs = raidget(unit, false)) == NULL)
1.300 christos 992: return ENXIO;
1.1 oster 993:
994: if ((rs->sc_flags & RAIDF_INITED) == 0)
1.389 skrll 995: return ENXIO;
1.147 oster 996:
1.389 skrll 997: return physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio);
1.1 oster 998:
999: }
1000:
/*
 * Tear down a raid unit: shut the RAIDframe engine down, drain and
 * free queued buffers, and detach the dk(9)/disk(9) state.  Refuses
 * with EBUSY while the device is open or a recon/parity-rewrite/
 * copyback is running.  Succeeds trivially if never initialized.
 *
 * NOTE(review): the "_unlocked" suffix suggests the caller handles
 * the raid lock around this call -- confirm against the call sites.
 */
static int
raid_detach_unlocked(struct raid_softc *rs)
{
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr;
	int error;

	raidPtr = &rs->sc_r;

	/* Refuse while open or while background operations run. */
	if (DK_BUSY(dksc, 0) ||
	    raidPtr->recon_in_progress != 0 ||
	    raidPtr->parity_rewrite_in_progress != 0 ||
	    raidPtr->copyback_in_progress != 0)
		return EBUSY;

	/* Nothing to undo if the set was never configured. */
	if ((rs->sc_flags & RAIDF_INITED) == 0)
		return 0;

	rs->sc_flags &= ~RAIDF_SHUTDOWN;

	if ((error = rf_Shutdown(raidPtr)) != 0)
		return error;

	rs->sc_flags &= ~RAIDF_INITED;

	/* Kill off any queued buffers */
	dk_drain(dksc);
	bufq_free(dksc->sc_bufq);

	/* Detach the disk. */
	dkwedge_delall(&dksc->sc_dkdev);
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);
	dk_detach(dksc);

	return 0;
}
1038:
1.366 christos 1039: static bool
1040: rf_must_be_initialized(const struct raid_softc *rs, u_long cmd)
1041: {
1042: switch (cmd) {
1043: case RAIDFRAME_ADD_HOT_SPARE:
1044: case RAIDFRAME_CHECK_COPYBACK_STATUS:
1045: case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1046: case RAIDFRAME_CHECK_PARITY:
1047: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1048: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1049: case RAIDFRAME_CHECK_RECON_STATUS:
1050: case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1051: case RAIDFRAME_COPYBACK:
1052: case RAIDFRAME_DELETE_COMPONENT:
1053: case RAIDFRAME_FAIL_DISK:
1054: case RAIDFRAME_GET_ACCTOTALS:
1055: case RAIDFRAME_GET_COMPONENT_LABEL:
1056: case RAIDFRAME_GET_INFO:
1057: case RAIDFRAME_GET_SIZE:
1058: case RAIDFRAME_INCORPORATE_HOT_SPARE:
1059: case RAIDFRAME_INIT_LABELS:
1060: case RAIDFRAME_KEEP_ACCTOTALS:
1061: case RAIDFRAME_PARITYMAP_GET_DISABLE:
1062: case RAIDFRAME_PARITYMAP_SET_DISABLE:
1063: case RAIDFRAME_PARITYMAP_SET_PARAMS:
1064: case RAIDFRAME_PARITYMAP_STATUS:
1065: case RAIDFRAME_REBUILD_IN_PLACE:
1066: case RAIDFRAME_REMOVE_HOT_SPARE:
1067: case RAIDFRAME_RESET_ACCTOTALS:
1068: case RAIDFRAME_REWRITEPARITY:
1069: case RAIDFRAME_SET_AUTOCONFIG:
1070: case RAIDFRAME_SET_COMPONENT_LABEL:
1071: case RAIDFRAME_SET_ROOT:
1.369 oster 1072: return (rs->sc_flags & RAIDF_INITED) == 0;
1.366 christos 1073: }
1074: return false;
1075: }
1076:
1077: int
1078: rf_fail_disk(RF_Raid_t *raidPtr, struct rf_recon_req *rr)
1079: {
1080: struct rf_recon_req_internal *rrint;
1081:
1082: if (raidPtr->Layout.map->faultsTolerated == 0) {
1083: /* Can't do this on a RAID 0!! */
1084: return EINVAL;
1085: }
1086:
1087: if (rr->col < 0 || rr->col >= raidPtr->numCol) {
1088: /* bad column */
1089: return EINVAL;
1090: }
1091:
1092: rf_lock_mutex2(raidPtr->mutex);
1093: if (raidPtr->status == rf_rs_reconstructing) {
1094: /* you can't fail a disk while we're reconstructing! */
1095: /* XXX wrong for RAID6 */
1096: goto out;
1097: }
1098: if ((raidPtr->Disks[rr->col].status == rf_ds_optimal) &&
1099: (raidPtr->numFailures > 0)) {
1100: /* some other component has failed. Let's not make
1101: things worse. XXX wrong for RAID6 */
1102: goto out;
1103: }
1104: if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1105: /* Can't fail a spared disk! */
1106: goto out;
1107: }
1108: rf_unlock_mutex2(raidPtr->mutex);
1109:
1110: /* make a copy of the recon request so that we don't rely on
1111: * the user's buffer */
1.374 christos 1112: rrint = RF_Malloc(sizeof(*rrint));
1.366 christos 1113: if (rrint == NULL)
1114: return(ENOMEM);
1115: rrint->col = rr->col;
1116: rrint->flags = rr->flags;
1117: rrint->raidPtr = raidPtr;
1118:
1119: return RF_CREATE_THREAD(raidPtr->recon_thread, rf_ReconThread,
1120: rrint, "raid_recon");
1121: out:
1122: rf_unlock_mutex2(raidPtr->mutex);
1123: return EINVAL;
1124: }
1125:
1.324 mrg 1126: static int
1.367 christos 1127: rf_copyinspecificbuf(RF_Config_t *k_cfg)
1128: {
1129: /* allocate a buffer for the layout-specific data, and copy it in */
1130: if (k_cfg->layoutSpecificSize == 0)
1131: return 0;
1132:
1133: if (k_cfg->layoutSpecificSize > 10000) {
1134: /* sanity check */
1135: return EINVAL;
1136: }
1137:
1138: u_char *specific_buf;
1.374 christos 1139: specific_buf = RF_Malloc(k_cfg->layoutSpecificSize);
1.367 christos 1140: if (specific_buf == NULL)
1141: return ENOMEM;
1142:
1143: int retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1144: k_cfg->layoutSpecificSize);
1145: if (retcode) {
1146: RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1147: db1_printf(("%s: retcode=%d copyin.2\n", __func__, retcode));
1148: return retcode;
1149: }
1150:
1151: k_cfg->layoutSpecific = specific_buf;
1152: return 0;
1153: }
1154:
1155: static int
1156: rf_getConfiguration(struct raid_softc *rs, void *data, RF_Config_t **k_cfg)
1157: {
1.372 christos 1158: RF_Config_t *u_cfg = *((RF_Config_t **) data);
1159:
1.367 christos 1160: if (rs->sc_r.valid) {
1161: /* There is a valid RAID set running on this unit! */
1162: printf("raid%d: Device already configured!\n", rs->sc_unit);
1163: return EINVAL;
1164: }
1165:
1166: /* copy-in the configuration information */
1167: /* data points to a pointer to the configuration structure */
1.374 christos 1168: *k_cfg = RF_Malloc(sizeof(**k_cfg));
1.367 christos 1169: if (*k_cfg == NULL) {
1170: return ENOMEM;
1171: }
1.373 christos 1172: int retcode = copyin(u_cfg, *k_cfg, sizeof(RF_Config_t));
1.367 christos 1173: if (retcode == 0)
1174: return 0;
1175: RF_Free(*k_cfg, sizeof(RF_Config_t));
1176: db1_printf(("%s: retcode=%d copyin.1\n", __func__, retcode));
1177: rs->sc_flags |= RAIDF_SHUTDOWN;
1178: return retcode;
1179: }
1180:
1181: int
1182: rf_construct(struct raid_softc *rs, RF_Config_t *k_cfg)
1183: {
1184: int retcode;
1185: RF_Raid_t *raidPtr = &rs->sc_r;
1186:
1187: rs->sc_flags &= ~RAIDF_SHUTDOWN;
1188:
1189: if ((retcode = rf_copyinspecificbuf(k_cfg)) != 0)
1190: goto out;
1191:
1192: /* should do some kind of sanity check on the configuration.
1193: * Store the sum of all the bytes in the last byte? */
1194:
1195: /* configure the system */
1196:
1197: /*
1198: * Clear the entire RAID descriptor, just to make sure
1199: * there is no stale data left in the case of a
1200: * reconfiguration
1201: */
1202: memset(raidPtr, 0, sizeof(*raidPtr));
1203: raidPtr->softc = rs;
1204: raidPtr->raidid = rs->sc_unit;
1205:
1206: retcode = rf_Configure(raidPtr, k_cfg, NULL);
1207:
1208: if (retcode == 0) {
1209: /* allow this many simultaneous IO's to
1210: this RAID device */
1211: raidPtr->openings = RAIDOUTSTANDING;
1212:
1213: raidinit(rs);
1214: raid_wakeup(raidPtr);
1215: rf_markalldirty(raidPtr);
1216: }
1217:
1218: /* free the buffers. No return code here. */
1219: if (k_cfg->layoutSpecificSize) {
1220: RF_Free(k_cfg->layoutSpecific, k_cfg->layoutSpecificSize);
1221: }
1222: out:
1223: RF_Free(k_cfg, sizeof(RF_Config_t));
1224: if (retcode) {
1225: /*
1226: * If configuration failed, set sc_flags so that we
1227: * will detach the device when we close it.
1228: */
1229: rs->sc_flags |= RAIDF_SHUTDOWN;
1230: }
1231: return retcode;
1232: }
1233:
#if RF_DISABLED
/*
 * RAIDFRAME_SET_COMPONENT_LABEL handler -- currently compiled out.
 * Intentionally a near no-op: it validates the column and copies the
 * user's label over the in-core copy, but (per the XXX notes below)
 * must not be enabled until the label contents are validated and the
 * remaining fields are filled in.
 */
static int
rf_set_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
{

	/* XXX check the label for valid stuff... */
	/* Note that some things *should not* get modified --
	   the user should be re-initing the labels instead of
	   trying to patch things.
	   */
#ifdef DEBUG
	int raidid = raidPtr->raidid;
	printf("raid%d: Got component label:\n", raidid);
	printf("raid%d: Version: %d\n", raidid, clabel->version);
	printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
	printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
	printf("raid%d: Column: %d\n", raidid, clabel->column);
	printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
	printf("raid%d: Clean: %d\n", raidid, clabel->clean);
	printf("raid%d: Status: %d\n", raidid, clabel->status);
#endif	/* DEBUG */
	clabel->row = 0;
	int column = clabel->column;

	if ((column < 0) || (column >= raidPtr->numCol)) {
		return(EINVAL);
	}

	/* XXX this isn't allowed to do anything for now :-) */

	/* XXX and before it is, we need to fill in the rest
	   of the fields!?!?!?! */
	memcpy(raidget_component_label(raidPtr, column),
	    clabel, sizeof(*clabel));
	raidflush_component_label(raidPtr, column);
	return 0;
}
#endif
1272:
1273: static int
1274: rf_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
1275: {
1276: /*
1277: we only want the serial number from
1278: the above. We get all the rest of the information
1279: from the config that was used to create this RAID
1280: set.
1281: */
1282:
1283: raidPtr->serial_number = clabel->serial_number;
1284:
1285: for (int column = 0; column < raidPtr->numCol; column++) {
1286: RF_RaidDisk_t *diskPtr = &raidPtr->Disks[column];
1287: if (RF_DEAD_DISK(diskPtr->status))
1288: continue;
1289: RF_ComponentLabel_t *ci_label = raidget_component_label(
1290: raidPtr, column);
1291: /* Zeroing this is important. */
1292: memset(ci_label, 0, sizeof(*ci_label));
1293: raid_init_component_label(raidPtr, ci_label);
1294: ci_label->serial_number = raidPtr->serial_number;
1295: ci_label->row = 0; /* we dont' pretend to support more */
1296: rf_component_label_set_partitionsize(ci_label,
1297: diskPtr->partitionSize);
1298: ci_label->column = column;
1299: raidflush_component_label(raidPtr, column);
1300: /* XXXjld what about the spares? */
1301: }
1.385 riastrad 1302:
1.367 christos 1303: return 0;
1304: }
1305:
/*
 * RAIDFRAME_REBUILD_IN_PLACE: reconstruct the given component onto
 * itself (e.g. after the disk was replaced in the same slot), by
 * spawning rf_ReconstructInPlaceThread.  Rejected with EINVAL on
 * RAID 0, while a reconstruction is already running, for an
 * out-of-range column, when some other component has failed, or when
 * the disk is spared; ENOMEM on allocation failure.
 */
static int
rf_rebuild_in_place(RF_Raid_t *raidPtr, RF_SingleComponent_t *componentPtr)
{

	if (raidPtr->Layout.map->faultsTolerated == 0) {
		/* Can't do this on a RAID 0!! */
		return EINVAL;
	}

	if (raidPtr->recon_in_progress == 1) {
		/* a reconstruct is already in progress! */
		return EINVAL;
	}

	/* Copy the request so we don't rely on the user's buffer. */
	RF_SingleComponent_t component;
	memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
	component.row = 0; /* we don't support any more */
	int column = component.column;

	if ((column < 0) || (column >= raidPtr->numCol)) {
		return EINVAL;
	}

	rf_lock_mutex2(raidPtr->mutex);
	if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
	    (raidPtr->numFailures > 0)) {
		/* XXX 0 above shouldn't be constant!!! */
		/* some component other than this has failed.
		   Let's not make things worse than they already
		   are... */
		printf("raid%d: Unable to reconstruct to disk at:\n",
		       raidPtr->raidid);
		printf("raid%d: Col: %d Too many failures.\n",
		       raidPtr->raidid, column);
		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	if (raidPtr->Disks[column].status == rf_ds_reconstructing) {
		printf("raid%d: Unable to reconstruct to disk at:\n",
		       raidPtr->raidid);
		printf("raid%d: Col: %d "
		    "Reconstruction already occurring!\n",
		    raidPtr->raidid, column);

		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	if (raidPtr->Disks[column].status == rf_ds_spared) {
		rf_unlock_mutex2(raidPtr->mutex);
		return EINVAL;
	}

	rf_unlock_mutex2(raidPtr->mutex);

	struct rf_recon_req_internal *rrint;
	rrint = RF_Malloc(sizeof(*rrint));
	if (rrint == NULL)
		return ENOMEM;

	rrint->col = column;
	rrint->raidPtr = raidPtr;

	return RF_CREATE_THREAD(raidPtr->recon_thread,
	    rf_ReconstructInPlaceThread, rrint, "raid_reconip");
}
1373:
1374: static int
1375: rf_check_recon_status(RF_Raid_t *raidPtr, int *data)
1376: {
1377: /*
1378: * This makes no sense on a RAID 0, or if we are not reconstructing
1379: * so tell the user it's done.
1380: */
1381: if (raidPtr->Layout.map->faultsTolerated == 0 ||
1382: raidPtr->status != rf_rs_reconstructing) {
1383: *data = 100;
1384: return 0;
1385: }
1386: if (raidPtr->reconControl->numRUsTotal == 0) {
1387: *data = 0;
1388: return 0;
1389: }
1390: *data = (raidPtr->reconControl->numRUsComplete * 100
1391: / raidPtr->reconControl->numRUsTotal);
1392: return 0;
1393: }
1394:
1395: static int
1.225 christos 1396: raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1.1 oster 1397: {
1.9 oster 1398: int unit = raidunit(dev);
1.335 mlelstv 1399: int part, pmask;
1.1 oster 1400: struct raid_softc *rs;
1.335 mlelstv 1401: struct dk_softc *dksc;
1.367 christos 1402: RF_Config_t *k_cfg;
1.42 oster 1403: RF_Raid_t *raidPtr;
1.41 oster 1404: RF_AccTotals_t *totals;
1.367 christos 1405: RF_SingleComponent_t component;
1.371 oster 1406: RF_DeviceConfig_t *d_cfg, *ucfgp;
1.11 oster 1407: int retcode = 0;
1408: int column;
1.48 oster 1409: RF_ComponentLabel_t *clabel;
1.12 oster 1410: RF_SingleComponent_t *sparePtr,*componentPtr;
1.353 mrg 1411: int d;
1.1 oster 1412:
1.327 pgoyette 1413: if ((rs = raidget(unit, false)) == NULL)
1.300 christos 1414: return ENXIO;
1.366 christos 1415:
1.335 mlelstv 1416: dksc = &rs->sc_dksc;
1.300 christos 1417: raidPtr = &rs->sc_r;
1.1 oster 1418:
1.276 mrg 1419: db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1.366 christos 1420: (int) DISKPART(dev), (int) unit, cmd));
1.1 oster 1421:
1422: /* Must be initialized for these... */
1.366 christos 1423: if (rf_must_be_initialized(rs, cmd))
1424: return ENXIO;
1.9 oster 1425:
1.358 pgoyette 1426: switch (cmd) {
1.1 oster 1427: /* configure the system */
1428: case RAIDFRAME_CONFIGURE:
1.367 christos 1429: if ((retcode = rf_getConfiguration(rs, data, &k_cfg)) != 0)
1430: return retcode;
1431: return rf_construct(rs, k_cfg);
1.9 oster 1432:
1433: /* shutdown the system */
1.1 oster 1434: case RAIDFRAME_SHUTDOWN:
1.9 oster 1435:
1.266 dyoung 1436: part = DISKPART(dev);
1437: pmask = (1 << part);
1438:
1.367 christos 1439: if ((retcode = raidlock(rs)) != 0)
1440: return retcode;
1.1 oster 1441:
1.337 mlelstv 1442: if (DK_BUSY(dksc, pmask) ||
1443: raidPtr->recon_in_progress != 0 ||
1444: raidPtr->parity_rewrite_in_progress != 0 ||
1445: raidPtr->copyback_in_progress != 0)
1.266 dyoung 1446: retcode = EBUSY;
1447: else {
1.335 mlelstv 1448: /* detach and free on close */
1.266 dyoung 1449: rs->sc_flags |= RAIDF_SHUTDOWN;
1450: retcode = 0;
1.9 oster 1451: }
1.11 oster 1452:
1.266 dyoung 1453: raidunlock(rs);
1.1 oster 1454:
1.367 christos 1455: return retcode;
1.11 oster 1456: case RAIDFRAME_GET_COMPONENT_LABEL:
1.353 mrg 1457: return rf_get_component_label(raidPtr, data);
1.11 oster 1458:
1.367 christos 1459: #if RF_DISABLED
1.11 oster 1460: case RAIDFRAME_SET_COMPONENT_LABEL:
1.367 christos 1461: return rf_set_component_label(raidPtr, data);
1462: #endif
1.11 oster 1463:
1.367 christos 1464: case RAIDFRAME_INIT_LABELS:
1465: return rf_init_component_label(raidPtr, data);
1.12 oster 1466:
1.48 oster 1467: case RAIDFRAME_SET_AUTOCONFIG:
1.78 minoura 1468: d = rf_set_autoconfig(raidPtr, *(int *) data);
1.186 perry 1469: printf("raid%d: New autoconfig value is: %d\n",
1.123 oster 1470: raidPtr->raidid, d);
1.78 minoura 1471: *(int *) data = d;
1.367 christos 1472: return retcode;
1.48 oster 1473:
1474: case RAIDFRAME_SET_ROOT:
1.78 minoura 1475: d = rf_set_rootpartition(raidPtr, *(int *) data);
1.186 perry 1476: printf("raid%d: New rootpartition value is: %d\n",
1.123 oster 1477: raidPtr->raidid, d);
1.78 minoura 1478: *(int *) data = d;
1.367 christos 1479: return retcode;
1.9 oster 1480:
1.1 oster 1481: /* initialize all parity */
1482: case RAIDFRAME_REWRITEPARITY:
1483:
1.42 oster 1484: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.17 oster 1485: /* Parity for RAID 0 is trivially correct */
1.42 oster 1486: raidPtr->parity_good = RF_RAID_CLEAN;
1.367 christos 1487: return 0;
1.17 oster 1488: }
1.186 perry 1489:
1.42 oster 1490: if (raidPtr->parity_rewrite_in_progress == 1) {
1.37 oster 1491: /* Re-write is already in progress! */
1.367 christos 1492: return EINVAL;
1.37 oster 1493: }
1.27 oster 1494:
1.367 christos 1495: return RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1496: rf_RewriteParityThread, raidPtr,"raid_parity");
1.11 oster 1497:
1498: case RAIDFRAME_ADD_HOT_SPARE:
1.12 oster 1499: sparePtr = (RF_SingleComponent_t *) data;
1.367 christos 1500: memcpy(&component, sparePtr, sizeof(RF_SingleComponent_t));
1501: return rf_add_hot_spare(raidPtr, &component);
1.11 oster 1502:
1503: case RAIDFRAME_REMOVE_HOT_SPARE:
1.367 christos 1504: return retcode;
1.73 oster 1505:
1506: case RAIDFRAME_DELETE_COMPONENT:
1507: componentPtr = (RF_SingleComponent_t *)data;
1.367 christos 1508: memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
1509: return rf_delete_component(raidPtr, &component);
1.73 oster 1510:
1511: case RAIDFRAME_INCORPORATE_HOT_SPARE:
1512: componentPtr = (RF_SingleComponent_t *)data;
1.367 christos 1513: memcpy(&component, componentPtr, sizeof(RF_SingleComponent_t));
1514: return rf_incorporate_hot_spare(raidPtr, &component);
1.11 oster 1515:
1.12 oster 1516: case RAIDFRAME_REBUILD_IN_PLACE:
1.367 christos 1517: return rf_rebuild_in_place(raidPtr, data);
1.24 oster 1518:
1.366 christos 1519: case RAIDFRAME_GET_INFO:
1.371 oster 1520: ucfgp = *(RF_DeviceConfig_t **)data;
1.374 christos 1521: d_cfg = RF_Malloc(sizeof(*d_cfg));
1.41 oster 1522: if (d_cfg == NULL)
1.366 christos 1523: return ENOMEM;
1.353 mrg 1524: retcode = rf_get_info(raidPtr, d_cfg);
1525: if (retcode == 0) {
1.371 oster 1526: retcode = copyout(d_cfg, ucfgp, sizeof(*d_cfg));
1.41 oster 1527: }
1528: RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1.366 christos 1529: return retcode;
1.9 oster 1530:
1.22 oster 1531: case RAIDFRAME_CHECK_PARITY:
1.42 oster 1532: *(int *) data = raidPtr->parity_good;
1.367 christos 1533: return 0;
1.41 oster 1534:
1.269 jld 1535: case RAIDFRAME_PARITYMAP_STATUS:
1.273 jld 1536: if (rf_paritymap_ineligible(raidPtr))
1537: return EINVAL;
1.367 christos 1538: rf_paritymap_status(raidPtr->parity_map, data);
1.269 jld 1539: return 0;
1540:
1541: case RAIDFRAME_PARITYMAP_SET_PARAMS:
1.273 jld 1542: if (rf_paritymap_ineligible(raidPtr))
1543: return EINVAL;
1.269 jld 1544: if (raidPtr->parity_map == NULL)
1545: return ENOENT; /* ??? */
1.367 christos 1546: if (rf_paritymap_set_params(raidPtr->parity_map, data, 1) != 0)
1.269 jld 1547: return EINVAL;
1548: return 0;
1549:
1550: case RAIDFRAME_PARITYMAP_GET_DISABLE:
1.273 jld 1551: if (rf_paritymap_ineligible(raidPtr))
1552: return EINVAL;
1.269 jld 1553: *(int *) data = rf_paritymap_get_disable(raidPtr);
1554: return 0;
1555:
1556: case RAIDFRAME_PARITYMAP_SET_DISABLE:
1.273 jld 1557: if (rf_paritymap_ineligible(raidPtr))
1558: return EINVAL;
1.269 jld 1559: rf_paritymap_set_disable(raidPtr, *(int *)data);
1560: /* XXX should errors be passed up? */
1561: return 0;
1562:
1.1 oster 1563: case RAIDFRAME_RESET_ACCTOTALS:
1.108 thorpej 1564: memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1.367 christos 1565: return 0;
1.9 oster 1566:
1.1 oster 1567: case RAIDFRAME_GET_ACCTOTALS:
1.41 oster 1568: totals = (RF_AccTotals_t *) data;
1.42 oster 1569: *totals = raidPtr->acc_totals;
1.366 christos 1570: return 0;
1.9 oster 1571:
1.1 oster 1572: case RAIDFRAME_KEEP_ACCTOTALS:
1.42 oster 1573: raidPtr->keep_acc_totals = *(int *)data;
1.366 christos 1574: return 0;
1.9 oster 1575:
1.1 oster 1576: case RAIDFRAME_GET_SIZE:
1.42 oster 1577: *(int *) data = raidPtr->totalSectors;
1.366 christos 1578: return 0;
1.1 oster 1579:
1580: case RAIDFRAME_FAIL_DISK:
1.366 christos 1581: return rf_fail_disk(raidPtr, data);
1.9 oster 1582:
1583: /* invoke a copyback operation after recon on whatever disk
1584: * needs it, if any */
1585: case RAIDFRAME_COPYBACK:
1.24 oster 1586:
1.42 oster 1587: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.24 oster 1588: /* This makes no sense on a RAID 0!! */
1.367 christos 1589: return EINVAL;
1.24 oster 1590: }
1591:
1.42 oster 1592: if (raidPtr->copyback_in_progress == 1) {
1.37 oster 1593: /* Copyback is already in progress! */
1.367 christos 1594: return EINVAL;
1.37 oster 1595: }
1.27 oster 1596:
1.367 christos 1597: return RF_CREATE_THREAD(raidPtr->copyback_thread,
1598: rf_CopybackThread, raidPtr, "raid_copyback");
1.9 oster 1599:
1.1 oster 1600: /* return the percentage completion of reconstruction */
1.37 oster 1601: case RAIDFRAME_CHECK_RECON_STATUS:
1.367 christos 1602: return rf_check_recon_status(raidPtr, data);
1603:
1.83 oster 1604: case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1.353 mrg 1605: rf_check_recon_status_ext(raidPtr, data);
1.367 christos 1606: return 0;
1.9 oster 1607:
1.37 oster 1608: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1.42 oster 1609: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.80 oster 1610: /* This makes no sense on a RAID 0, so tell the
1611: user it's done. */
1612: *(int *) data = 100;
1.367 christos 1613: return 0;
1.37 oster 1614: }
1.42 oster 1615: if (raidPtr->parity_rewrite_in_progress == 1) {
1.186 perry 1616: *(int *) data = 100 *
1617: raidPtr->parity_rewrite_stripes_done /
1.83 oster 1618: raidPtr->Layout.numStripe;
1.37 oster 1619: } else {
1620: *(int *) data = 100;
1621: }
1.367 christos 1622: return 0;
1.37 oster 1623:
1.83 oster 1624: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1.353 mrg 1625: rf_check_parityrewrite_status_ext(raidPtr, data);
1.367 christos 1626: return 0;
1.83 oster 1627:
1.37 oster 1628: case RAIDFRAME_CHECK_COPYBACK_STATUS:
1.42 oster 1629: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.37 oster 1630: /* This makes no sense on a RAID 0 */
1.83 oster 1631: *(int *) data = 100;
1.367 christos 1632: return 0;
1.37 oster 1633: }
1.42 oster 1634: if (raidPtr->copyback_in_progress == 1) {
1635: *(int *) data = 100 * raidPtr->copyback_stripes_done /
1636: raidPtr->Layout.numStripe;
1.37 oster 1637: } else {
1638: *(int *) data = 100;
1639: }
1.367 christos 1640: return 0;
1.37 oster 1641:
1.83 oster 1642: case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1.353 mrg 1643: rf_check_copyback_status_ext(raidPtr, data);
1644: return 0;
1.37 oster 1645:
1.341 christos 1646: case RAIDFRAME_SET_LAST_UNIT:
1647: for (column = 0; column < raidPtr->numCol; column++)
1648: if (raidPtr->Disks[column].status != rf_ds_optimal)
1649: return EBUSY;
1650:
1651: for (column = 0; column < raidPtr->numCol; column++) {
1652: clabel = raidget_component_label(raidPtr, column);
1653: clabel->last_unit = *(int *)data;
1654: raidflush_component_label(raidPtr, column);
1655: }
1656: rs->sc_cflags |= RAIDF_UNIT_CHANGED;
1657: return 0;
1658:
1.9 oster 1659: /* the sparetable daemon calls this to wait for the kernel to
1660: * need a spare table. this ioctl does not return until a
1661: * spare table is needed. XXX -- calling mpsleep here in the
1662: * ioctl code is almost certainly wrong and evil. -- XXX XXX
1663: * -- I should either compute the spare table in the kernel,
1664: * or have a different -- XXX XXX -- interface (a different
1.42 oster 1665: * character device) for delivering the table -- XXX */
1.367 christos 1666: #if RF_DISABLED
1.1 oster 1667: case RAIDFRAME_SPARET_WAIT:
1.287 mrg 1668: rf_lock_mutex2(rf_sparet_wait_mutex);
1.9 oster 1669: while (!rf_sparet_wait_queue)
1.287 mrg 1670: rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1.367 christos 1671: RF_SparetWait_t *waitreq = rf_sparet_wait_queue;
1.1 oster 1672: rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1.287 mrg 1673: rf_unlock_mutex2(rf_sparet_wait_mutex);
1.9 oster 1674:
1.42 oster 1675: /* structure assignment */
1.186 perry 1676: *((RF_SparetWait_t *) data) = *waitreq;
1.9 oster 1677:
1.1 oster 1678: RF_Free(waitreq, sizeof(*waitreq));
1.367 christos 1679: return 0;
1.9 oster 1680:
1681: /* wakes up a process waiting on SPARET_WAIT and puts an error
1682: * code in it that will cause the dameon to exit */
1.1 oster 1683: case RAIDFRAME_ABORT_SPARET_WAIT:
1.374 christos 1684: waitreq = RF_Malloc(sizeof(*waitreq));
1.1 oster 1685: waitreq->fcol = -1;
1.287 mrg 1686: rf_lock_mutex2(rf_sparet_wait_mutex);
1.1 oster 1687: waitreq->next = rf_sparet_wait_queue;
1688: rf_sparet_wait_queue = waitreq;
1.367 christos 1689: rf_broadcast_cond2(rf_sparet_wait_cv);
1.287 mrg 1690: rf_unlock_mutex2(rf_sparet_wait_mutex);
1.367 christos 1691: return 0;
1.1 oster 1692:
1.9 oster 1693: /* used by the spare table daemon to deliver a spare table
1694: * into the kernel */
1.1 oster 1695: case RAIDFRAME_SEND_SPARET:
1.9 oster 1696:
1.1 oster 1697: /* install the spare table */
1.42 oster 1698: retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1.9 oster 1699:
1700: /* respond to the requestor. the return status of the spare
1701: * table installation is passed in the "fcol" field */
1.374 christos 1702: waitred = RF_Malloc(sizeof(*waitreq));
1.1 oster 1703: waitreq->fcol = retcode;
1.287 mrg 1704: rf_lock_mutex2(rf_sparet_wait_mutex);
1.1 oster 1705: waitreq->next = rf_sparet_resp_queue;
1706: rf_sparet_resp_queue = waitreq;
1.287 mrg 1707: rf_broadcast_cond2(rf_sparet_resp_cv);
1708: rf_unlock_mutex2(rf_sparet_wait_mutex);
1.9 oster 1709:
1.367 christos 1710: return retcode;
1711: #endif
1712: default:
1.372 christos 1713: /*
1714: * Don't bother trying to load compat modules
1715: * if it is not our ioctl. This is more efficient
1716: * and makes rump tests not depend on compat code
1717: */
1718: if (IOCGROUP(cmd) != 'r')
1719: break;
1.367 christos 1720: #ifdef _LP64
1721: if ((l->l_proc->p_flag & PK_32) != 0) {
1722: module_autoload("compat_netbsd32_raid",
1723: MODULE_CLASS_EXEC);
1.376 pgoyette 1724: MODULE_HOOK_CALL(raidframe_netbsd32_ioctl_hook,
1.367 christos 1725: (rs, cmd, data), enosys(), retcode);
1726: if (retcode != EPASSTHROUGH)
1727: return retcode;
1728: }
1.1 oster 1729: #endif
1.367 christos 1730: module_autoload("compat_raid_80", MODULE_CLASS_EXEC);
1.376 pgoyette 1731: MODULE_HOOK_CALL(raidframe_ioctl_80_hook,
1.367 christos 1732: (rs, cmd, data), enosys(), retcode);
1733: if (retcode != EPASSTHROUGH)
1734: return retcode;
1.1 oster 1735:
1.367 christos 1736: module_autoload("compat_raid_50", MODULE_CLASS_EXEC);
1.376 pgoyette 1737: MODULE_HOOK_CALL(raidframe_ioctl_50_hook,
1.367 christos 1738: (rs, cmd, data), enosys(), retcode);
1739: if (retcode != EPASSTHROUGH)
1740: return retcode;
1.36 oster 1741: break; /* fall through to the os-specific code below */
1.1 oster 1742:
1743: }
1.9 oster 1744:
1.42 oster 1745: if (!raidPtr->valid)
1.389 skrll 1746: return EINVAL;
1.9 oster 1747:
1.1 oster 1748: /*
1749: * Add support for "regular" device ioctls here.
1750: */
1.385 riastrad 1751:
1.1 oster 1752: switch (cmd) {
1.348 jdolecek 1753: case DIOCGCACHE:
1754: retcode = rf_get_component_caches(raidPtr, (int *)data);
1755: break;
1756:
1.252 oster 1757: case DIOCCACHESYNC:
1.390 christos 1758: retcode = rf_sync_component_caches(raidPtr, *(int *)data);
1.347 jdolecek 1759: break;
1.298 buhrow 1760:
1.1 oster 1761: default:
1.346 jdolecek 1762: retcode = dk_ioctl(dksc, dev, cmd, data, flag, l);
1.347 jdolecek 1763: break;
1.1 oster 1764: }
1.346 jdolecek 1765:
1.389 skrll 1766: return retcode;
1.1 oster 1767:
1768: }
1769:
1770:
1.9 oster 1771: /* raidinit -- complete the rest of the initialization for the
1.1 oster 1772: RAIDframe device. */
1773:
1774:
/*
 * Complete the OS-facing initialization of a freshly configured raid
 * set: attach a pseudo-device instance, hook it up to the dk(9) and
 * disk(9) layers, allocate the buffer queue, mark the unit usable
 * (RAIDF_INITED) and kick off wedge discovery.  On pseudo-device
 * attach failure the unit is simply left uninitialized.
 */
static void
raidinit(struct raid_softc *rs)
{
	cfdata_t cf;
	unsigned int unit;
	struct dk_softc *dksc = &rs->sc_dksc;
	RF_Raid_t *raidPtr = &rs->sc_r;
	device_t dev;

	unit = raidPtr->raidid;

	/* XXX doesn't check bounds. */
	snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%u", unit);

	/* attach the pseudo device */
	cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
	cf->cf_name = raid_cd.cd_name;
	cf->cf_atname = raid_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	dev = config_attach_pseudo(cf);
	if (dev == NULL) {
		printf("raid%d: config_attach_pseudo failed\n",
		    raidPtr->raidid);
		free(cf, M_RAIDFRAME);
		return;
	}

	/* provide a backpointer to the real softc */
	raidsoftc(dev) = rs;

	/* disk_attach actually creates space for the CPU disklabel, among
	 * other things, so it's critical to call this *BEFORE* we try putzing
	 * with disklabels. */
	dk_init(dksc, dev, DKTYPE_RAID);
	disk_init(&dksc->sc_dkdev, rs->sc_xname, &rf_dkdriver);

	/* XXX There may be a weird interaction here between this, and
	 * protectedSectors, as used in RAIDframe. */

	rs->sc_size = raidPtr->totalSectors;

	/* Attach dk and disk subsystems */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);
	rf_set_geometry(rs, raidPtr);

	bufq_alloc(&dksc->sc_bufq, "fcfs", BUFQ_SORT_RAWBLOCK);

	/* mark unit as usable */
	rs->sc_flags |= RAIDF_INITED;

	/* Discover and attach any wedges on the new disk. */
	dkwedge_discover(&dksc->sc_dkdev);
}
1.335 mlelstv 1830:
1.150 oster 1831: #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1.1 oster 1832: /* wake up the daemon & tell it to get us a spare table
1833: * XXX
1.9 oster 1834: * the entries in the queues should be tagged with the raidPtr
1.186 perry 1835: * so that in the extremely rare case that two recons happen at once,
1.11 oster 1836: * we know for which device were requesting a spare table
1.1 oster 1837: * XXX
1.186 perry 1838: *
1.39 oster 1839: * XXX This code is not currently used. GO
1.1 oster 1840: */
1.186 perry 1841: int
1.169 oster 1842: rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
1.9 oster 1843: {
1844: int retcode;
1845:
1.287 mrg 1846: rf_lock_mutex2(rf_sparet_wait_mutex);
1.9 oster 1847: req->next = rf_sparet_wait_queue;
1848: rf_sparet_wait_queue = req;
1.289 mrg 1849: rf_broadcast_cond2(rf_sparet_wait_cv);
1.9 oster 1850:
1851: /* mpsleep unlocks the mutex */
1852: while (!rf_sparet_resp_queue) {
1.289 mrg 1853: rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
1.9 oster 1854: }
1855: req = rf_sparet_resp_queue;
1856: rf_sparet_resp_queue = req->next;
1.287 mrg 1857: rf_unlock_mutex2(rf_sparet_wait_mutex);
1.9 oster 1858:
1859: retcode = req->fcol;
1860: RF_Free(req, sizeof(*req)); /* this is not the same req as we
1861: * alloc'd */
1.389 skrll 1862: return retcode;
1.1 oster 1863: }
1.150 oster 1864: #endif
1.39 oster 1865:
1.186 perry 1866: /* a wrapper around rf_DoAccess that extracts appropriate info from the
1.11 oster 1867: * bp & passes it down.
1.1 oster 1868: * any calls originating in the kernel must use non-blocking I/O
1869: * do some extra sanity checking to return "appropriate" error values for
1870: * certain conditions (to make some standard utilities work)
1.186 perry 1871: *
1.34 oster 1872: * Formerly known as: rf_DoAccessKernel
1.1 oster 1873: */
1.34 oster 1874: void
1.169 oster 1875: raidstart(RF_Raid_t *raidPtr)
1.1 oster 1876: {
1877: struct raid_softc *rs;
1.335 mlelstv 1878: struct dk_softc *dksc;
1.1 oster 1879:
1.300 christos 1880: rs = raidPtr->softc;
1.335 mlelstv 1881: dksc = &rs->sc_dksc;
1.56 oster 1882: /* quick check to see if anything has died recently */
1.291 mrg 1883: rf_lock_mutex2(raidPtr->mutex);
1.56 oster 1884: if (raidPtr->numNewFailures > 0) {
1.291 mrg 1885: rf_unlock_mutex2(raidPtr->mutex);
1.186 perry 1886: rf_update_component_labels(raidPtr,
1.91 oster 1887: RF_NORMAL_COMPONENT_UPDATE);
1.291 mrg 1888: rf_lock_mutex2(raidPtr->mutex);
1.56 oster 1889: raidPtr->numNewFailures--;
1890: }
1.335 mlelstv 1891: rf_unlock_mutex2(raidPtr->mutex);
1.56 oster 1892:
1.335 mlelstv 1893: if ((rs->sc_flags & RAIDF_INITED) == 0) {
1894: printf("raid%d: raidstart not ready\n", raidPtr->raidid);
1895: return;
1896: }
1.34 oster 1897:
1.335 mlelstv 1898: dk_start(dksc, NULL);
1899: }
1.34 oster 1900:
1.335 mlelstv 1901: static int
1902: raiddoaccess(RF_Raid_t *raidPtr, struct buf *bp)
1903: {
1904: RF_SectorCount_t num_blocks, pb, sum;
1905: RF_RaidAddr_t raid_addr;
1906: daddr_t blocknum;
1907: int do_async;
1908: int rc;
1.186 perry 1909:
1.335 mlelstv 1910: rf_lock_mutex2(raidPtr->mutex);
1911: if (raidPtr->openings == 0) {
1912: rf_unlock_mutex2(raidPtr->mutex);
1913: return EAGAIN;
1914: }
1915: rf_unlock_mutex2(raidPtr->mutex);
1.186 perry 1916:
1.335 mlelstv 1917: blocknum = bp->b_rawblkno;
1.186 perry 1918:
1.335 mlelstv 1919: db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1920: (int) blocknum));
1.1 oster 1921:
1.335 mlelstv 1922: db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
1923: db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1.1 oster 1924:
1.335 mlelstv 1925: /* *THIS* is where we adjust what block we're going to...
1926: * but DO NOT TOUCH bp->b_blkno!!! */
1927: raid_addr = blocknum;
1928:
1929: num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
1930: pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
1931: sum = raid_addr + num_blocks + pb;
1932: if (1 || rf_debugKernelAccess) {
1933: db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
1934: (int) raid_addr, (int) sum, (int) num_blocks,
1935: (int) pb, (int) bp->b_resid));
1936: }
1937: if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
1938: || (sum < num_blocks) || (sum < pb)) {
1939: rc = ENOSPC;
1940: goto done;
1941: }
1942: /*
1943: * XXX rf_DoAccess() should do this, not just DoAccessKernel()
1944: */
1.186 perry 1945:
1.335 mlelstv 1946: if (bp->b_bcount & raidPtr->sectorMask) {
1947: rc = ENOSPC;
1948: goto done;
1949: }
1950: db1_printf(("Calling DoAccess..\n"));
1.99 oster 1951:
1.20 oster 1952:
1.335 mlelstv 1953: rf_lock_mutex2(raidPtr->mutex);
1954: raidPtr->openings--;
1.291 mrg 1955: rf_unlock_mutex2(raidPtr->mutex);
1.20 oster 1956:
1.335 mlelstv 1957: /*
1958: * Everything is async.
1959: */
1960: do_async = 1;
1.20 oster 1961:
1.335 mlelstv 1962: /* don't ever condition on bp->b_flags & B_WRITE.
1963: * always condition on B_READ instead */
1.7 explorer 1964:
1.335 mlelstv 1965: rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
1966: RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
1967: do_async, raid_addr, num_blocks,
1968: bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1969:
1970: done:
1971: return rc;
1972: }
1.7 explorer 1973:
1.1 oster 1974: /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
1975:
1.186 perry 1976: int
1.169 oster 1977: rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
1.1 oster 1978: {
1.9 oster 1979: int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1.1 oster 1980: struct buf *bp;
1.9 oster 1981:
1.1 oster 1982: req->queue = queue;
1983: bp = req->bp;
1984:
1985: switch (req->type) {
1.9 oster 1986: case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1.1 oster 1987: /* XXX need to do something extra here.. */
1.9 oster 1988: /* I'm leaving this in, as I've never actually seen it used,
1989: * and I'd like folks to report it... GO */
1.391 mrg 1990: printf("%s: WAKEUP CALLED\n", __func__);
1.1 oster 1991: queue->numOutstanding++;
1992:
1.197 oster 1993: bp->b_flags = 0;
1.207 simonb 1994: bp->b_private = req;
1.1 oster 1995:
1.194 oster 1996: KernelWakeupFunc(bp);
1.1 oster 1997: break;
1.9 oster 1998:
1.1 oster 1999: case RF_IO_TYPE_READ:
2000: case RF_IO_TYPE_WRITE:
1.175 oster 2001: #if RF_ACC_TRACE > 0
1.1 oster 2002: if (req->tracerec) {
2003: RF_ETIMER_START(req->tracerec->timer);
2004: }
1.175 oster 2005: #endif
1.194 oster 2006: InitBP(bp, queue->rf_cinfo->ci_vp,
1.197 oster 2007: op, queue->rf_cinfo->ci_dev,
1.9 oster 2008: req->sectorOffset, req->numSector,
2009: req->buf, KernelWakeupFunc, (void *) req,
1.384 jdolecek 2010: queue->raidPtr->logBytesPerSector);
1.1 oster 2011:
2012: if (rf_debugKernelAccess) {
1.9 oster 2013: db1_printf(("dispatch: bp->b_blkno = %ld\n",
2014: (long) bp->b_blkno));
1.1 oster 2015: }
2016: queue->numOutstanding++;
2017: queue->last_deq_sector = req->sectorOffset;
1.9 oster 2018: /* acc wouldn't have been let in if there were any pending
2019: * reqs at any other priority */
1.1 oster 2020: queue->curPriority = req->priority;
2021:
1.166 oster 2022: db1_printf(("Going for %c to unit %d col %d\n",
1.186 perry 2023: req->type, queue->raidPtr->raidid,
1.166 oster 2024: queue->col));
1.1 oster 2025: db1_printf(("sector %d count %d (%d bytes) %d\n",
1.9 oster 2026: (int) req->sectorOffset, (int) req->numSector,
2027: (int) (req->numSector <<
2028: queue->raidPtr->logBytesPerSector),
2029: (int) queue->raidPtr->logBytesPerSector));
1.256 oster 2030:
2031: /*
1.385 riastrad 2032: * XXX: drop lock here since this can block at
1.256 oster 2033: * least with backing SCSI devices. Retake it
2034: * to minimize fuss with calling interfaces.
2035: */
2036:
2037: RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
1.247 oster 2038: bdev_strategy(bp);
1.256 oster 2039: RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
1.1 oster 2040: break;
1.9 oster 2041:
1.1 oster 2042: default:
2043: panic("bad req->type in rf_DispatchKernelIO");
2044: }
2045: db1_printf(("Exiting from DispatchKernelIO\n"));
1.134 oster 2046:
1.389 skrll 2047: return 0;
1.1 oster 2048: }
1.9 oster 2049: /* this is the callback function associated with a I/O invoked from
1.1 oster 2050: kernel code.
2051: */
1.186 perry 2052: static void
1.194 oster 2053: KernelWakeupFunc(struct buf *bp)
1.9 oster 2054: {
2055: RF_DiskQueueData_t *req = NULL;
2056: RF_DiskQueue_t *queue;
2057:
2058: db1_printf(("recovering the request queue:\n"));
1.285 mrg 2059:
1.207 simonb 2060: req = bp->b_private;
1.1 oster 2061:
1.9 oster 2062: queue = (RF_DiskQueue_t *) req->queue;
1.1 oster 2063:
1.286 mrg 2064: rf_lock_mutex2(queue->raidPtr->iodone_lock);
1.285 mrg 2065:
1.175 oster 2066: #if RF_ACC_TRACE > 0
1.9 oster 2067: if (req->tracerec) {
2068: RF_ETIMER_STOP(req->tracerec->timer);
2069: RF_ETIMER_EVAL(req->tracerec->timer);
1.288 mrg 2070: rf_lock_mutex2(rf_tracing_mutex);
1.9 oster 2071: req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2072: req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2073: req->tracerec->num_phys_ios++;
1.288 mrg 2074: rf_unlock_mutex2(rf_tracing_mutex);
1.9 oster 2075: }
1.175 oster 2076: #endif
1.1 oster 2077:
1.230 ad 2078: /* XXX Ok, let's get aggressive... If b_error is set, let's go
1.9 oster 2079: * ballistic, and mark the component as hosed... */
1.36 oster 2080:
1.230 ad 2081: if (bp->b_error != 0) {
1.9 oster 2082: /* Mark the disk as dead */
2083: /* but only mark it once... */
1.186 perry 2084: /* and only if it wouldn't leave this RAID set
1.183 oster 2085: completely broken */
1.193 oster 2086: if (((queue->raidPtr->Disks[queue->col].status ==
2087: rf_ds_optimal) ||
2088: (queue->raidPtr->Disks[queue->col].status ==
1.385 riastrad 2089: rf_ds_used_spare)) &&
1.193 oster 2090: (queue->raidPtr->numFailures <
1.204 simonb 2091: queue->raidPtr->Layout.map->faultsTolerated)) {
1.322 prlw1 2092: printf("raid%d: IO Error (%d). Marking %s as failed.\n",
1.136 oster 2093: queue->raidPtr->raidid,
1.322 prlw1 2094: bp->b_error,
1.166 oster 2095: queue->raidPtr->Disks[queue->col].devname);
2096: queue->raidPtr->Disks[queue->col].status =
1.9 oster 2097: rf_ds_failed;
1.166 oster 2098: queue->raidPtr->status = rf_rs_degraded;
1.9 oster 2099: queue->raidPtr->numFailures++;
1.56 oster 2100: queue->raidPtr->numNewFailures++;
1.9 oster 2101: } else { /* Disk is already dead... */
2102: /* printf("Disk already marked as dead!\n"); */
2103: }
1.4 oster 2104:
1.9 oster 2105: }
1.4 oster 2106:
1.143 oster 2107: /* Fill in the error value */
1.230 ad 2108: req->error = bp->b_error;
1.143 oster 2109:
2110: /* Drop this one on the "finished" queue... */
2111: TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
2112:
2113: /* Let the raidio thread know there is work to be done. */
1.286 mrg 2114: rf_signal_cond2(queue->raidPtr->iodone_cv);
1.143 oster 2115:
1.286 mrg 2116: rf_unlock_mutex2(queue->raidPtr->iodone_lock);
1.1 oster 2117: }
2118:
2119:
2120: /*
2121: * initialize a buf structure for doing an I/O in the kernel.
2122: */
1.186 perry 2123: static void
1.169 oster 2124: InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
1.225 christos 2125: RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
1.384 jdolecek 2126: void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector)
1.9 oster 2127: {
1.384 jdolecek 2128: bp->b_flags = rw_flag | (bp->b_flags & rf_b_pass);
1.242 ad 2129: bp->b_oflags = 0;
2130: bp->b_cflags = 0;
1.9 oster 2131: bp->b_bcount = numSect << logBytesPerSector;
2132: bp->b_bufsize = bp->b_bcount;
2133: bp->b_error = 0;
2134: bp->b_dev = dev;
1.187 christos 2135: bp->b_data = bf;
1.275 mrg 2136: bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
1.9 oster 2137: bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1.1 oster 2138: if (bp->b_bcount == 0) {
1.141 provos 2139: panic("bp->b_bcount is zero in InitBP!!");
1.1 oster 2140: }
1.9 oster 2141: bp->b_iodone = cbFunc;
1.207 simonb 2142: bp->b_private = cbArg;
1.1 oster 2143: }
2144:
2145: /*
2146: * Wait interruptibly for an exclusive lock.
2147: *
2148: * XXX
2149: * Several drivers do this; it should be abstracted and made MP-safe.
2150: * (Hmm... where have we seen this warning before :-> GO )
2151: */
2152: static int
1.169 oster 2153: raidlock(struct raid_softc *rs)
1.1 oster 2154: {
1.9 oster 2155: int error;
1.1 oster 2156:
1.335 mlelstv 2157: error = 0;
1.327 pgoyette 2158: mutex_enter(&rs->sc_mutex);
1.1 oster 2159: while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2160: rs->sc_flags |= RAIDF_WANTED;
1.327 pgoyette 2161: error = cv_wait_sig(&rs->sc_cv, &rs->sc_mutex);
2162: if (error != 0)
1.335 mlelstv 2163: goto done;
1.1 oster 2164: }
2165: rs->sc_flags |= RAIDF_LOCKED;
1.335 mlelstv 2166: done:
1.327 pgoyette 2167: mutex_exit(&rs->sc_mutex);
1.389 skrll 2168: return error;
1.1 oster 2169: }
2170: /*
2171: * Unlock and wake up any waiters.
2172: */
2173: static void
1.169 oster 2174: raidunlock(struct raid_softc *rs)
1.1 oster 2175: {
2176:
1.327 pgoyette 2177: mutex_enter(&rs->sc_mutex);
1.1 oster 2178: rs->sc_flags &= ~RAIDF_LOCKED;
2179: if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2180: rs->sc_flags &= ~RAIDF_WANTED;
1.327 pgoyette 2181: cv_broadcast(&rs->sc_cv);
1.1 oster 2182: }
1.327 pgoyette 2183: mutex_exit(&rs->sc_mutex);
1.11 oster 2184: }
1.186 perry 2185:
1.11 oster 2186:
2187: #define RF_COMPONENT_INFO_OFFSET 16384 /* bytes */
2188: #define RF_COMPONENT_INFO_SIZE 1024 /* bytes */
1.269 jld 2189: #define RF_PARITY_MAP_SIZE RF_PARITYMAP_NBYTE
1.11 oster 2190:
1.276 mrg 2191: static daddr_t
2192: rf_component_info_offset(void)
2193: {
2194:
2195: return RF_COMPONENT_INFO_OFFSET;
2196: }
2197:
2198: static daddr_t
2199: rf_component_info_size(unsigned secsize)
2200: {
2201: daddr_t info_size;
2202:
2203: KASSERT(secsize);
2204: if (secsize > RF_COMPONENT_INFO_SIZE)
2205: info_size = secsize;
2206: else
2207: info_size = RF_COMPONENT_INFO_SIZE;
2208:
2209: return info_size;
2210: }
2211:
2212: static daddr_t
2213: rf_parity_map_offset(RF_Raid_t *raidPtr)
2214: {
2215: daddr_t map_offset;
2216:
2217: KASSERT(raidPtr->bytesPerSector);
2218: if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2219: map_offset = raidPtr->bytesPerSector;
2220: else
2221: map_offset = RF_COMPONENT_INFO_SIZE;
2222: map_offset += rf_component_info_offset();
2223:
2224: return map_offset;
2225: }
2226:
2227: static daddr_t
2228: rf_parity_map_size(RF_Raid_t *raidPtr)
2229: {
2230: daddr_t map_size;
2231:
2232: if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2233: map_size = raidPtr->bytesPerSector;
2234: else
2235: map_size = RF_PARITY_MAP_SIZE;
2236:
2237: return map_size;
2238: }
2239:
1.186 perry 2240: int
1.269 jld 2241: raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
1.12 oster 2242: {
1.269 jld 2243: RF_ComponentLabel_t *clabel;
2244:
2245: clabel = raidget_component_label(raidPtr, col);
2246: clabel->clean = RF_RAID_CLEAN;
2247: raidflush_component_label(raidPtr, col);
1.12 oster 2248: return(0);
2249: }
2250:
2251:
1.186 perry 2252: int
1.269 jld 2253: raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
1.11 oster 2254: {
1.269 jld 2255: RF_ComponentLabel_t *clabel;
2256:
2257: clabel = raidget_component_label(raidPtr, col);
2258: clabel->clean = RF_RAID_DIRTY;
2259: raidflush_component_label(raidPtr, col);
1.11 oster 2260: return(0);
2261: }
2262:
2263: int
1.269 jld 2264: raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2265: {
1.276 mrg 2266: KASSERT(raidPtr->bytesPerSector);
1.394 ! mrg 2267:
1.276 mrg 2268: return raidread_component_label(raidPtr->bytesPerSector,
2269: raidPtr->Disks[col].dev,
1.385 riastrad 2270: raidPtr->raid_cinfo[col].ci_vp,
1.269 jld 2271: &raidPtr->raid_cinfo[col].ci_label);
2272: }
2273:
2274: RF_ComponentLabel_t *
2275: raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2276: {
2277: return &raidPtr->raid_cinfo[col].ci_label;
2278: }
2279:
2280: int
2281: raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2282: {
2283: RF_ComponentLabel_t *label;
2284:
2285: label = &raidPtr->raid_cinfo[col].ci_label;
2286: label->mod_counter = raidPtr->mod_counter;
2287: #ifndef RF_NO_PARITY_MAP
2288: label->parity_map_modcount = label->mod_counter;
2289: #endif
1.276 mrg 2290: return raidwrite_component_label(raidPtr->bytesPerSector,
2291: raidPtr->Disks[col].dev,
1.269 jld 2292: raidPtr->raid_cinfo[col].ci_vp, label);
2293: }
2294:
1.394 ! mrg 2295: /*
! 2296: * Swap the label endianness.
! 2297: *
! 2298: * Everything in the component label is 4-byte-swapped except the version,
! 2299: * which is kept in the byte-swapped version at all times, and indicates
! 2300: * for the writer that a swap is necessary.
! 2301: *
! 2302: * For reads it is expected that out_label == clabel, but writes expect
! 2303: * separate labels so only the re-swapped label is written out to disk,
! 2304: * leaving the swapped-except-version internally.
! 2305: *
! 2306: * Only support swapping label version 2.
! 2307: */
! 2308: static void
! 2309: rf_swap_label(RF_ComponentLabel_t *clabel, RF_ComponentLabel_t *out_label)
! 2310: {
! 2311: int *in, *out, *in_last;
! 2312:
! 2313: KASSERT(clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION));
! 2314:
! 2315: /* Don't swap the label, but do copy it. */
! 2316: out_label->version = clabel->version;
! 2317:
! 2318: in = &clabel->serial_number;
! 2319: in_last = &clabel->future_use2[42];
! 2320: out = &out_label->serial_number;
! 2321:
! 2322: for (; in < in_last; in++, out++)
! 2323: *out = bswap32(*in);
! 2324: }
1.269 jld 2325:
2326: static int
1.276 mrg 2327: raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
1.269 jld 2328: RF_ComponentLabel_t *clabel)
2329: {
1.394 ! mrg 2330: int error;
! 2331:
! 2332: error = raidread_component_area(dev, b_vp, clabel,
1.269 jld 2333: sizeof(RF_ComponentLabel_t),
1.276 mrg 2334: rf_component_info_offset(),
2335: rf_component_info_size(secsize));
1.394 ! mrg 2336:
! 2337: if (error == 0 &&
! 2338: clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) {
! 2339: rf_swap_label(clabel, clabel);
! 2340: }
! 2341:
! 2342: return error;
1.269 jld 2343: }
2344:
/* ARGSUSED */
/*
 * Read msize bytes of per-component metadata (label or parity map)
 * from the reserved area at byte offset 'offset' on dev.  dsize is the
 * on-disk size of the area (>= msize); the transfer is done in one
 * dsize-sized buf and the first msize bytes are copied out to 'data'.
 * Returns 0 or an errno from the I/O.
 */
static int
raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
    size_t msize, daddr_t offset, daddr_t dsize)
{
	struct buf *bp;
	int error;

	/* XXX should probably ensure that we don't try to do this if
	   someone has changed rf_protected_sectors. */

	if (b_vp == NULL) {
		/* For whatever reason, this component is not valid.
		   Don't try to read a component label from it. */
		return(EINVAL);
	}

	/* get a block of the appropriate size... */
	bp = geteblk((int)dsize);
	bp->b_dev = dev;

	/* get our ducks in a row for the read */
	bp->b_blkno = offset / DEV_BSIZE;
	bp->b_bcount = dsize;
	bp->b_flags |= B_READ;
	bp->b_resid = dsize;

	bdev_strategy(bp);
	error = biowait(bp);

	if (!error) {
		memcpy(data, bp->b_data, msize);
	}

	/* Release the temporary buffer in all cases. */
	brelse(bp, 0);
	return(error);
}
1.269 jld 2382:
2383: static int
1.276 mrg 2384: raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2385: RF_ComponentLabel_t *clabel)
1.269 jld 2386: {
1.394 ! mrg 2387: RF_ComponentLabel_t *clabel_write = clabel;
! 2388: RF_ComponentLabel_t lclabel;
! 2389: int error;
! 2390:
! 2391: if (clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) {
! 2392: clabel_write = &lclabel;
! 2393: rf_swap_label(clabel, clabel_write);
! 2394: }
! 2395: error = raidwrite_component_area(dev, b_vp, clabel_write,
1.269 jld 2396: sizeof(RF_ComponentLabel_t),
1.276 mrg 2397: rf_component_info_offset(),
2398: rf_component_info_size(secsize), 0);
1.394 ! mrg 2399:
! 2400: return error;
1.269 jld 2401: }
2402:
1.11 oster 2403: /* ARGSUSED */
1.269 jld 2404: static int
1.385 riastrad 2405: raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
1.269 jld 2406: size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
1.11 oster 2407: {
2408: struct buf *bp;
2409: int error;
2410:
2411: /* get a block of the appropriate size... */
1.269 jld 2412: bp = geteblk((int)dsize);
1.11 oster 2413: bp->b_dev = dev;
2414:
2415: /* get our ducks in a row for the write */
1.269 jld 2416: bp->b_blkno = offset / DEV_BSIZE;
2417: bp->b_bcount = dsize;
2418: bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2419: bp->b_resid = dsize;
1.11 oster 2420:
1.269 jld 2421: memset(bp->b_data, 0, dsize);
2422: memcpy(bp->b_data, data, msize);
1.11 oster 2423:
1.331 mlelstv 2424: bdev_strategy(bp);
1.269 jld 2425: if (asyncp)
2426: return 0;
1.340 christos 2427: error = biowait(bp);
1.233 ad 2428: brelse(bp, 0);
1.11 oster 2429: if (error) {
1.48 oster 2430: #if 1
1.11 oster 2431: printf("Failed to write RAID component info!\n");
1.48 oster 2432: #endif
1.11 oster 2433: }
2434:
2435: return(error);
1.1 oster 2436: }
1.12 oster 2437:
1.186 perry 2438: void
1.269 jld 2439: rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2440: {
2441: int c;
2442:
2443: for (c = 0; c < raidPtr->numCol; c++) {
2444: /* Skip dead disks. */
2445: if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2446: continue;
2447: /* XXXjld: what if an error occurs here? */
2448: raidwrite_component_area(raidPtr->Disks[c].dev,
2449: raidPtr->raid_cinfo[c].ci_vp, map,
2450: RF_PARITYMAP_NBYTE,
1.276 mrg 2451: rf_parity_map_offset(raidPtr),
2452: rf_parity_map_size(raidPtr), 0);
1.269 jld 2453: }
2454: }
2455:
2456: void
2457: rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2458: {
2459: struct rf_paritymap_ondisk tmp;
1.272 oster 2460: int c,first;
1.269 jld 2461:
1.272 oster 2462: first=1;
1.269 jld 2463: for (c = 0; c < raidPtr->numCol; c++) {
2464: /* Skip dead disks. */
2465: if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2466: continue;
2467: raidread_component_area(raidPtr->Disks[c].dev,
2468: raidPtr->raid_cinfo[c].ci_vp, &tmp,
2469: RF_PARITYMAP_NBYTE,
1.276 mrg 2470: rf_parity_map_offset(raidPtr),
2471: rf_parity_map_size(raidPtr));
1.272 oster 2472: if (first) {
1.269 jld 2473: memcpy(map, &tmp, sizeof(*map));
1.272 oster 2474: first = 0;
1.269 jld 2475: } else {
2476: rf_paritymap_merge(map, &tmp);
2477: }
2478: }
2479: }
2480:
/*
 * Bump the set's modification counter and mark the component label of
 * every live component (and every in-use spare) dirty.  Spares being
 * used in place of a failed disk get a freshly initialized label noting
 * which column they stand in for.
 */
void
rf_markalldirty(RF_Raid_t *raidPtr)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol = -1;

	raidPtr->mod_counter++;
	for (c = 0; c < raidPtr->numCol; c++) {
		/* we don't want to touch (at all) a disk that has
		   failed */
		if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
			clabel = raidget_component_label(raidPtr, c);
			if (clabel->status == rf_ds_spared) {
				/* XXX do something special...
				   but whatever you do, don't
				   try to access it!! */
			} else {
				raidmarkdirty(raidPtr, c);
			}
		}
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the column this spare is standing in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->row = 0;
			clabel->column = scol;
			/* Note: we *don't* change status from rf_ds_used_spare
			   to rf_ds_optimal */
			/* clabel.status = rf_ds_optimal; */

			raidmarkdirty(raidPtr, sparecol);
		}
	}
}
2540:
1.13 oster 2541:
/*
 * Push updated component labels (new mod counter, status, unit number)
 * to all optimal components and all in-use spares.  When 'final' is
 * RF_FINAL_COMPONENT_UPDATE and parity is known good, the labels are
 * additionally marked clean (normal shutdown path).
 */
void
rf_update_component_labels(RF_Raid_t *raidPtr, int final)
{
	RF_ComponentLabel_t *clabel;
	int sparecol;
	int c;
	int j;
	int scol;
	struct raid_softc *rs = raidPtr->softc;

	scol = -1;

	/* XXX should do extra checks to make sure things really are clean,
	   rather than blindly setting the clean bit... */

	raidPtr->mod_counter++;

	for (c = 0; c < raidPtr->numCol; c++) {
		if (raidPtr->Disks[c].status == rf_ds_optimal) {
			clabel = raidget_component_label(raidPtr, c);
			/* make sure status is noted */
			clabel->status = rf_ds_optimal;

			/* note what unit we are configured as */
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, c);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, c);
				}
			}
		}
		/* else we don't touch it.. */
	}

	for( c = 0; c < raidPtr->numSpare ; c++) {
		sparecol = raidPtr->numCol + c;
		/* Need to ensure that the reconstruct actually completed! */
		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
			/*

			   we claim this disk is "optimal" if it's
			   rf_ds_used_spare, as that means it should be
			   directly substitutable for the disk it replaced.
			   We note that too...

			 */

			/* Find the column this spare is standing in for. */
			for(j=0;j<raidPtr->numCol;j++) {
				if (raidPtr->Disks[j].spareCol == sparecol) {
					scol = j;
					break;
				}
			}

			/* XXX shouldn't *really* need this... */
			clabel = raidget_component_label(raidPtr, sparecol);
			/* make sure status is noted */

			raid_init_component_label(raidPtr, clabel);

			clabel->column = scol;
			clabel->status = rf_ds_optimal;
			if ((rs->sc_cflags & RAIDF_UNIT_CHANGED) == 0)
				clabel->last_unit = raidPtr->raidid;

			raidflush_component_label(raidPtr, sparecol);
			if (final == RF_FINAL_COMPONENT_UPDATE) {
				if (raidPtr->parity_good == RF_RAID_CLEAN) {
					raidmarkclean(raidPtr, sparecol);
				}
			}
		}
	}
}
2619:
2620: void
1.169 oster 2621: rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
1.69 oster 2622: {
2623:
2624: if (vp != NULL) {
2625: if (auto_configured == 1) {
1.96 oster 2626: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.238 pooka 2627: VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
1.69 oster 2628: vput(vp);
1.186 perry 2629:
2630: } else {
1.244 ad 2631: (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
1.69 oster 2632: }
1.186 perry 2633: }
1.69 oster 2634: }
2635:
2636:
2637: void
1.169 oster 2638: rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
1.68 oster 2639: {
1.186 perry 2640: int r,c;
1.69 oster 2641: struct vnode *vp;
2642: int acd;
1.68 oster 2643:
2644:
2645: /* We take this opportunity to close the vnodes like we should.. */
2646:
1.166 oster 2647: for (c = 0; c < raidPtr->numCol; c++) {
2648: vp = raidPtr->raid_cinfo[c].ci_vp;
2649: acd = raidPtr->Disks[c].auto_configured;
2650: rf_close_component(raidPtr, vp, acd);
2651: raidPtr->raid_cinfo[c].ci_vp = NULL;
2652: raidPtr->Disks[c].auto_configured = 0;
1.68 oster 2653: }
1.166 oster 2654:
1.68 oster 2655: for (r = 0; r < raidPtr->numSpare; r++) {
1.166 oster 2656: vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2657: acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
1.69 oster 2658: rf_close_component(raidPtr, vp, acd);
1.166 oster 2659: raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2660: raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
1.68 oster 2661: }
1.37 oster 2662: }
1.63 oster 2663:
1.37 oster 2664:
1.393 mrg 2665: static void
1.353 mrg 2666: rf_ReconThread(struct rf_recon_req_internal *req)
1.37 oster 2667: {
2668: int s;
2669: RF_Raid_t *raidPtr;
2670:
2671: s = splbio();
2672: raidPtr = (RF_Raid_t *) req->raidPtr;
2673: raidPtr->recon_in_progress = 1;
2674:
1.166 oster 2675: rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
1.37 oster 2676: ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2677:
2678: RF_Free(req, sizeof(*req));
2679:
2680: raidPtr->recon_in_progress = 0;
2681: splx(s);
2682:
2683: /* That's all... */
1.204 simonb 2684: kthread_exit(0); /* does not return */
1.37 oster 2685: }
2686:
1.393 mrg 2687: static void
1.169 oster 2688: rf_RewriteParityThread(RF_Raid_t *raidPtr)
1.37 oster 2689: {
2690: int retcode;
2691: int s;
2692:
1.184 oster 2693: raidPtr->parity_rewrite_stripes_done = 0;
1.37 oster 2694: raidPtr->parity_rewrite_in_progress = 1;
2695: s = splbio();
2696: retcode = rf_RewriteParity(raidPtr);
2697: splx(s);
2698: if (retcode) {
1.279 christos 2699: printf("raid%d: Error re-writing parity (%d)!\n",
2700: raidPtr->raidid, retcode);
1.37 oster 2701: } else {
2702: /* set the clean bit! If we shutdown correctly,
2703: the clean bit on each component label will get
2704: set */
2705: raidPtr->parity_good = RF_RAID_CLEAN;
2706: }
2707: raidPtr->parity_rewrite_in_progress = 0;
1.85 oster 2708:
2709: /* Anyone waiting for us to stop? If so, inform them... */
2710: if (raidPtr->waitShutdown) {
1.357 mrg 2711: rf_lock_mutex2(raidPtr->rad_lock);
2712: cv_broadcast(&raidPtr->parity_rewrite_cv);
2713: rf_unlock_mutex2(raidPtr->rad_lock);
1.85 oster 2714: }
1.37 oster 2715:
2716: /* That's all... */
1.204 simonb 2717: kthread_exit(0); /* does not return */
1.37 oster 2718: }
2719:
2720:
1.393 mrg 2721: static void
1.169 oster 2722: rf_CopybackThread(RF_Raid_t *raidPtr)
1.37 oster 2723: {
2724: int s;
2725:
2726: raidPtr->copyback_in_progress = 1;
2727: s = splbio();
2728: rf_CopybackReconstructedData(raidPtr);
2729: splx(s);
2730: raidPtr->copyback_in_progress = 0;
2731:
2732: /* That's all... */
1.204 simonb 2733: kthread_exit(0); /* does not return */
1.37 oster 2734: }
2735:
2736:
/*
 * Kernel-thread body: reconstruct the named column in place (onto the
 * same component slot rather than a hot spare), free the request, and
 * exit the thread.
 */
1.393 mrg 2737: static void
1.353 mrg 2738: rf_ReconstructInPlaceThread(struct rf_recon_req_internal *req)
1.37 oster 2739: {
2740: 	int s;
2741: 	RF_Raid_t *raidPtr;
1.186 perry 2742: 
1.37 oster 2743: 	s = splbio();
2744: 	raidPtr = req->raidPtr;
2745: 	raidPtr->recon_in_progress = 1;
1.166 oster 2746: 	rf_ReconstructInPlace(raidPtr, req->col);
1.37 oster 2747: 	RF_Free(req, sizeof(*req));
2748: 	raidPtr->recon_in_progress = 0;
2749: 	splx(s);
2750: 
2751: 	/* That's all... */
1.204 simonb 2752: 	kthread_exit(0);	/* does not return */
1.48 oster 2753: }
2754:
/*
 * Try to read a RAIDframe component label from the device behind vp.
 * If the label reads cleanly, looks reasonable for a disk of numsecs
 * sectors, and its recorded partition size fits within 'size', a new
 * RF_AutoConfig_t is prepended to ac_list (taking ownership of vp and
 * the label).  Otherwise the label is freed and vp is closed/released.
 * Returns the (possibly extended) list head.
 */
1.213 christos 2755: static RF_AutoConfig_t *
2756: rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
1.276 mrg 2757:     const char *cname, RF_SectorCount_t size, uint64_t numsecs,
2758:     unsigned secsize)
1.213 christos 2759: {
2760: 	int good_one = 0;
1.385 riastrad 2761: 	RF_ComponentLabel_t *clabel;
1.213 christos 2762: 	RF_AutoConfig_t *ac;
2763: 
1.379 chs 2764: 	clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_WAITOK);
1.213 christos 2765: 
1.276 mrg 2766: 	if (!raidread_component_label(secsize, dev, vp, clabel)) {
2767: 		/* Got the label.  Does it look reasonable? */
1.385 riastrad 2768: 		if (rf_reasonable_label(clabel, numsecs) &&
1.282 enami 2769: 		    (rf_component_label_partitionsize(clabel) <= size)) {
1.224 oster 2770: #ifdef DEBUG
1.276 mrg 2771: 			printf("Component on: %s: %llu\n",
1.213 christos 2772: 			    cname, (unsigned long long)size);
1.276 mrg 2773: 			rf_print_component_label(clabel);
1.213 christos 2774: #endif
1.276 mrg 2775: 			/* if it's reasonable, add it, else ignore it. */
2776: 			ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
1.379 chs 2777: 			    M_WAITOK);
1.276 mrg 2778: 			strlcpy(ac->devname, cname, sizeof(ac->devname));
2779: 			ac->dev = dev;
2780: 			ac->vp = vp;
2781: 			ac->clabel = clabel;
2782: 			ac->next = ac_list;
2783: 			ac_list = ac;
2784: 			good_one = 1;
2785: 		}
1.213 christos 2786: 	}
2787: 	if (!good_one) {
2788: 		/* cleanup */
2789: 		free(clabel, M_RAIDFRAME);
2790: 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.238 pooka 2791: 		VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
1.213 christos 2792: 		vput(vp);
2793: 	}
2794: 	return ac_list;
2795: }
2796:
/*
 * Scan every disk-class device in the system for RAIDframe components
 * and build a list of RF_AutoConfig_t candidates.  The scan runs twice:
 * wedges (dk) first, then everything else, so a wedge covering a whole
 * disk is preferred over that disk's raw partition.  For each device
 * we open the raw partition, read its size, then look for components
 * in wedges, FS_RAID disklabel partitions, or (failing both) the raw
 * partition itself.  Successfully probed vnodes are handed off to
 * rf_get_component(); everything else is closed here.
 */
1.393 mrg 2797: static RF_AutoConfig_t *
1.259 cegger 2798: rf_find_raid_components(void)
1.48 oster 2799: {
2800: 	struct vnode *vp;
2801: 	struct disklabel label;
1.261 dyoung 2802: 	device_t dv;
1.268 dyoung 2803: 	deviter_t di;
1.48 oster 2804: 	dev_t dev;
1.296 buhrow 2805: 	int bmajor, bminor, wedge, rf_part_found;
1.48 oster 2806: 	int error;
2807: 	int i;
2808: 	RF_AutoConfig_t *ac_list;
1.276 mrg 2809: 	uint64_t numsecs;
2810: 	unsigned secsize;
1.335 mlelstv 2811: 	int dowedges;
1.48 oster 2812: 
2813: 	/* initialize the AutoConfig list */
2814: 	ac_list = NULL;
2815: 
1.335 mlelstv 2816: 	/*
2817: 	 * we begin by trolling through *all* the devices on the system *twice*
2818: 	 * first we scan for wedges, second for other devices. This avoids
2819: 	 * using a raw partition instead of a wedge that covers the whole disk
2820: 	 */
1.48 oster 2821: 
1.335 mlelstv 2822: 	for (dowedges=1; dowedges>=0; --dowedges) {
2823: 		for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
2824: 		    dv = deviter_next(&di)) {
1.48 oster 2825: 
1.393 mrg 2826: 			/* we are only interested in disks */
1.335 mlelstv 2827: 			if (device_class(dv) != DV_DISK)
2828: 				continue;
1.48 oster 2829: 
1.393 mrg 2830: 			/* we don't care about floppies */
1.335 mlelstv 2831: 			if (device_is_a(dv, "fd")) {
2832: 				continue;
2833: 			}
1.129 oster 2834: 
1.393 mrg 2835: 			/* we don't care about CDs. */
1.335 mlelstv 2836: 			if (device_is_a(dv, "cd")) {
2837: 				continue;
2838: 			}
1.129 oster 2839: 
1.393 mrg 2840: 			/* we don't care about md. */
1.335 mlelstv 2841: 			if (device_is_a(dv, "md")) {
2842: 				continue;
2843: 			}
1.248 oster 2844: 
1.335 mlelstv 2845: 			/* hdfd is the Atari/Hades floppy driver */
2846: 			if (device_is_a(dv, "hdfd")) {
2847: 				continue;
2848: 			}
1.206 thorpej 2849: 
1.335 mlelstv 2850: 			/* fdisa is the Atari/Milan floppy driver */
2851: 			if (device_is_a(dv, "fdisa")) {
2852: 				continue;
2853: 			}
1.186 perry 2854: 
1.393 mrg 2855: 			/* we don't care about spiflash */
2856: 			if (device_is_a(dv, "spiflash")) {
2857: 				continue;
2858: 			}
2859: 
1.335 mlelstv 2860: 			/* are we in the wedges pass ? */
2861: 			wedge = device_is_a(dv, "dk");
2862: 			if (wedge != dowedges) {
2863: 				continue;
2864: 			}
1.48 oster 2865: 
1.335 mlelstv 2866: 			/* need to find the device_name_to_block_device_major stuff */
2867: 			bmajor = devsw_name2blk(device_xname(dv), NULL, 0);
1.296 buhrow 2868: 
1.335 mlelstv 2869: 			rf_part_found = 0; /*No raid partition as yet*/
1.48 oster 2870: 
1.335 mlelstv 2871: 			/* get a vnode for the raw partition of this disk */
2872: 			bminor = minor(device_unit(dv));
2873: 			dev = wedge ? makedev(bmajor, bminor) :
2874: 			    MAKEDISKDEV(bmajor, bminor, RAW_PART);
2875: 			if (bdevvp(dev, &vp))
2876: 				panic("RAID can't alloc vnode");
1.48 oster 2877: 
1.375 hannken 2878: 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.335 mlelstv 2879: 			error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
1.48 oster 2880: 
1.335 mlelstv 2881: 			if (error) {
2882: 				/* "Who cares."  Continue looking
2883: 				   for something that exists*/
2884: 				vput(vp);
2885: 				continue;
2886: 			}
1.48 oster 2887: 
1.335 mlelstv 2888: 			error = getdisksize(vp, &numsecs, &secsize);
1.213 christos 2889: 			if (error) {
1.339 mlelstv 2890: 				/*
2891: 				 * Pseudo devices like vnd and cgd can be
2892: 				 * opened but may still need some configuration.
2893: 				 * Ignore these quietly.
2894: 				 */
2895: 				if (error != ENXIO)
2896: 					printf("RAIDframe: can't get disk size"
2897: 					    " for dev %s (%d)\n",
2898: 					    device_xname(dv), error);
1.241 oster 2899: 				VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2900: 				vput(vp);
1.213 christos 2901: 				continue;
2902: 			}
1.335 mlelstv 2903: 			if (wedge) {
2904: 				struct dkwedge_info dkw;
2905: 				error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
2906: 				    NOCRED);
2907: 				if (error) {
2908: 					printf("RAIDframe: can't get wedge info for "
2909: 					    "dev %s (%d)\n", device_xname(dv), error);
2910: 					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2911: 					vput(vp);
2912: 					continue;
2913: 				}
1.213 christos 2914: 
1.335 mlelstv 2915: 				if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
2916: 					VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2917: 					vput(vp);
2918: 					continue;
2919: 				}
1.385 riastrad 2920: 
1.375 hannken 2921: 				VOP_UNLOCK(vp);
1.335 mlelstv 2922: 				ac_list = rf_get_component(ac_list, dev, vp,
2923: 				    device_xname(dv), dkw.dkw_size, numsecs, secsize);
2924: 				rf_part_found = 1; /*There is a raid component on this disk*/
1.228 christos 2925: 				continue;
1.241 oster 2926: 			}
1.213 christos 2927: 
1.335 mlelstv 2928: 			/* Ok, the disk exists.  Go get the disklabel. */
2929: 			error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
2930: 			if (error) {
2931: 				/*
2932: 				 * XXX can't happen - open() would
2933: 				 * have errored out (or faked up one)
2934: 				 */
2935: 				if (error != ENOTTY)
2936: 					printf("RAIDframe: can't get label for dev "
2937: 					    "%s (%d)\n", device_xname(dv), error);
2938: 			}
1.48 oster 2939: 
1.335 mlelstv 2940: 			/* don't need this any more.  We'll allocate it again
2941: 			   a little later if we really do... */
2942: 			VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
2943: 			vput(vp);
1.48 oster 2944: 
1.335 mlelstv 2945: 			if (error)
1.48 oster 2946: 				continue;
2947: 
1.335 mlelstv 2948: 			rf_part_found = 0; /*No raid partitions yet*/
2949: 			for (i = 0; i < label.d_npartitions; i++) {
2950: 				char cname[sizeof(ac_list->devname)];
2951: 
2952: 				/* We only support partitions marked as RAID */
2953: 				if (label.d_partitions[i].p_fstype != FS_RAID)
2954: 					continue;
2955: 
2956: 				dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
2957: 				if (bdevvp(dev, &vp))
2958: 					panic("RAID can't alloc vnode");
2959: 
1.375 hannken 2960: 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.335 mlelstv 2961: 				error = VOP_OPEN(vp, FREAD, NOCRED);
2962: 				if (error) {
2963: 					/* Whatever... */
2964: 					vput(vp);
2965: 					continue;
2966: 				}
1.375 hannken 2967: 				VOP_UNLOCK(vp);
1.335 mlelstv 2968: 				snprintf(cname, sizeof(cname), "%s%c",
2969: 				    device_xname(dv), 'a' + i);
2970: 				ac_list = rf_get_component(ac_list, dev, vp, cname,
2971: 				    label.d_partitions[i].p_size, numsecs, secsize);
2972: 				rf_part_found = 1; /*There is at least one raid partition on this disk*/
1.48 oster 2973: 			}
1.296 buhrow 2974: 
1.335 mlelstv 2975: 			/*
2976: 			 *If there is no raid component on this disk, either in a
2977: 			 *disklabel or inside a wedge, check the raw partition as well,
2978: 			 *as it is possible to configure raid components on raw disk
2979: 			 *devices.
2980: 			 */
1.296 buhrow 2981: 
1.335 mlelstv 2982: 			if (!rf_part_found) {
2983: 				char cname[sizeof(ac_list->devname)];
1.296 buhrow 2984: 
1.335 mlelstv 2985: 				dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
2986: 				if (bdevvp(dev, &vp))
2987: 					panic("RAID can't alloc vnode");
2988: 
1.375 hannken 2989: 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
2990: 
1.335 mlelstv 2991: 				error = VOP_OPEN(vp, FREAD, NOCRED);
2992: 				if (error) {
2993: 					/* Whatever... */
2994: 					vput(vp);
2995: 					continue;
2996: 				}
1.375 hannken 2997: 				VOP_UNLOCK(vp);
1.335 mlelstv 2998: 				snprintf(cname, sizeof(cname), "%s%c",
2999: 				    device_xname(dv), 'a' + RAW_PART);
3000: 				ac_list = rf_get_component(ac_list, dev, vp, cname,
3001: 				    label.d_partitions[RAW_PART].p_size, numsecs, secsize);
1.296 buhrow 3002: 			}
1.48 oster 3003: 		}
1.335 mlelstv 3004: 		deviter_release(&di);
1.48 oster 3005: 	}
1.213 christos 3006: 	return ac_list;
1.48 oster 3007: }
1.186 perry 3008:
/*
 * Sanity-check a candidate component label: known version (including a
 * byte-swapped RF_COMPONENT_LABEL_VERSION, for labels written on the
 * other endianness), clean flag in range, row/column within bounds,
 * positive geometry, and a nonzero block count.  If the label passes
 * and numsecs is nonzero, rf_fix_old_label_size() scrubs legacy
 * garbage from the 64-bit size extension fields.  Returns 1 if
 * reasonable, 0 otherwise.
 */
1.292 oster 3009: int
1.284 mrg 3010: rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
1.48 oster 3011: {
1.186 perry 3012: 
1.393 mrg 3013: 	if ((clabel->version==RF_COMPONENT_LABEL_VERSION_1 ||
1.394 ! mrg 3014: 	     clabel->version==RF_COMPONENT_LABEL_VERSION ||
! 3015: 	     clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION)) &&
1.393 mrg 3016: 	    (clabel->clean == RF_RAID_CLEAN ||
3017: 	     clabel->clean == RF_RAID_DIRTY) &&
1.186 perry 3018: 	    clabel->row >=0 &&
3019: 	    clabel->column >= 0 &&
1.48 oster 3020: 	    clabel->num_rows > 0 &&
3021: 	    clabel->num_columns > 0 &&
1.186 perry 3022: 	    clabel->row < clabel->num_rows &&
1.48 oster 3023: 	    clabel->column < clabel->num_columns &&
3024: 	    clabel->blockSize > 0 &&
1.282 enami 3025: 	    /*
3026: 	     * numBlocksHi may contain garbage, but it is ok since
3027: 	     * the type is unsigned.  If it is really garbage,
3028: 	     * rf_fix_old_label_size() will fix it.
3029: 	     */
3030: 	    rf_component_label_numblocks(clabel) > 0) {
1.284 mrg 3031: 		/*
3032: 		 * label looks reasonable enough...
3033: 		 * let's make sure it has no old garbage.
3034: 		 */
1.292 oster 3035: 		if (numsecs)
3036: 			rf_fix_old_label_size(clabel, numsecs);
1.48 oster 3037: 		return(1);
3038: 	}
3039: 	return(0);
3040: }
3041:
3042:
1.278 mrg 3043: /*
3044: * For reasons yet unknown, some old component labels have garbage in
3045: * the newer numBlocksHi region, and this causes lossage. Since those
3046: * disks will also have numsecs set to less than 32 bits of sectors,
1.299 oster 3047: * we can determine when this corruption has occurred, and fix it.
1.284 mrg 3048: *
3049: * The exact same problem, with the same unknown reason, happens to
3050: * the partitionSizeHi member as well.
1.278 mrg 3051: */
/*
 * Scrub legacy garbage from the 64-bit size extension fields: when the
 * whole disk is smaller than 2^32 sectors, the high words cannot
 * legitimately be nonzero, so reset them (with a warning).
 */
3052: static void
3053: rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3054: {
3055: 
1.284 mrg 3056: 	if (numsecs < ((uint64_t)1 << 32)) {
3057: 		if (clabel->numBlocksHi) {
3058: 			printf("WARNING: total sectors < 32 bits, yet "
3059: 			    "numBlocksHi set\n"
3060: 			    "WARNING: resetting numBlocksHi to zero.\n");
3061: 			clabel->numBlocksHi = 0;
3062: 		}
3063: 
3064: 		if (clabel->partitionSizeHi) {
3065: 			printf("WARNING: total sectors < 32 bits, yet "
3066: 			    "partitionSizeHi set\n"
3067: 			    "WARNING: resetting partitionSizeHi to zero.\n");
3068: 			clabel->partitionSizeHi = 0;
3069: 		}
1.278 mrg 3070: 	}
3071: }
3072:
3073:
1.224 oster 3074: #ifdef DEBUG
/*
 * Debug helper: dump the interesting fields of a component label to
 * the console.  Compiled only under DEBUG.
 */
1.48 oster 3075: void
1.169 oster 3076: rf_print_component_label(RF_ComponentLabel_t *clabel)
1.48 oster 3077: {
1.282 enami 3078: 	uint64_t numBlocks;
1.308 christos 3079: 	static const char *rp[] = {
3080: 	    "No", "Force", "Soft", "*invalid*"
3081: 	};
3082: 
1.275 mrg 3083: 
1.282 enami 3084: 	numBlocks = rf_component_label_numblocks(clabel);
1.275 mrg 3085: 
1.48 oster 3086: 	printf("   Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
1.186 perry 3087: 	    clabel->row, clabel->column,
1.48 oster 3088: 	    clabel->num_rows, clabel->num_columns);
3089: 	printf("   Version: %d Serial Number: %d Mod Counter: %d\n",
3090: 	    clabel->version, clabel->serial_number,
3091: 	    clabel->mod_counter);
3092: 	printf("   Clean: %s Status: %d\n",
1.271 dyoung 3093: 	    clabel->clean ? "Yes" : "No", clabel->status);
1.48 oster 3094: 	printf("   sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
3095: 	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
1.275 mrg 3096: 	printf("   RAID Level: %c  blocksize: %d numBlocks: %"PRIu64"\n",
3097: 	    (char) clabel->parityConfig, clabel->blockSize, numBlocks);
1.271 dyoung 3098: 	printf("   Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
1.308 christos 3099: 	printf("   Root partition: %s\n", rp[clabel->root_partition & 3]);
1.271 dyoung 3100: 	printf("   Last configured as: raid%d\n", clabel->last_unit);
1.51 oster 3101: #if 0
3102: 	printf("   Config order: %d\n", clabel->config_order);
3103: #endif
1.186 perry 3104: 
1.48 oster 3105: }
1.133 oster 3106: #endif
1.48 oster 3107:
/*
 * Partition the flat list of discovered components into configuration
 * sets: each component is appended to the first existing set it fits
 * (per rf_does_it_fit), or starts a new set.  Note that the input
 * list's next-pointers are rethreaded in place.  Returns the list of
 * sets.
 */
1.393 mrg 3108: static RF_ConfigSet_t *
1.169 oster 3109: rf_create_auto_sets(RF_AutoConfig_t *ac_list)
1.48 oster 3110: {
3111: 	RF_AutoConfig_t *ac;
3112: 	RF_ConfigSet_t *config_sets;
3113: 	RF_ConfigSet_t *cset;
3114: 	RF_AutoConfig_t *ac_next;
3115: 
3116: 
3117: 	config_sets = NULL;
3118: 
3119: 	/* Go through the AutoConfig list, and figure out which components
3120: 	   belong to what sets.  */
3121: 	ac = ac_list;
3122: 	while(ac!=NULL) {
3123: 		/* we're going to putz with ac->next, so save it here
3124: 		   for use at the end of the loop */
3125: 		ac_next = ac->next;
3126: 
3127: 		if (config_sets == NULL) {
3128: 			/* will need at least this one... */
1.379 chs 3129: 			config_sets = malloc(sizeof(RF_ConfigSet_t),
3130: 			    M_RAIDFRAME, M_WAITOK);
1.48 oster 3131: 			/* this one is easy :) */
3132: 			config_sets->ac = ac;
3133: 			config_sets->next = NULL;
1.51 oster 3134: 			config_sets->rootable = 0;
1.48 oster 3135: 			ac->next = NULL;
3136: 		} else {
3137: 			/* which set does this component fit into? */
3138: 			cset = config_sets;
3139: 			while(cset!=NULL) {
1.49 oster 3140: 				if (rf_does_it_fit(cset, ac)) {
1.86 oster 3141: 					/* looks like it matches... */
3142: 					ac->next = cset->ac;
3143: 					cset->ac = ac;
1.48 oster 3144: 					break;
3145: 				}
3146: 				cset = cset->next;
3147: 			}
3148: 			if (cset==NULL) {
3149: 				/* didn't find a match above... new set..*/
1.379 chs 3150: 				cset = malloc(sizeof(RF_ConfigSet_t),
3151: 				    M_RAIDFRAME, M_WAITOK);
1.48 oster 3152: 				cset->ac = ac;
3153: 				ac->next = NULL;
3154: 				cset->next = config_sets;
1.51 oster 3155: 				cset->rootable = 0;
1.48 oster 3156: 				config_sets = cset;
3157: 			}
3158: 		}
3159: 		ac = ac_next;
3160: 	}
3161: 
3162: 
3163: 	return(config_sets);
3164: }
3165:
/*
 * Decide whether component 'ac' belongs to configuration set 'cset' by
 * comparing its label against the set's first member.  Deliberately
 * ignores partitionSize and mod_counter (see comment below).  Returns
 * 1 on a match, 0 otherwise.
 */
3166: static int
1.169 oster 3167: rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
1.48 oster 3168: {
3169: 	RF_ComponentLabel_t *clabel1, *clabel2;
3170: 
3171: 	/* If this one matches the *first* one in the set, that's good
3172: 	   enough, since the other members of the set would have been
3173: 	   through here too... */
1.60 oster 3174: 	/* note that we are not checking partitionSize here..
3175: 
3176: 	   Note that we are also not checking the mod_counters here.
1.299 oster 3177: 	   If everything else matches except the mod_counter, that's
1.60 oster 3178: 	   good enough for this test.  We will deal with the mod_counters
1.186 perry 3179: 	   a little later in the autoconfiguration process.
1.60 oster 3180: 
3181: 	    (clabel1->mod_counter == clabel2->mod_counter) &&
1.81 oster 3182: 
3183: 	   The reason we don't check for this is that failed disks
3184: 	   will have lower modification counts.  If those disks are
3185: 	   not added to the set they used to belong to, then they will
3186: 	   form their own set, which may result in 2 different sets,
3187: 	   for example, competing to be configured at raid0, and
3188: 	   perhaps competing to be the root filesystem set.  If the
3189: 	   wrong ones get configured, or both attempt to become /,
3190: 	   weird behaviour and or serious lossage will occur.  Thus we
3191: 	   need to bring them into the fold here, and kick them out at
3192: 	   a later point.
1.60 oster 3193: 
3194: 	*/
1.48 oster 3195: 
3196: 	clabel1 = cset->ac->clabel;
3197: 	clabel2 = ac->clabel;
3198: 	if ((clabel1->version == clabel2->version) &&
3199: 	    (clabel1->serial_number == clabel2->serial_number) &&
3200: 	    (clabel1->num_rows == clabel2->num_rows) &&
3201: 	    (clabel1->num_columns == clabel2->num_columns) &&
3202: 	    (clabel1->sectPerSU == clabel2->sectPerSU) &&
3203: 	    (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3204: 	    (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3205: 	    (clabel1->parityConfig == clabel2->parityConfig) &&
3206: 	    (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3207: 	    (clabel1->blockSize == clabel2->blockSize) &&
1.282 enami 3208: 	    rf_component_label_numblocks(clabel1) ==
3209: 	    rf_component_label_numblocks(clabel2) &&
1.48 oster 3210: 	    (clabel1->autoconfigure == clabel2->autoconfigure) &&
3211: 	    (clabel1->root_partition == clabel2->root_partition) &&
3212: 	    (clabel1->last_unit == clabel2->last_unit) &&
3213: 	    (clabel1->config_order == clabel2->config_order)) {
3214: 		/* if it gets here, it almost *has* to be a match */
3215: 	} else {
1.186 perry 3216: 		/* it's not consistent with somebody in the set..
1.48 oster 3217: 		   punt */
3218: 		return(0);
3219: 	}
3220: 	/* all was fine.. it must fit... */
3221: 	return(1);
3222: }
3223:
/*
 * Decide whether a configuration set has enough live components (with
 * the newest mod_counter) to be configured.  First the target
 * mod_counter is computed as the maximum over the set; then each
 * column is checked for a matching component.  RAID 1 gets special
 * even/odd pairing treatment; RAID 0 tolerates no missing columns,
 * RAID 4/5 tolerate one.  Returns 1 if configurable, 0 otherwise.
 */
1.393 mrg 3224: static int
1.169 oster 3225: rf_have_enough_components(RF_ConfigSet_t *cset)
1.48 oster 3226: {
1.51 oster 3227: 	RF_AutoConfig_t *ac;
3228: 	RF_AutoConfig_t *auto_config;
3229: 	RF_ComponentLabel_t *clabel;
1.166 oster 3230: 	int c;
1.51 oster 3231: 	int num_cols;
3232: 	int num_missing;
1.86 oster 3233: 	int mod_counter;
1.87 oster 3234: 	int mod_counter_found;
1.88 oster 3235: 	int even_pair_failed;
3236: 	char parity_type;
1.186 perry 3237: 
1.51 oster 3238: 
1.48 oster 3239: 	/* check to see that we have enough 'live' components
3240: 	   of this set.  If so, we can configure it if necessary */
3241: 
1.51 oster 3242: 	num_cols = cset->ac->clabel->num_columns;
1.88 oster 3243: 	parity_type = cset->ac->clabel->parityConfig;
1.51 oster 3244: 
3245: 	/* XXX Check for duplicate components!?!?!? */
3246: 
1.86 oster 3247: 	/* Determine what the mod_counter is supposed to be for this set. */
3248: 
1.87 oster 3249: 	mod_counter_found = 0;
1.101 oster 3250: 	mod_counter = 0;
1.86 oster 3251: 	ac = cset->ac;
3252: 	while(ac!=NULL) {
1.87 oster 3253: 		if (mod_counter_found==0) {
1.86 oster 3254: 			mod_counter = ac->clabel->mod_counter;
1.87 oster 3255: 			mod_counter_found = 1;
3256: 		} else {
3257: 			if (ac->clabel->mod_counter > mod_counter) {
3258: 				mod_counter = ac->clabel->mod_counter;
3259: 			}
1.86 oster 3260: 		}
3261: 		ac = ac->next;
3262: 	}
3263: 
1.51 oster 3264: 	num_missing = 0;
3265: 	auto_config = cset->ac;
3266: 
1.166 oster 3267: 	even_pair_failed = 0;
3268: 	for(c=0; c<num_cols; c++) {
3269: 		ac = auto_config;
3270: 		while(ac!=NULL) {
1.186 perry 3271: 			if ((ac->clabel->column == c) &&
1.166 oster 3272: 			    (ac->clabel->mod_counter == mod_counter)) {
3273: 				/* it's this one... */
1.224 oster 3274: #ifdef DEBUG
1.166 oster 3275: 				printf("Found: %s at %d\n",
3276: 				       ac->devname,c);
1.51 oster 3277: #endif
1.166 oster 3278: 				break;
1.51 oster 3279: 			}
1.166 oster 3280: 			ac=ac->next;
3281: 		}
3282: 		if (ac==NULL) {
1.51 oster 3283: 				/* Didn't find one here! */
1.88 oster 3284: 				/* special case for RAID 1, especially
3285: 				   where there are more than 2
3286: 				   components (where RAIDframe treats
3287: 				   things a little differently :( ) */
1.166 oster 3288: 			if (parity_type == '1') {
3289: 				if (c%2 == 0) { /* even component */
3290: 					even_pair_failed = 1;
3291: 				} else { /* odd component.  If
3292: 					    we're failed, and
3293: 					    so is the even
3294: 					    component, it's
3295: 					    "Good Night, Charlie" */
3296: 					if (even_pair_failed == 1) {
3297: 						return(0);
1.88 oster 3298: 					}
3299: 				}
1.166 oster 3300: 			} else {
3301: 				/* normal accounting */
3302: 				num_missing++;
1.88 oster 3303: 			}
1.166 oster 3304: 		}
3305: 		if ((parity_type == '1') && (c%2 == 1)) {
1.88 oster 3306: 				/* Just did an even component, and we didn't
1.186 perry 3307: 				   bail.. reset the even_pair_failed flag,
1.88 oster 3308: 				   and go on to the next component.... */
1.166 oster 3309: 			even_pair_failed = 0;
1.51 oster 3310: 		}
3311: 	}
3312: 
3313: 	clabel = cset->ac->clabel;
3314: 
3315: 	if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3316: 	    ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3317: 	    ((clabel->parityConfig == '5') && (num_missing > 1))) {
3318: 		/* XXX this needs to be made *much* more general */
3319: 		/* Too many failures */
3320: 		return(0);
3321: 	}
3322: 	/* otherwise, all is well, and we've got enough to take a kick
3323: 	   at autoconfiguring this set */
3324: 	return(1);
1.48 oster 3325: }
3326:
/*
 * Build an RF_Config_t from the component labels of an autoconfig
 * chain: geometry/layout fields come from the first label, and each
 * component's device name is slotted into devnames by its column.
 * Debug variables are cleared.  (raidPtr is currently unused here.)
 */
1.393 mrg 3327: static void
1.169 oster 3328: rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
1.222 christos 3329: 			RF_Raid_t *raidPtr)
1.48 oster 3330: {
3331: 	RF_ComponentLabel_t *clabel;
1.77 oster 3332: 	int i;
1.48 oster 3333: 
3334: 	clabel = ac->clabel;
3335: 
3336: 	/* 1. Fill in the common stuff */
3337: 	config->numCol = clabel->num_columns;
3338: 	config->numSpare = 0; /* XXX should this be set here? */
3339: 	config->sectPerSU = clabel->sectPerSU;
3340: 	config->SUsPerPU = clabel->SUsPerPU;
3341: 	config->SUsPerRU = clabel->SUsPerRU;
3342: 	config->parityConfig = clabel->parityConfig;
3343: 	/* XXX... */
3344: 	strcpy(config->diskQueueType,"fifo");
3345: 	config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3346: 	config->layoutSpecificSize = 0; /* XXX ?? */
3347: 
3348: 	while(ac!=NULL) {
3349: 		/* row/col values will be in range due to the checks
3350: 		   in reasonable_label() */
1.166 oster 3351: 		strcpy(config->devnames[0][ac->clabel->column],
1.48 oster 3352: 		       ac->devname);
3353: 		ac = ac->next;
3354: 	}
3355: 
1.77 oster 3356: 	for(i=0;i<RF_MAXDBGV;i++) {
1.163 fvdl 3357: 		config->debugVars[i][0] = 0;
1.77 oster 3358: 	}
1.48 oster 3359: }
3360:
/*
 * Set the autoconfigure flag on the set and write it into the
 * component label of every optimal column and every in-use spare.
 * Returns the new value for convenience.
 */
1.393 mrg 3361: static int
1.169 oster 3362: rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
1.48 oster 3363: {
1.269 jld 3364: 	RF_ComponentLabel_t *clabel;
1.166 oster 3365: 	int column;
1.148 oster 3366: 	int sparecol;
1.48 oster 3367: 
1.54 oster 3368: 	raidPtr->autoconfigure = new_value;
1.166 oster 3369: 
3370: 	for(column=0; column<raidPtr->numCol; column++) {
3371: 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
1.269 jld 3372: 			clabel = raidget_component_label(raidPtr, column);
3373: 			clabel->autoconfigure = new_value;
3374: 			raidflush_component_label(raidPtr, column);
1.48 oster 3375: 		}
3376: 	}
1.148 oster 3377: 	for(column = 0; column < raidPtr->numSpare ; column++) {
3378: 		sparecol = raidPtr->numCol + column;
1.166 oster 3379: 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.269 jld 3380: 			clabel = raidget_component_label(raidPtr, sparecol);
3381: 			clabel->autoconfigure = new_value;
3382: 			raidflush_component_label(raidPtr, sparecol);
1.148 oster 3383: 		}
3384: 	}
1.48 oster 3385: 	return(new_value);
3386: }
3387:
/*
 * Set the root_partition flag on the set and write it into the
 * component label of every optimal column and every in-use spare.
 * Mirrors rf_set_autoconfig().  Returns the new value.
 */
1.393 mrg 3388: static int
1.169 oster 3389: rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
1.48 oster 3390: {
1.269 jld 3391: 	RF_ComponentLabel_t *clabel;
1.166 oster 3392: 	int column;
1.148 oster 3393: 	int sparecol;
1.48 oster 3394: 
1.54 oster 3395: 	raidPtr->root_partition = new_value;
1.166 oster 3396: 	for(column=0; column<raidPtr->numCol; column++) {
3397: 		if (raidPtr->Disks[column].status == rf_ds_optimal) {
1.269 jld 3398: 			clabel = raidget_component_label(raidPtr, column);
3399: 			clabel->root_partition = new_value;
3400: 			raidflush_component_label(raidPtr, column);
1.148 oster 3401: 		}
3402: 	}
3403: 	for(column = 0; column < raidPtr->numSpare ; column++) {
3404: 		sparecol = raidPtr->numCol + column;
1.166 oster 3405: 		if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.269 jld 3406: 			clabel = raidget_component_label(raidPtr, sparecol);
3407: 			clabel->root_partition = new_value;
3408: 			raidflush_component_label(raidPtr, sparecol);
1.48 oster 3409: 		}
3410: 	}
3411: 	return(new_value);
3412: }
3413:
/*
 * Close and release the vnode of every component in the set that still
 * holds one, NULLing the pointer afterwards.  Used when a candidate
 * set is abandoned or its vnodes are no longer needed.
 */
1.393 mrg 3414: static void
1.169 oster 3415: rf_release_all_vps(RF_ConfigSet_t *cset)
1.48 oster 3416: {
3417: 	RF_AutoConfig_t *ac;
1.186 perry 3418: 
1.48 oster 3419: 	ac = cset->ac;
3420: 	while(ac!=NULL) {
3421: 		/* Close the vp, and give it back */
3422: 		if (ac->vp) {
1.96 oster 3423: 			vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
1.335 mlelstv 3424: 			VOP_CLOSE(ac->vp, FREAD | FWRITE, NOCRED);
1.48 oster 3425: 			vput(ac->vp);
1.86 oster 3426: 			ac->vp = NULL;
1.48 oster 3427: 		}
3428: 		ac = ac->next;
3429: 	}
3430: }
3431:
3432:
/*
 * Free all memory owned by a configuration set: each component's label
 * and RF_AutoConfig_t, then the set structure itself.  Vnodes are NOT
 * released here — see rf_release_all_vps().
 */
1.393 mrg 3433: static void
1.169 oster 3434: rf_cleanup_config_set(RF_ConfigSet_t *cset)
1.48 oster 3435: {
3436: 	RF_AutoConfig_t *ac;
3437: 	RF_AutoConfig_t *next_ac;
1.186 perry 3438: 
1.48 oster 3439: 	ac = cset->ac;
3440: 	while(ac!=NULL) {
3441: 		next_ac = ac->next;
3442: 		/* nuke the label */
3443: 		free(ac->clabel, M_RAIDFRAME);
3444: 		/* cleanup the config structure */
3445: 		free(ac, M_RAIDFRAME);
3446: 		/* "next.." */
3447: 		ac = next_ac;
3448: 	}
3449: 	/* and, finally, nuke the config set */
3450: 	free(cset, M_RAIDFRAME);
3451: }
3452:
3453:
/*
 * Populate a component label from the live RAID set state (serial
 * number, mod counter, geometry, layout, flags).  A label already
 * carrying the byte-swapped version magic keeps it, so other-endian
 * labels stay recognizable.  The label is written as DIRTY/optimal;
 * parity-map fields are initialized unless RF_NO_PARITY_MAP.
 */
3454: void
1.169 oster 3455: raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
1.48 oster 3456: {
1.394 ! mrg 3457: 	/* avoid over-writing byteswapped version. */
! 3458: 	if (clabel->version != bswap32(RF_COMPONENT_LABEL_VERSION))
! 3459: 		clabel->version = RF_COMPONENT_LABEL_VERSION;
1.57 oster 3460: 	clabel->serial_number = raidPtr->serial_number;
1.48 oster 3461: 	clabel->mod_counter = raidPtr->mod_counter;
1.269 jld 3462: 
1.166 oster 3463: 	clabel->num_rows = 1;
1.48 oster 3464: 	clabel->num_columns = raidPtr->numCol;
3465: 	clabel->clean = RF_RAID_DIRTY; /* not clean */
3466: 	clabel->status = rf_ds_optimal; /* "It's good!" */
1.186 perry 3467: 
1.48 oster 3468: 	clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3469: 	clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3470: 	clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
1.54 oster 3471: 
3472: 	clabel->blockSize = raidPtr->bytesPerSector;
1.282 enami 3473: 	rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);
1.54 oster 3474: 
1.48 oster 3475: 	/* XXX not portable */
3476: 	clabel->parityConfig = raidPtr->Layout.map->parityConfig;
1.54 oster 3477: 	clabel->maxOutstanding = raidPtr->maxOutstanding;
3478: 	clabel->autoconfigure = raidPtr->autoconfigure;
3479: 	clabel->root_partition = raidPtr->root_partition;
1.48 oster 3480: 	clabel->last_unit = raidPtr->raidid;
1.54 oster 3481: 	clabel->config_order = raidPtr->config_order;
1.269 jld 3482: 
3483: #ifndef RF_NO_PARITY_MAP
3484: 	rf_paritymap_init_label(raidPtr->parity_map, clabel);
3485: #endif
1.51 oster 3486: }
3487:
/*
 * Autoconfigure one configuration set: pick a unit number (preferring
 * the label's last_unit, probing upward past busy units), build an
 * RF_Config_t from the labels, and run rf_Configure().  On success
 * the set is initialized, marked dirty, and its root-eligibility
 * recorded; on failure the softc is released.  Returns the configured
 * softc or NULL.
 */
1.393 mrg 3488: static struct raid_softc *
1.300 christos 3489: rf_auto_config_set(RF_ConfigSet_t *cset)
1.51 oster 3490: {
3491: 	RF_Raid_t *raidPtr;
3492: 	RF_Config_t *config;
3493: 	int raidID;
1.300 christos 3494: 	struct raid_softc *sc;
1.51 oster 3495: 
1.224 oster 3496: #ifdef DEBUG
1.72 oster 3497: 	printf("RAID autoconfigure\n");
1.127 oster 3498: #endif
1.51 oster 3499: 
3500: 	/* 1. Create a config structure */
1.379 chs 3501: 	config = malloc(sizeof(*config), M_RAIDFRAME, M_WAITOK|M_ZERO);
1.77 oster 3502: 
1.186 perry 3503: 	/*
3504: 	   2. Figure out what RAID ID this one is supposed to live at
1.51 oster 3505: 	   See if we can get the same RAID dev that it was configured
1.186 perry 3506: 	   on last time..
1.51 oster 3507: 	*/
3508: 
3509: 	raidID = cset->ac->clabel->last_unit;
1.327 pgoyette 3510: 	for (sc = raidget(raidID, false); sc && sc->sc_r.valid != 0;
3511: 	     sc = raidget(++raidID, false))
1.300 christos 3512: 		continue;
1.224 oster 3513: #ifdef DEBUG
1.72 oster 3514: 	printf("Configuring raid%d:\n",raidID);
1.127 oster 3515: #endif
3516: 
1.327 pgoyette 3517: 	if (sc == NULL)
3518: 		sc = raidget(raidID, true);
1.300 christos 3519: 	raidPtr = &sc->sc_r;
1.51 oster 3520: 
3521: 	/* XXX all this stuff should be done SOMEWHERE ELSE! */
1.302 christos 3522: 	raidPtr->softc = sc;
1.51 oster 3523: 	raidPtr->raidid = raidID;
3524: 	raidPtr->openings = RAIDOUTSTANDING;
3525: 
3526: 	/* 3. Build the configuration structure */
3527: 	rf_create_configuration(cset->ac, config, raidPtr);
3528: 
3529: 	/* 4. Do the configuration */
1.300 christos 3530: 	if (rf_Configure(raidPtr, config, cset->ac) == 0) {
3531: 		raidinit(sc);
1.186 perry 3532: 
1.300 christos 3533: 		rf_markalldirty(raidPtr);
3534: 		raidPtr->autoconfigure = 1; /* XXX do this here? */
1.308 christos 3535: 		switch (cset->ac->clabel->root_partition) {
3536: 		case 1:	/* Force Root */
3537: 		case 2:	/* Soft Root: root when boot partition part of raid */
3538: 			/*
3539: 			 * everything configured just fine.  Make a note
3540: 			 * that this set is eligible to be root,
3541: 			 * or forced to be root
3542: 			 */
3543: 			cset->rootable = cset->ac->clabel->root_partition;
1.54 oster 3544: 			/* XXX do this here? */
1.308 christos 3545: 			raidPtr->root_partition = cset->rootable;
3546: 			break;
3547: 		default:
3548: 			break;
1.51 oster 3549: 		}
1.300 christos 3550: 	} else {
3551: 		raidput(sc);
3552: 		sc = NULL;
1.51 oster 3553: 	}
3554: 
3555: 	/* 5. Cleanup */
3556: 	free(config, M_RAIDFRAME);
1.300 christos 3557: 	return sc;
1.99 oster 3558: }
3559:
/*
 * Initialize a pool at IPL_BIO with a high-water mark of xmax and
 * pre-allocate (prime) xmin items.
 */
3560: void
1.187 christos 3561: rf_pool_init(struct pool *p, size_t size, const char *w_chan,
3562: 	     size_t xmin, size_t xmax)
1.177 oster 3563: {
1.352 christos 3564: 
1.227 ad 3565: 	pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
1.187 christos 3566: 	pool_sethiwat(p, xmax);
1.382 chs 3567: 	pool_prime(p, xmin);
1.177 oster 3568: }
1.190 oster 3569:
3570: /*
1.335 mlelstv 3571: * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buffer queue
3572: * to see if there is IO pending and if that IO could possibly be done
3573: * for a given RAID set. Returns 0 if IO is waiting and can be done, 1
1.190 oster 3574: * otherwise.
3575: *
3576: */
/* See the block comment above: returns 0 when there is pending I/O the
 * set could service (queued strategy work and openings available), 1
 * otherwise — including when the set is not initialized. */
3577: int
1.300 christos 3578: rf_buf_queue_check(RF_Raid_t *raidPtr)
1.190 oster 3579: {
1.335 mlelstv 3580: 	struct raid_softc *rs;
3581: 	struct dk_softc *dksc;
3582: 
3583: 	rs = raidPtr->softc;
3584: 	dksc = &rs->sc_dksc;
3585: 
3586: 	if ((rs->sc_flags & RAIDF_INITED) == 0)
3587: 		return 1;
3588: 
3589: 	if (dk_strategy_pending(dksc) && raidPtr->openings > 0) {
1.190 oster 3590: 		/* there is work to do */
3591: 		return 0;
1.335 mlelstv 3592: 	}
1.190 oster 3593: 	/* default is nothing to do */
3594: 	return 1;
3595: }
1.213 christos 3596:
/*
 * Fill in a component's blockSize/numBlocks/partitionSize from the
 * disk behind vp; numBlocks excludes the RAIDframe-protected sectors
 * at the front.  Returns 0 on success or the getdisksize() error.
 */
3597: int
1.294 oster 3598: rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
1.213 christos 3599: {
1.275 mrg 3600: 	uint64_t numsecs;
3601: 	unsigned secsize;
1.213 christos 3602: 	int error;
3603: 
1.275 mrg 3604: 	error = getdisksize(vp, &numsecs, &secsize);
1.213 christos 3605: 	if (error == 0) {
1.275 mrg 3606: 		diskPtr->blockSize = secsize;
3607: 		diskPtr->numBlocks = numsecs - rf_protectedSectors;
3608: 		diskPtr->partitionSize = numsecs;
1.213 christos 3609: 		return 0;
3610: 	}
3611: 	return error;
3612: }
1.217 oster 3613:
/* autoconf match: pseudo-device, always matches. */
3614: static int
1.261 dyoung 3615: raid_match(device_t self, cfdata_t cfdata, void *aux)
1.217 oster 3616: {
3617: 	return 1;
3618: }
3619:
/* autoconf attach: nothing to do; real setup happens at configure time. */
3620: static void
1.261 dyoung 3621: raid_attach(device_t parent, device_t self, void *aux)
1.217 oster 3622: {
3623: }
3624:
3625:
/*
 * autoconf detach: take the softc lock, run the unlocked detach, then
 * release the lock and (on success) the softc itself.
 */
3626: static int
1.261 dyoung 3627: raid_detach(device_t self, int flags)
1.217 oster 3628: {
1.266 dyoung 3629: 	int error;
1.335 mlelstv 3630: 	struct raid_softc *rs = raidsoftc(self);
1.303 christos 3631: 
3632: 	if (rs == NULL)
3633: 		return ENXIO;
1.266 dyoung 3634: 
3635: 	if ((error = raidlock(rs)) != 0)
1.389 skrll 3636: 		return error;
1.217 oster 3637: 
1.266 dyoung 3638: 	error = raid_detach_unlocked(rs);
3639: 
1.332 mlelstv 3640: 	raidunlock(rs);
3641: 
3642: 	/* XXX raid can be referenced here */
3643: 
3644: 	if (error)
3645: 		return error;
3646: 
3647: 	/* Free the softc */
3648: 	raidput(rs);
3649: 
3650: 	return 0;
1.217 oster 3651: }
3652:
1.234 oster 3653: static void
1.304 christos 3654: rf_set_geometry(struct raid_softc *rs, RF_Raid_t *raidPtr)
1.234 oster 3655: {
1.335 mlelstv 3656: struct dk_softc *dksc = &rs->sc_dksc;
3657: struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;
1.304 christos 3658:
3659: memset(dg, 0, sizeof(*dg));
3660:
3661: dg->dg_secperunit = raidPtr->totalSectors;
3662: dg->dg_secsize = raidPtr->bytesPerSector;
3663: dg->dg_nsectors = raidPtr->Layout.dataSectorsPerStripe;
3664: dg->dg_ntracks = 4 * raidPtr->numCol;
3665:
1.335 mlelstv 3666: disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
1.234 oster 3667: }
1.252 oster 3668:
1.348 jdolecek 3669: /*
3670: * Get cache info for all the components (including spares).
3671: * Returns intersection of all the cache flags of all disks, or first
3672: * error if any encountered.
3673: * XXXfua feature flags can change as spares are added - lock down somehow
3674: */
3675: static int
3676: rf_get_component_caches(RF_Raid_t *raidPtr, int *data)
3677: {
3678: int c;
3679: int error;
3680: int dkwhole = 0, dkpart;
1.385 riastrad 3681:
1.348 jdolecek 3682: for (c = 0; c < raidPtr->numCol + raidPtr->numSpare; c++) {
3683: /*
3684: * Check any non-dead disk, even when currently being
3685: * reconstructed.
3686: */
3687: if (!RF_DEAD_DISK(raidPtr->Disks[c].status)
3688: || raidPtr->Disks[c].status == rf_ds_reconstructing) {
3689: error = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp,
3690: DIOCGCACHE, &dkpart, FREAD, NOCRED);
3691: if (error) {
3692: if (error != ENODEV) {
3693: printf("raid%d: get cache for component %s failed\n",
3694: raidPtr->raidid,
3695: raidPtr->Disks[c].devname);
3696: }
3697:
3698: return error;
3699: }
3700:
3701: if (c == 0)
3702: dkwhole = dkpart;
3703: else
3704: dkwhole = DKCACHE_COMBINE(dkwhole, dkpart);
3705: }
3706: }
3707:
1.349 jdolecek 3708: *data = dkwhole;
1.348 jdolecek 3709:
3710: return 0;
3711: }
3712:
1.385 riastrad 3713: /*
1.252 oster 3714: * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3715: * We end up returning whatever error was returned by the first cache flush
3716: * that fails.
3717: */
3718:
1.386 christos 3719: static int
1.390 christos 3720: rf_sync_component_cache(RF_Raid_t *raidPtr, int c, int force)
1.386 christos 3721: {
3722: int e = 0;
3723: for (int i = 0; i < 5; i++) {
3724: e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3725: &force, FWRITE, NOCRED);
3726: if (!e || e == ENODEV)
3727: return e;
3728: printf("raid%d: cache flush[%d] to component %s failed (%d)\n",
3729: raidPtr->raidid, i, raidPtr->Disks[c].devname, e);
3730: }
1.387 christos 3731: return e;
1.386 christos 3732: }
3733:
1.269 jld 3734: int
1.390 christos 3735: rf_sync_component_caches(RF_Raid_t *raidPtr, int force)
1.252 oster 3736: {
1.386 christos 3737: int c, error;
1.385 riastrad 3738:
1.252 oster 3739: error = 0;
3740: for (c = 0; c < raidPtr->numCol; c++) {
3741: if (raidPtr->Disks[c].status == rf_ds_optimal) {
1.390 christos 3742: int e = rf_sync_component_cache(raidPtr, c, force);
1.387 christos 3743: if (e && !error)
1.386 christos 3744: error = e;
1.252 oster 3745: }
3746: }
3747:
1.386 christos 3748: for (c = 0; c < raidPtr->numSpare ; c++) {
3749: int sparecol = raidPtr->numCol + c;
1.252 oster 3750: /* Need to ensure that the reconstruct actually completed! */
3751: if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.390 christos 3752: int e = rf_sync_component_cache(raidPtr, sparecol,
3753: force);
1.387 christos 3754: if (e && !error)
1.386 christos 3755: error = e;
1.252 oster 3756: }
3757: }
3758: return error;
3759: }
1.327 pgoyette 3760:
1.353 mrg 3761: /* Fill in info with the current status */
3762: void
3763: rf_check_recon_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3764: {
3765:
3766: if (raidPtr->status != rf_rs_reconstructing) {
3767: info->total = 100;
3768: info->completed = 100;
3769: } else {
3770: info->total = raidPtr->reconControl->numRUsTotal;
3771: info->completed = raidPtr->reconControl->numRUsComplete;
3772: }
3773: info->remaining = info->total - info->completed;
3774: }
3775:
3776: /* Fill in info with the current status */
3777: void
3778: rf_check_parityrewrite_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3779: {
3780:
3781: if (raidPtr->parity_rewrite_in_progress == 1) {
3782: info->total = raidPtr->Layout.numStripe;
3783: info->completed = raidPtr->parity_rewrite_stripes_done;
3784: } else {
3785: info->completed = 100;
3786: info->total = 100;
3787: }
3788: info->remaining = info->total - info->completed;
3789: }
3790:
3791: /* Fill in info with the current status */
3792: void
3793: rf_check_copyback_status_ext(RF_Raid_t *raidPtr, RF_ProgressInfo_t *info)
3794: {
3795:
3796: if (raidPtr->copyback_in_progress == 1) {
3797: info->total = raidPtr->Layout.numStripe;
3798: info->completed = raidPtr->copyback_stripes_done;
3799: info->remaining = info->total - info->completed;
3800: } else {
3801: info->remaining = 0;
3802: info->completed = 100;
3803: info->total = 100;
3804: }
3805: }
3806:
3807: /* Fill in config with the current info */
3808: int
3809: rf_get_info(RF_Raid_t *raidPtr, RF_DeviceConfig_t *config)
3810: {
3811: int d, i, j;
3812:
3813: if (!raidPtr->valid)
1.389 skrll 3814: return ENODEV;
1.353 mrg 3815: config->cols = raidPtr->numCol;
3816: config->ndevs = raidPtr->numCol;
3817: if (config->ndevs >= RF_MAX_DISKS)
1.389 skrll 3818: return ENOMEM;
1.353 mrg 3819: config->nspares = raidPtr->numSpare;
3820: if (config->nspares >= RF_MAX_DISKS)
1.389 skrll 3821: return ENOMEM;
1.353 mrg 3822: config->maxqdepth = raidPtr->maxQueueDepth;
3823: d = 0;
3824: for (j = 0; j < config->cols; j++) {
3825: config->devs[d] = raidPtr->Disks[j];
3826: d++;
3827: }
3828: for (j = config->cols, i = 0; i < config->nspares; i++, j++) {
3829: config->spares[i] = raidPtr->Disks[j];
3830: if (config->spares[i].status == rf_ds_rebuilding_spare) {
3831: /* XXX: raidctl(8) expects to see this as a used spare */
3832: config->spares[i].status = rf_ds_used_spare;
3833: }
3834: }
3835: return 0;
3836: }
3837:
3838: int
3839: rf_get_component_label(RF_Raid_t *raidPtr, void *data)
3840: {
3841: RF_ComponentLabel_t *clabel = (RF_ComponentLabel_t *)data;
3842: RF_ComponentLabel_t *raid_clabel;
3843: int column = clabel->column;
3844:
3845: if ((column < 0) || (column >= raidPtr->numCol + raidPtr->numSpare))
3846: return EINVAL;
3847: raid_clabel = raidget_component_label(raidPtr, column);
3848: memcpy(clabel, raid_clabel, sizeof *clabel);
1.394 ! mrg 3849: /* Fix-up for userland. */
! 3850: if (clabel->version == bswap32(RF_COMPONENT_LABEL_VERSION))
! 3851: clabel->version = RF_COMPONENT_LABEL_VERSION;
1.353 mrg 3852:
3853: return 0;
3854: }
3855:
1.327 pgoyette 3856: /*
3857: * Module interface
3858: */
3859:
1.356 pgoyette 3860: MODULE(MODULE_CLASS_DRIVER, raid, "dk_subr,bufq_fcfs");
1.327 pgoyette 3861:
3862: #ifdef _MODULE
3863: CFDRIVER_DECL(raid, DV_DISK, NULL);
3864: #endif
3865:
3866: static int raid_modcmd(modcmd_t, void *);
3867: static int raid_modcmd_init(void);
3868: static int raid_modcmd_fini(void);
3869:
3870: static int
3871: raid_modcmd(modcmd_t cmd, void *data)
3872: {
3873: int error;
3874:
3875: error = 0;
3876: switch (cmd) {
3877: case MODULE_CMD_INIT:
3878: error = raid_modcmd_init();
3879: break;
3880: case MODULE_CMD_FINI:
3881: error = raid_modcmd_fini();
3882: break;
3883: default:
3884: error = ENOTTY;
3885: break;
3886: }
3887: return error;
3888: }
3889:
/*
 * Module initialization: set up global locks, attach the device
 * switch and autoconf glue (undoing earlier steps on failure), boot
 * the RAIDframe core, and register the autoconfig finalizer.
 * Returns 0 on success or the first fatal error.
 */
static int
raid_modcmd_init(void)
{
	int error;
	int bmajor, cmajor;

	mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&raid_lock);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	/* Spare-table request/response machinery for declustered parity. */
	rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
	rf_init_cond2(rf_sparet_wait_cv, "sparetw");
	rf_init_cond2(rf_sparet_resp_cv, "rfgst");

	rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
#endif

	/* -1 asks devsw_attach to allocate the major numbers. */
	bmajor = cmajor = -1;
	error = devsw_attach("raid", &raid_bdevsw, &bmajor,
	    &raid_cdevsw, &cmajor);
	/* EEXIST is tolerated: the devsw may already be present. */
	if (error != 0 && error != EEXIST) {
		aprint_error("%s: devsw_attach failed %d\n", __func__, error);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_attach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: config_cfdriver_attach failed %d\n",
		    __func__, error);
		/* Roll back the devsw attach done above. */
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = config_cfattach_attach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: config_cfattach_attach failed %d\n",
		    __func__, error);
		/* Roll back cfdriver and devsw attach, in reverse order. */
#ifdef _MODULE
		config_cfdriver_detach(&raid_cd);
#endif
		devsw_detach(&raid_bdevsw, &raid_cdevsw);
		mutex_exit(&raid_lock);
		return error;
	}

	raidautoconfigdone = false;

	mutex_exit(&raid_lock);

	/* error is 0 on every path that reaches here. */
	if (error == 0) {
		if (rf_BootRaidframe(true) == 0)
			aprint_verbose("Kernelized RAIDframe activated\n");
		else
			panic("Serious error activating RAID!!");
	}

	/*
	 * Register a finalizer which will be used to auto-config RAID
	 * sets once all real hardware devices have been found.
	 */
	error = config_finalize_register(NULL, rf_autoconfig);
	if (error != 0) {
		/* Non-fatal: autoconfig won't run, but the module works. */
		aprint_error("WARNING: unable to register RAIDframe "
		    "finalizer\n");
		error = 0;
	}

	return error;
}
3960:
/*
 * Module teardown: refuse to unload while any raid device exists,
 * then detach the autoconf glue and devsw (re-attaching earlier
 * pieces if a later detach fails), shut down the RAIDframe core,
 * and destroy the global locks.
 */
static int
raid_modcmd_fini(void)
{
	int error;

	mutex_enter(&raid_lock);

	/* Don't allow unload if raid device(s) exist. */
	if (!LIST_EMPTY(&raids)) {
		mutex_exit(&raid_lock);
		return EBUSY;
	}

	error = config_cfattach_detach(raid_cd.cd_name, &raid_ca);
	if (error != 0) {
		aprint_error("%s: cannot detach cfattach\n",__func__);
		mutex_exit(&raid_lock);
		return error;
	}
#ifdef _MODULE
	error = config_cfdriver_detach(&raid_cd);
	if (error != 0) {
		aprint_error("%s: cannot detach cfdriver\n",__func__);
		/* Undo the cfattach detach so the module stays usable. */
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
#endif
	error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
	if (error != 0) {
		aprint_error("%s: cannot detach devsw\n",__func__);
		/* Re-attach cfdriver and cfattach, mirroring init order. */
#ifdef _MODULE
		config_cfdriver_attach(&raid_cd);
#endif
		config_cfattach_attach(raid_cd.cd_name, &raid_ca);
		mutex_exit(&raid_lock);
		return error;
	}
	/* All detaches succeeded; shut down the RAIDframe core. */
	rf_BootRaidframe(false);
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
	rf_destroy_mutex2(rf_sparet_wait_mutex);
	rf_destroy_cond2(rf_sparet_wait_cv);
	rf_destroy_cond2(rf_sparet_resp_cv);
#endif
	mutex_exit(&raid_lock);
	mutex_destroy(&raid_lock);

	return error;
}
CVSweb <webmaster@jp.NetBSD.org>