Annotation of src/sys/dev/raidframe/rf_netbsdkintf.c, Revision 1.303
1.303 ! christos 1: /* $NetBSD: rf_netbsdkintf.c,v 1.302 2013/04/29 21:21:10 christos Exp $ */
1.281 rmind 2:
1.1 oster 3: /*-
1.295 erh 4: * Copyright (c) 1996, 1997, 1998, 2008-2011 The NetBSD Foundation, Inc.
1.1 oster 5: * All rights reserved.
6: *
7: * This code is derived from software contributed to The NetBSD Foundation
8: * by Greg Oster; Jason R. Thorpe.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
18: *
19: * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20: * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21: * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23: * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29: * POSSIBILITY OF SUCH DAMAGE.
30: */
31:
32: /*
1.281 rmind 33: * Copyright (c) 1988 University of Utah.
1.1 oster 34: * Copyright (c) 1990, 1993
35: * The Regents of the University of California. All rights reserved.
36: *
37: * This code is derived from software contributed to Berkeley by
38: * the Systems Programming Group of the University of Utah Computer
39: * Science Department.
40: *
41: * Redistribution and use in source and binary forms, with or without
42: * modification, are permitted provided that the following conditions
43: * are met:
44: * 1. Redistributions of source code must retain the above copyright
45: * notice, this list of conditions and the following disclaimer.
46: * 2. Redistributions in binary form must reproduce the above copyright
47: * notice, this list of conditions and the following disclaimer in the
48: * documentation and/or other materials provided with the distribution.
1.162 agc 49: * 3. Neither the name of the University nor the names of its contributors
50: * may be used to endorse or promote products derived from this software
51: * without specific prior written permission.
52: *
53: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63: * SUCH DAMAGE.
64: *
65: * from: Utah $Hdr: cd.c 1.6 90/11/28$
66: *
67: * @(#)cd.c 8.2 (Berkeley) 11/16/93
68: */
69:
70: /*
1.1 oster 71: * Copyright (c) 1995 Carnegie-Mellon University.
72: * All rights reserved.
73: *
74: * Authors: Mark Holland, Jim Zelenka
75: *
76: * Permission to use, copy, modify and distribute this software and
77: * its documentation is hereby granted, provided that both the copyright
78: * notice and this permission notice appear in all copies of the
79: * software, derivative works or modified versions, and any portions
80: * thereof, and that both notices appear in supporting documentation.
81: *
82: * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
83: * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
84: * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
85: *
86: * Carnegie Mellon requests users of this software to return to
87: *
88: * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
89: * School of Computer Science
90: * Carnegie Mellon University
91: * Pittsburgh PA 15213-3890
92: *
93: * any improvements or extensions that they make and grant Carnegie the
94: * rights to redistribute these changes.
95: */
96:
97: /***********************************************************
98: *
99: * rf_kintf.c -- the kernel interface routines for RAIDframe
100: *
101: ***********************************************************/
1.112 lukem 102:
103: #include <sys/cdefs.h>
1.303 ! christos 104: __KERNEL_RCSID(0, "$NetBSD: rf_netbsdkintf.c,v 1.302 2013/04/29 21:21:10 christos Exp $");
1.251 ad 105:
106: #ifdef _KERNEL_OPT
1.254 christos 107: #include "opt_compat_netbsd.h"
1.251 ad 108: #include "opt_raid_autoconfig.h"
109: #endif
1.1 oster 110:
1.113 lukem 111: #include <sys/param.h>
1.1 oster 112: #include <sys/errno.h>
113: #include <sys/pool.h>
1.152 thorpej 114: #include <sys/proc.h>
1.1 oster 115: #include <sys/queue.h>
116: #include <sys/disk.h>
117: #include <sys/device.h>
118: #include <sys/stat.h>
119: #include <sys/ioctl.h>
120: #include <sys/fcntl.h>
121: #include <sys/systm.h>
122: #include <sys/vnode.h>
123: #include <sys/disklabel.h>
124: #include <sys/conf.h>
125: #include <sys/buf.h>
1.182 yamt 126: #include <sys/bufq.h>
1.65 oster 127: #include <sys/reboot.h>
1.208 elad 128: #include <sys/kauth.h>
1.8 oster 129:
1.234 oster 130: #include <prop/proplib.h>
131:
1.110 oster 132: #include <dev/raidframe/raidframevar.h>
133: #include <dev/raidframe/raidframeio.h>
1.269 jld 134: #include <dev/raidframe/rf_paritymap.h>
1.251 ad 135:
1.1 oster 136: #include "rf_raid.h"
1.44 oster 137: #include "rf_copyback.h"
1.1 oster 138: #include "rf_dag.h"
139: #include "rf_dagflags.h"
1.99 oster 140: #include "rf_desc.h"
1.1 oster 141: #include "rf_diskqueue.h"
142: #include "rf_etimer.h"
143: #include "rf_general.h"
144: #include "rf_kintf.h"
145: #include "rf_options.h"
146: #include "rf_driver.h"
147: #include "rf_parityscan.h"
148: #include "rf_threadstuff.h"
149:
1.254 christos 150: #ifdef COMPAT_50
151: #include "rf_compat50.h"
152: #endif
153:
#ifdef DEBUG
/* Debug verbosity: db1_printf() output is emitted only when this is > 0. */
int     rf_kdebug_level = 0;
/*
 * db1_printf((fmt, ...)) -- level-1 debug printf.
 *
 * The argument is a complete, parenthesized printf() argument list.
 * Both variants expand to exactly one statement (do { } while (0)) so the
 * macro can be used safely as the un-braced body of an if/else.
 */
#define db1_printf(a) do { if (rf_kdebug_level > 0) printf a; } while (0)
#else				/* DEBUG */
#define db1_printf(a) do { } while (0)
#endif				/* DEBUG */
1.1 oster 160:
/*
 * Spare-table request/response queues and the mutex and condition
 * variables declared for them.  Only compiled in when
 * RF_INCLUDE_PARITY_DECLUSTERING_DS is enabled; all of them are
 * initialized in raidattach().
 */
1.249 oster 161: #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1.289 mrg 162: static rf_declare_mutex2(rf_sparet_wait_mutex);
1.287 mrg 163: static rf_declare_cond2(rf_sparet_wait_cv);
164: static rf_declare_cond2(rf_sparet_resp_cv);
1.1 oster 165:
1.10 oster 166: static RF_SparetWait_t *rf_sparet_wait_queue; /* requests to install a
167: * spare table */
168: static RF_SparetWait_t *rf_sparet_resp_queue; /* responses from
169: * installation process */
1.249 oster 170: #endif
1.153 thorpej 171:
/* malloc(9) type declared for RAIDframe ("RAIDframe structures"). */
172: MALLOC_DEFINE(M_RAIDFRAME, "RAIDframe", "RAIDframe structures");
1.10 oster 173:
1.1 oster 174: /* prototypes */
1.187 christos 175: static void KernelWakeupFunc(struct buf *);
176: static void InitBP(struct buf *, struct vnode *, unsigned,
1.225 christos 177: dev_t, RF_SectorNum_t, RF_SectorCount_t, void *, void (*) (struct buf *),
1.187 christos 178: void *, int, struct proc *);
1.300 christos 179: struct raid_softc;
180: static void raidinit(struct raid_softc *);
1.1 oster 181:
1.104 oster 182: void raidattach(int);
1.261 dyoung 183: static int raid_match(device_t, cfdata_t, void *);
184: static void raid_attach(device_t, device_t, void *);
185: static int raid_detach(device_t, int);
1.130 gehenna 186:
1.269 jld 187: static int raidread_component_area(dev_t, struct vnode *, void *, size_t,
188: daddr_t, daddr_t);
189: static int raidwrite_component_area(dev_t, struct vnode *, void *, size_t,
190: daddr_t, daddr_t, int);
191:
1.276 mrg 192: static int raidwrite_component_label(unsigned,
193: dev_t, struct vnode *, RF_ComponentLabel_t *);
194: static int raidread_component_label(unsigned,
195: dev_t, struct vnode *, RF_ComponentLabel_t *);
1.269 jld 196:
197:
/* Prototypes for the block/character device entry points defined below. */
1.130 gehenna 198: dev_type_open(raidopen);
199: dev_type_close(raidclose);
200: dev_type_read(raidread);
201: dev_type_write(raidwrite);
202: dev_type_ioctl(raidioctl);
203: dev_type_strategy(raidstrategy);
204: dev_type_dump(raiddump);
205: dev_type_size(raidsize);
206:
/* Block device switch entry: open, close, strategy, ioctl, dump, size. */
207: const struct bdevsw raid_bdevsw = {
208: raidopen, raidclose, raidstrategy, raidioctl,
209: raiddump, raidsize, D_DISK
210: };
211:
/*
 * Character (raw) device switch entry.  stop/tty/poll/mmap/kqfilter are
 * filled with the generic no* stubs.
 */
212: const struct cdevsw raid_cdevsw = {
213: raidopen, raidclose, raidread, raidwrite, raidioctl,
1.144 jdolecek 214: nostop, notty, nopoll, nommap, nokqfilter, D_DISK
1.130 gehenna 215: };
1.1 oster 216:
/* Disk driver hooks: the strategy routine and minphys as the transfer-size limiter. */
1.235 oster 217: static struct dkdriver rf_dkdriver = { raidstrategy, minphys };
218:
/*
 * Per-unit software state for a raid(4) pseudo-disk.  Instances are
 * allocated by raidcreate() and kept on the global `raids' list.
 */
1.10 oster 219: struct raid_softc {
1.261 dyoung 220: device_t sc_dev; /* autoconf device for this unit */
1.300 christos 221: int sc_unit; /* unit number; lookup key used by raidget() */
1.10 oster 222: int sc_flags; /* RAIDF_* flags */
223: int sc_cflags; /* configuration flags */
1.212 oster 224: uint64_t sc_size; /* size of the raid device */
1.10 oster 225: char sc_xname[20]; /* XXX external name */
226: struct disk sc_dkdev; /* generic disk device info */
1.191 yamt 227: struct bufq_state *buf_queue; /* used for the device queue */
1.300 christos 228: RF_Raid_t sc_r; /* RAIDframe state for this unit (embedded) */
229: LIST_ENTRY(raid_softc) sc_link; /* linkage on the `raids' list */
1.10 oster 230: };
1.1 oster 231: /* sc_flags: bit values stored in raid_softc.sc_flags */
232: #define RAIDF_INITED 0x01 /* unit has been initialized */
233: #define RAIDF_WLABEL 0x02 /* label area is writable */
234: #define RAIDF_LABELLING 0x04 /* unit is currently being labelled */
1.266 dyoung 235: #define RAIDF_SHUTDOWN 0x08 /* unit is being shut down */
1.1 oster 236: #define RAIDF_WANTED 0x40 /* someone is waiting to obtain a lock */
237: #define RAIDF_LOCKED 0x80 /* unit is locked */
238:
/* Extract the RAID unit number from a dev_t. */
239: #define raidunit(x) DISKUNIT(x)
240:
/*
 * Autoconfiguration glue: declares the "raid" attachment with its softc
 * size and the match/attach/detach entry points.
 * NOTE(review): DVF_DETACH_SHUTDOWN presumably lets the unit be detached
 * at system shutdown -- confirm against the autoconf documentation.
 */
1.202 oster 241: extern struct cfdriver raid_cd;
1.266 dyoung 242: CFATTACH_DECL3_NEW(raid, sizeof(struct raid_softc),
243: raid_match, raid_attach, raid_detach, NULL, NULL, NULL,
244: DVF_DETACH_SHUTDOWN);
1.202 oster 245:
1.186 perry 246: /*
247: * Allow RAIDOUTSTANDING number of simultaneous IO's to this RAID device.
248: * Be aware that large numbers can allow the driver to consume a lot of
1.28 oster 249: * kernel memory, especially on writes, and in degraded mode reads.
1.186 perry 250: *
251: * For example: with a stripe width of 64 blocks (32k) and 5 disks,
252: * a single 64K write will typically require 64K for the old data,
253: * 64K for the old parity, and 64K for the new parity, for a total
1.28 oster 254: * of 192K (if the parity buffer is not re-used immediately).
1.110 oster 255: * Even if it is used immediately, that's still 128K, which when multiplied
1.28 oster 256: * by say 10 requests, is 1280K, *on top* of the 640K of incoming data.
1.186 perry 257: *
1.28 oster 258: * Now in degraded mode, for example, a 64K read on the above setup may
1.186 perry 259: * require data reconstruction, which will require *all* of the 4 remaining
1.28 oster 260: * disks to participate -- 4 * 32K/disk == 128K again.
1.20 oster 261: */
262:
263: #ifndef RAIDOUTSTANDING
1.28 oster 264: #define RAIDOUTSTANDING 6
1.20 oster 265: #endif
266:
/*
 * Map any dev_t of a RAID unit to the dev_t of that same unit's raw
 * partition (same major and unit, partition RAW_PART).
 */
1.1 oster 267: #define RAIDLABELDEV(dev) \
268: (MAKEDISKDEV(major((dev)), raidunit((dev)), RAW_PART))
269:
270: /* declared here, and made public, for the benefit of KVM stuff.. */
1.9 oster 271:
1.186 perry 272: static void raidgetdefaultlabel(RF_Raid_t *, struct raid_softc *,
1.104 oster 273: struct disklabel *);
274: static void raidgetdisklabel(dev_t);
275: static void raidmakedisklabel(struct raid_softc *);
1.1 oster 276:
1.104 oster 277: static int raidlock(struct raid_softc *);
278: static void raidunlock(struct raid_softc *);
1.1 oster 279:
1.266 dyoung 280: static int raid_detach_unlocked(struct raid_softc *);
281:
1.104 oster 282: static void rf_markalldirty(RF_Raid_t *);
1.234 oster 283: static void rf_set_properties(struct raid_softc *, RF_Raid_t *);
1.48 oster 284:
1.104 oster 285: void rf_ReconThread(struct rf_recon_req *);
286: void rf_RewriteParityThread(RF_Raid_t *raidPtr);
287: void rf_CopybackThread(RF_Raid_t *raidPtr);
288: void rf_ReconstructInPlaceThread(struct rf_recon_req *);
1.261 dyoung 289: int rf_autoconfig(device_t);
1.142 thorpej 290: void rf_buildroothack(RF_ConfigSet_t *);
1.104 oster 291:
292: RF_AutoConfig_t *rf_find_raid_components(void);
293: RF_ConfigSet_t *rf_create_auto_sets(RF_AutoConfig_t *);
294: static int rf_does_it_fit(RF_ConfigSet_t *,RF_AutoConfig_t *);
1.292 oster 295: int rf_reasonable_label(RF_ComponentLabel_t *, uint64_t);
1.104 oster 296: void rf_create_configuration(RF_AutoConfig_t *,RF_Config_t *, RF_Raid_t *);
297: int rf_set_autoconfig(RF_Raid_t *, int);
298: int rf_set_rootpartition(RF_Raid_t *, int);
299: void rf_release_all_vps(RF_ConfigSet_t *);
300: void rf_cleanup_config_set(RF_ConfigSet_t *);
301: int rf_have_enough_components(RF_ConfigSet_t *);
1.300 christos 302: struct raid_softc *rf_auto_config_set(RF_ConfigSet_t *);
1.278 mrg 303: static void rf_fix_old_label_size(RF_ComponentLabel_t *, uint64_t);
1.48 oster 304:
1.295 erh 305: /*
306: * Debugging, mostly. Set to 0 to not allow autoconfig to take place.
307: * Note that this is overridden by having RAID_AUTOCONFIG as an option
308: * in the kernel config file.
309: */
310: #ifdef RAID_AUTOCONFIG
311: int raidautoconfig = 1;
312: #else
313: int raidautoconfig = 0;
314: #endif
/* Set by rf_autoconfig() once it has run, so the component scan happens only once. */
315: static bool raidautoconfigdone = false;
1.37 oster 316:
/* NOTE(review): presumably the RAIDframe-wide memory pools; not used in this part of the file. */
1.177 oster 317: struct RF_Pools_s rf_pools;
318:
/* All raid_softc instances, linked through sc_link; populated by raidget(). */
1.300 christos 319: static LIST_HEAD(, raid_softc) raids = LIST_HEAD_INITIALIZER(raids);
/* Protects the `raids' list; initialized in raidattach(). */
320: static kmutex_t raid_lock;
1.1 oster 321:
1.300 christos 322: static struct raid_softc *
323: raidcreate(int unit) {
324: struct raid_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
325: if (sc == NULL) {
1.1 oster 326: #ifdef DIAGNOSTIC
1.300 christos 327: printf("%s: out of memory\n", __func__);
1.1 oster 328: #endif
1.300 christos 329: return NULL;
1.1 oster 330: }
1.300 christos 331: sc->sc_unit = unit;
332: bufq_alloc(&sc->buf_queue, BUFQ_DISK_DEFAULT_STRAT, BUFQ_SORT_RAWBLOCK);
333: return sc;
334: }
1.1 oster 335:
1.300 christos 336: static void
337: raiddestroy(struct raid_softc *sc) {
338: bufq_free(sc->buf_queue);
339: kmem_free(sc, sizeof(*sc));
340: }
1.50 oster 341:
1.300 christos 342: static struct raid_softc *
343: raidget(int unit) {
344: struct raid_softc *sc;
345: if (unit < 0) {
346: #ifdef DIAGNOSTIC
347: panic("%s: unit %d!", __func__, unit);
348: #endif
349: return NULL;
350: }
351: mutex_enter(&raid_lock);
352: LIST_FOREACH(sc, &raids, sc_link) {
353: if (sc->sc_unit == unit) {
354: mutex_exit(&raid_lock);
355: return sc;
356: }
357: }
358: mutex_exit(&raid_lock);
359: if ((sc = raidcreate(unit)) == NULL)
360: return NULL;
361: mutex_enter(&raid_lock);
362: LIST_INSERT_HEAD(&raids, sc, sc_link);
363: mutex_exit(&raid_lock);
364: return sc;
365: }
366:
367: static void
368: raidput(struct raid_softc *sc) {
369: mutex_enter(&raid_lock);
370: LIST_REMOVE(sc, sc_link);
371: mutex_exit(&raid_lock);
372: raiddestroy(sc);
373: }
1.1 oster 374:
1.300 christos 375: void
376: raidattach(int num)
377: {
378: mutex_init(&raid_lock, MUTEX_DEFAULT, IPL_NONE);
379: /* This is where all the initialization stuff gets done. */
1.116 thorpej 380:
1.249 oster 381: #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
1.289 mrg 382: rf_init_mutex2(rf_sparet_wait_mutex, IPL_VM);
383: rf_init_cond2(rf_sparet_wait_cv, "sparetw");
384: rf_init_cond2(rf_sparet_resp_cv, "rfgst");
1.14 oster 385:
386: rf_sparet_wait_queue = rf_sparet_resp_queue = NULL;
1.249 oster 387: #endif
1.14 oster 388:
1.300 christos 389: if (rf_BootRaidframe() == 0)
1.274 chs 390: aprint_verbose("Kernelized RAIDframe activated\n");
1.14 oster 391: else
1.141 provos 392: panic("Serious error booting RAID!!");
1.14 oster 393:
1.217 oster 394: if (config_cfattach_attach(raid_cd.cd_name, &raid_ca)) {
1.239 jmcneill 395: aprint_error("raidattach: config_cfattach_attach failed?\n");
1.217 oster 396: }
397:
1.295 erh 398: raidautoconfigdone = false;
1.62 oster 399:
1.142 thorpej 400: /*
401: * Register a finalizer which will be used to auto-config RAID
402: * sets once all real hardware devices have been found.
403: */
404: if (config_finalize_register(NULL, rf_autoconfig) != 0)
1.239 jmcneill 405: aprint_error("WARNING: unable to register RAIDframe finalizer\n");
1.142 thorpej 406: }
407:
408: int
1.261 dyoung 409: rf_autoconfig(device_t self)
1.142 thorpej 410: {
411: RF_AutoConfig_t *ac_list;
412: RF_ConfigSet_t *config_sets;
413:
1.295 erh 414: if (!raidautoconfig || raidautoconfigdone == true)
1.142 thorpej 415: return (0);
416:
417: /* XXX This code can only be run once. */
1.295 erh 418: raidautoconfigdone = true;
1.142 thorpej 419:
1.48 oster 420: /* 1. locate all RAID components on the system */
1.258 ad 421: aprint_debug("Searching for RAID components...\n");
1.48 oster 422: ac_list = rf_find_raid_components();
423:
1.142 thorpej 424: /* 2. Sort them into their respective sets. */
1.48 oster 425: config_sets = rf_create_auto_sets(ac_list);
426:
1.142 thorpej 427: /*
1.299 oster 428: * 3. Evaluate each set and configure the valid ones.
1.142 thorpej 429: * This gets done in rf_buildroothack().
430: */
431: rf_buildroothack(config_sets);
1.48 oster 432:
1.213 christos 433: return 1;
1.48 oster 434: }
435:
/*
 * rf_buildroothack: auto-configure the given configuration sets and, if
 * possible, pick the root device from among them.
 *
 * Phase 1 walks the config_sets list.  A set that has enough components
 * and whose first component label requests autoconfiguration is passed
 * to rf_auto_config_set(); rootable sets that configure successfully are
 * counted in num_root and remembered in rsc.  Sets that are skipped or
 * fail to configure have their vnodes released.  Every set is cleaned up
 * with rf_cleanup_config_set() regardless of outcome.
 *
 * Phase 2 only runs when the user gave no rootspec:
 *  - exactly one rootable set: booted_device becomes that set's device,
 *    or, if the set has wedges, the wedge named "<xname>a";
 *  - more than one: cpu_rootconf() is asked for help when booted_device
 *    is unset; if it stays unset we return.  Otherwise the valid sets
 *    marked root_partition are scanned for components whose device name
 *    (after "/dev/") starts with the booted device's name.  Exactly one
 *    hit selects that set; anything else sets RB_ASKNAME so the user is
 *    prompted.
 *
 * NOTE(review): in the multi-root scan num_root is incremented once per
 * matching component, not once per set, so a set with two components on
 * the boot device counts twice -- confirm this is intended.
 */
436: void
1.142 thorpej 437: rf_buildroothack(RF_ConfigSet_t *config_sets)
1.48 oster 438: {
439: RF_ConfigSet_t *cset;
440: RF_ConfigSet_t *next_cset;
1.226 oster 441: int col;
1.51 oster 442: int num_root;
1.226 oster 443: char *devname;
1.300 christos 444: struct raid_softc *sc, *rsc;
1.48 oster 445:
1.300 christos 446: sc = rsc = NULL;
1.51 oster 447: num_root = 0;
1.48 oster 448: cset = config_sets;
1.271 dyoung 449: while (cset != NULL) {
/* save the successor first: the set is cleaned up at the end of the iteration */
1.48 oster 450: next_cset = cset->next;
1.186 perry 451: if (rf_have_enough_components(cset) &&
1.300 christos 452: cset->ac->clabel->autoconfigure == 1) {
453: sc = rf_auto_config_set(cset);
454: if (sc != NULL) {
455: aprint_debug("raid%d: configured ok\n",
456: sc->sc_unit);
1.51 oster 457: if (cset->rootable) {
1.300 christos 458: rsc = sc;
1.51 oster 459: num_root++;
460: }
461: } else {
462: /* The autoconfig didn't work :( */
1.300 christos 463: aprint_debug("Autoconfig failed\n");
1.51 oster 464: rf_release_all_vps(cset);
1.48 oster 465: }
466: } else {
1.186 perry 467: /* we're not autoconfiguring this set...
1.48 oster 468: release the associated resources */
1.49 oster 469: rf_release_all_vps(cset);
1.48 oster 470: }
471: /* cleanup */
1.49 oster 472: rf_cleanup_config_set(cset);
1.48 oster 473: cset = next_cset;
474: }
1.122 oster 475:
1.223 oster 476: /* if the user has specified what the root device should be
477: then we don't touch booted_device or boothowto... */
478:
479: if (rootspec != NULL)
480: return;
481:
1.122 oster 482: /* we found something bootable... */
483:
484: if (num_root == 1) {
1.300 christos 485: if (rsc->sc_dkdev.dk_nwedges != 0) {
1.297 christos 486: /* XXX: How do we find the real root partition? */
/* cset is only named inside sizeof here; it is not dereferenced at run time */
487: char cname[sizeof(cset->ac->devname)];
488: snprintf(cname, sizeof(cname), "%s%c",
1.300 christos 489: device_xname(rsc->sc_dev), 'a');
1.297 christos 490: booted_device = dkwedge_find_by_wname(cname);
491: } else
1.300 christos 492: booted_device = rsc->sc_dev;
1.122 oster 493: } else if (num_root > 1) {
1.226 oster 494:
495: /*
496: * Maybe the MD code can help. If it cannot, then
497: * setroot() will discover that we have no
498: * booted_device and will ask the user if nothing was
499: * hardwired in the kernel config file
500: */
501:
502: if (booted_device == NULL)
503: cpu_rootconf();
504: if (booted_device == NULL)
505: return;
506:
/* recount: from here on num_root counts matches against booted_device */
507: num_root = 0;
1.300 christos 508: mutex_enter(&raid_lock);
509: LIST_FOREACH(sc, &raids, sc_link) {
510: RF_Raid_t *r = &sc->sc_r;
511: if (r->valid == 0)
1.226 oster 512: continue;
513:
1.300 christos 514: if (r->root_partition == 0)
1.226 oster 515: continue;
516:
1.300 christos 517: for (col = 0; col < r->numCol; col++) {
518: devname = r->Disks[col].devname;
/* skip the leading "/dev/" of the component path */
1.226 oster 519: devname += sizeof("/dev/") - 1;
1.245 cegger 520: if (strncmp(devname, device_xname(booted_device),
521: strlen(device_xname(booted_device))) != 0)
1.226 oster 522: continue;
1.258 ad 523: aprint_debug("raid%d includes boot device %s\n",
1.300 christos 524: sc->sc_unit, devname);
1.226 oster 525: num_root++;
1.300 christos 526: rsc = sc;
1.226 oster 527: }
528: }
1.300 christos 529: mutex_exit(&raid_lock);
1.295 erh 530:
1.226 oster 531: if (num_root == 1) {
1.300 christos 532: booted_device = rsc->sc_dev;
1.226 oster 533: } else {
534: /* we can't guess.. require the user to answer... */
535: boothowto |= RB_ASKNAME;
536: }
1.51 oster 537: }
1.1 oster 538: }
539:
540:
541: int
1.169 oster 542: raidsize(dev_t dev)
1.1 oster 543: {
544: struct raid_softc *rs;
545: struct disklabel *lp;
1.9 oster 546: int part, unit, omask, size;
1.1 oster 547:
548: unit = raidunit(dev);
1.300 christos 549: if ((rs = raidget(unit)) == NULL)
550: return -1;
1.1 oster 551: if ((rs->sc_flags & RAIDF_INITED) == 0)
552: return (-1);
553:
554: part = DISKPART(dev);
555: omask = rs->sc_dkdev.dk_openmask & (1 << part);
556: lp = rs->sc_dkdev.dk_label;
557:
1.192 christos 558: if (omask == 0 && raidopen(dev, 0, S_IFBLK, curlwp))
1.1 oster 559: return (-1);
560:
561: if (lp->d_partitions[part].p_fstype != FS_SWAP)
562: size = -1;
563: else
564: size = lp->d_partitions[part].p_size *
565: (lp->d_secsize / DEV_BSIZE);
566:
1.192 christos 567: if (omask == 0 && raidclose(dev, 0, S_IFBLK, curlwp))
1.1 oster 568: return (-1);
569:
570: return (size);
571:
572: }
573:
574: int
1.231 oster 575: raiddump(dev_t dev, daddr_t blkno, void *va, size_t size)
1.1 oster 576: {
1.231 oster 577: int unit = raidunit(dev);
578: struct raid_softc *rs;
579: const struct bdevsw *bdev;
580: struct disklabel *lp;
581: RF_Raid_t *raidPtr;
582: daddr_t offset;
583: int part, c, sparecol, j, scol, dumpto;
584: int error = 0;
585:
1.300 christos 586: if ((rs = raidget(unit)) == NULL)
587: return ENXIO;
1.231 oster 588:
1.300 christos 589: raidPtr = &rs->sc_r;
1.231 oster 590:
591: if ((rs->sc_flags & RAIDF_INITED) == 0)
592: return ENXIO;
593:
594: /* we only support dumping to RAID 1 sets */
595: if (raidPtr->Layout.numDataCol != 1 ||
596: raidPtr->Layout.numParityCol != 1)
597: return EINVAL;
598:
599:
600: if ((error = raidlock(rs)) != 0)
601: return error;
602:
603: if (size % DEV_BSIZE != 0) {
604: error = EINVAL;
605: goto out;
606: }
607:
608: if (blkno + size / DEV_BSIZE > rs->sc_size) {
609: printf("%s: blkno (%" PRIu64 ") + size / DEV_BSIZE (%zu) > "
610: "sc->sc_size (%" PRIu64 ")\n", __func__, blkno,
611: size / DEV_BSIZE, rs->sc_size);
612: error = EINVAL;
613: goto out;
614: }
615:
616: part = DISKPART(dev);
617: lp = rs->sc_dkdev.dk_label;
618: offset = lp->d_partitions[part].p_offset + RF_PROTECTED_SECTORS;
619:
620: /* figure out what device is alive.. */
621:
622: /*
623: Look for a component to dump to. The preference for the
624: component to dump to is as follows:
625: 1) the master
626: 2) a used_spare of the master
627: 3) the slave
628: 4) a used_spare of the slave
629: */
630:
631: dumpto = -1;
632: for (c = 0; c < raidPtr->numCol; c++) {
633: if (raidPtr->Disks[c].status == rf_ds_optimal) {
634: /* this might be the one */
635: dumpto = c;
636: break;
637: }
638: }
639:
640: /*
641: At this point we have possibly selected a live master or a
642: live slave. We now check to see if there is a spared
643: master (or a spared slave), if we didn't find a live master
644: or a live slave.
645: */
646:
647: for (c = 0; c < raidPtr->numSpare; c++) {
648: sparecol = raidPtr->numCol + c;
649: if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
650: /* How about this one? */
651: scol = -1;
652: for(j=0;j<raidPtr->numCol;j++) {
653: if (raidPtr->Disks[j].spareCol == sparecol) {
654: scol = j;
655: break;
656: }
657: }
658: if (scol == 0) {
659: /*
660: We must have found a spared master!
661: We'll take that over anything else
662: found so far. (We couldn't have
663: found a real master before, since
664: this is a used spare, and it's
665: saying that it's replacing the
666: master.) On reboot (with
667: autoconfiguration turned on)
668: sparecol will become the 1st
669: component (component0) of this set.
670: */
671: dumpto = sparecol;
672: break;
673: } else if (scol != -1) {
674: /*
675: Must be a spared slave. We'll dump
676: to that if we havn't found anything
677: else so far.
678: */
679: if (dumpto == -1)
680: dumpto = sparecol;
681: }
682: }
683: }
684:
685: if (dumpto == -1) {
686: /* we couldn't find any live components to dump to!?!?
687: */
688: error = EINVAL;
689: goto out;
690: }
691:
692: bdev = bdevsw_lookup(raidPtr->Disks[dumpto].dev);
693:
694: /*
695: Note that blkno is relative to this particular partition.
696: By adding the offset of this partition in the RAID
697: set, and also adding RF_PROTECTED_SECTORS, we get a
698: value that is relative to the partition used for the
699: underlying component.
700: */
701:
702: error = (*bdev->d_dump)(raidPtr->Disks[dumpto].dev,
703: blkno + offset, va, size);
704:
705: out:
706: raidunlock(rs);
707:
708: return error;
1.1 oster 709: }
710: /* ARGSUSED */
711: int
1.222 christos 712: raidopen(dev_t dev, int flags, int fmt,
713: struct lwp *l)
1.1 oster 714: {
1.9 oster 715: int unit = raidunit(dev);
1.1 oster 716: struct raid_softc *rs;
717: struct disklabel *lp;
1.9 oster 718: int part, pmask;
719: int error = 0;
720:
1.300 christos 721: if ((rs = raidget(unit)) == NULL)
722: return ENXIO;
1.1 oster 723: if ((error = raidlock(rs)) != 0)
1.9 oster 724: return (error);
1.266 dyoung 725:
726: if ((rs->sc_flags & RAIDF_SHUTDOWN) != 0) {
727: error = EBUSY;
728: goto bad;
729: }
730:
1.1 oster 731: lp = rs->sc_dkdev.dk_label;
732:
733: part = DISKPART(dev);
1.213 christos 734:
735: /*
736: * If there are wedges, and this is not RAW_PART, then we
737: * need to fail.
738: */
739: if (rs->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
740: error = EBUSY;
741: goto bad;
742: }
1.1 oster 743: pmask = (1 << part);
744:
745: if ((rs->sc_flags & RAIDF_INITED) &&
746: (rs->sc_dkdev.dk_openmask == 0))
1.9 oster 747: raidgetdisklabel(dev);
1.1 oster 748:
749: /* make sure that this partition exists */
750:
751: if (part != RAW_PART) {
752: if (((rs->sc_flags & RAIDF_INITED) == 0) ||
753: ((part >= lp->d_npartitions) ||
1.9 oster 754: (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
1.1 oster 755: error = ENXIO;
1.213 christos 756: goto bad;
1.1 oster 757: }
758: }
759: /* Prevent this unit from being unconfigured while open. */
760: switch (fmt) {
761: case S_IFCHR:
762: rs->sc_dkdev.dk_copenmask |= pmask;
763: break;
764:
765: case S_IFBLK:
766: rs->sc_dkdev.dk_bopenmask |= pmask;
767: break;
768: }
1.13 oster 769:
1.186 perry 770: if ((rs->sc_dkdev.dk_openmask == 0) &&
1.13 oster 771: ((rs->sc_flags & RAIDF_INITED) != 0)) {
772: /* First one... mark things as dirty... Note that we *MUST*
773: have done a configure before this. I DO NOT WANT TO BE
774: SCRIBBLING TO RANDOM COMPONENTS UNTIL IT'S BEEN DETERMINED
775: THAT THEY BELONG TOGETHER!!!!! */
776: /* XXX should check to see if we're only open for reading
777: here... If so, we needn't do this, but then need some
778: other way of keeping track of what's happened.. */
779:
1.300 christos 780: rf_markalldirty(&rs->sc_r);
1.13 oster 781: }
782:
783:
1.1 oster 784: rs->sc_dkdev.dk_openmask =
785: rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
786:
1.213 christos 787: bad:
1.1 oster 788: raidunlock(rs);
789:
1.9 oster 790: return (error);
1.1 oster 791:
792:
793: }
794: /* ARGSUSED */
795: int
1.222 christos 796: raidclose(dev_t dev, int flags, int fmt, struct lwp *l)
1.1 oster 797: {
1.9 oster 798: int unit = raidunit(dev);
1.1 oster 799: struct raid_softc *rs;
1.9 oster 800: int error = 0;
801: int part;
1.1 oster 802:
1.300 christos 803: if ((rs = raidget(unit)) == NULL)
804: return ENXIO;
1.1 oster 805:
806: if ((error = raidlock(rs)) != 0)
807: return (error);
808:
809: part = DISKPART(dev);
810:
811: /* ...that much closer to allowing unconfiguration... */
812: switch (fmt) {
813: case S_IFCHR:
814: rs->sc_dkdev.dk_copenmask &= ~(1 << part);
815: break;
816:
817: case S_IFBLK:
818: rs->sc_dkdev.dk_bopenmask &= ~(1 << part);
819: break;
820: }
821: rs->sc_dkdev.dk_openmask =
822: rs->sc_dkdev.dk_copenmask | rs->sc_dkdev.dk_bopenmask;
1.186 perry 823:
1.13 oster 824: if ((rs->sc_dkdev.dk_openmask == 0) &&
825: ((rs->sc_flags & RAIDF_INITED) != 0)) {
1.186 perry 826: /* Last one... device is not unconfigured yet.
827: Device shutdown has taken care of setting the
828: clean bits if RAIDF_INITED is not set
1.13 oster 829: mark things as clean... */
1.147 oster 830:
1.300 christos 831: rf_update_component_labels(&rs->sc_r,
1.91 oster 832: RF_FINAL_COMPONENT_UPDATE);
1.186 perry 833:
1.266 dyoung 834: /* If the kernel is shutting down, it will detach
835: * this RAID set soon enough.
836: */
1.13 oster 837: }
1.1 oster 838:
839: raidunlock(rs);
840: return (0);
841:
842: }
843:
844: void
1.169 oster 845: raidstrategy(struct buf *bp)
1.1 oster 846: {
1.300 christos 847: unsigned int unit = raidunit(bp->b_dev);
1.1 oster 848: RF_Raid_t *raidPtr;
1.9 oster 849: int wlabel;
1.300 christos 850: struct raid_softc *rs;
1.1 oster 851:
1.300 christos 852: if ((rs = raidget(unit)) == NULL) {
1.30 oster 853: bp->b_error = ENXIO;
1.196 yamt 854: goto done;
1.30 oster 855: }
1.300 christos 856: if ((rs->sc_flags & RAIDF_INITED) == 0) {
857: bp->b_error = ENXIO;
1.196 yamt 858: goto done;
1.1 oster 859: }
1.300 christos 860: raidPtr = &rs->sc_r;
1.1 oster 861: if (!raidPtr->valid) {
862: bp->b_error = ENODEV;
1.196 yamt 863: goto done;
1.1 oster 864: }
865: if (bp->b_bcount == 0) {
866: db1_printf(("b_bcount is zero..\n"));
1.196 yamt 867: goto done;
1.1 oster 868: }
869:
870: /*
871: * Do bounds checking and adjust transfer. If there's an
872: * error, the bounds check will flag that for us.
873: */
874:
1.9 oster 875: wlabel = rs->sc_flags & (RAIDF_WLABEL | RAIDF_LABELLING);
1.196 yamt 876: if (DISKPART(bp->b_dev) == RAW_PART) {
877: uint64_t size; /* device size in DEV_BSIZE unit */
878:
879: if (raidPtr->logBytesPerSector > DEV_BSHIFT) {
880: size = raidPtr->totalSectors <<
881: (raidPtr->logBytesPerSector - DEV_BSHIFT);
882: } else {
883: size = raidPtr->totalSectors >>
884: (DEV_BSHIFT - raidPtr->logBytesPerSector);
885: }
886: if (bounds_check_with_mediasize(bp, DEV_BSIZE, size) <= 0) {
887: goto done;
888: }
889: } else {
1.159 thorpej 890: if (bounds_check_with_label(&rs->sc_dkdev, bp, wlabel) <= 0) {
1.1 oster 891: db1_printf(("Bounds check failed!!:%d %d\n",
1.9 oster 892: (int) bp->b_blkno, (int) wlabel));
1.196 yamt 893: goto done;
1.1 oster 894: }
1.196 yamt 895: }
1.285 mrg 896:
1.286 mrg 897: rf_lock_mutex2(raidPtr->iodone_lock);
1.1 oster 898:
899: bp->b_resid = 0;
1.34 oster 900:
901: /* stuff it onto our queue */
1.253 yamt 902: bufq_put(rs->buf_queue, bp);
1.34 oster 903:
1.190 oster 904: /* scheduled the IO to happen at the next convenient time */
1.286 mrg 905: rf_signal_cond2(raidPtr->iodone_cv);
906: rf_unlock_mutex2(raidPtr->iodone_lock);
1.34 oster 907:
1.196 yamt 908: return;
909:
910: done:
911: bp->b_resid = bp->b_bcount;
912: biodone(bp);
1.1 oster 913: }
914: /* ARGSUSED */
915: int
1.222 christos 916: raidread(dev_t dev, struct uio *uio, int flags)
1.1 oster 917: {
1.9 oster 918: int unit = raidunit(dev);
1.1 oster 919: struct raid_softc *rs;
920:
1.300 christos 921: if ((rs = raidget(unit)) == NULL)
922: return ENXIO;
1.1 oster 923:
924: if ((rs->sc_flags & RAIDF_INITED) == 0)
925: return (ENXIO);
926:
927: return (physio(raidstrategy, NULL, dev, B_READ, minphys, uio));
928:
929: }
930: /* ARGSUSED */
931: int
1.222 christos 932: raidwrite(dev_t dev, struct uio *uio, int flags)
1.1 oster 933: {
1.9 oster 934: int unit = raidunit(dev);
1.1 oster 935: struct raid_softc *rs;
936:
1.300 christos 937: if ((rs = raidget(unit)) == NULL)
938: return ENXIO;
1.1 oster 939:
940: if ((rs->sc_flags & RAIDF_INITED) == 0)
941: return (ENXIO);
1.147 oster 942:
1.1 oster 943: return (physio(raidstrategy, NULL, dev, B_WRITE, minphys, uio));
944:
945: }
946:
1.266 dyoung 947: static int
948: raid_detach_unlocked(struct raid_softc *rs)
949: {
950: int error;
951: RF_Raid_t *raidPtr;
952:
1.300 christos 953: raidPtr = &rs->sc_r;
1.266 dyoung 954:
955: /*
956: * If somebody has a partition mounted, we shouldn't
957: * shutdown.
958: */
959: if (rs->sc_dkdev.dk_openmask != 0)
960: return EBUSY;
961:
962: if ((rs->sc_flags & RAIDF_INITED) == 0)
963: ; /* not initialized: nothing to do */
964: else if ((error = rf_Shutdown(raidPtr)) != 0)
965: return error;
966: else
967: rs->sc_flags &= ~(RAIDF_INITED|RAIDF_SHUTDOWN);
968:
969: /* Detach the disk. */
1.280 christos 970: dkwedge_delall(&rs->sc_dkdev);
1.266 dyoung 971: disk_detach(&rs->sc_dkdev);
972: disk_destroy(&rs->sc_dkdev);
973:
1.290 mrg 974: aprint_normal_dev(rs->sc_dev, "detached\n");
975:
1.266 dyoung 976: return 0;
977: }
978:
1.1 oster 979: int
1.225 christos 980: raidioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1.1 oster 981: {
1.9 oster 982: int unit = raidunit(dev);
983: int error = 0;
1.298 buhrow 984: int part, pmask, s;
1.262 cegger 985: cfdata_t cf;
1.1 oster 986: struct raid_softc *rs;
987: RF_Config_t *k_cfg, *u_cfg;
1.42 oster 988: RF_Raid_t *raidPtr;
1.48 oster 989: RF_RaidDisk_t *diskPtr;
1.41 oster 990: RF_AccTotals_t *totals;
991: RF_DeviceConfig_t *d_cfg, **ucfgp;
1.1 oster 992: u_char *specific_buf;
1.11 oster 993: int retcode = 0;
994: int column;
1.269 jld 995: /* int raidid; */
1.1 oster 996: struct rf_recon_req *rrcopy, *rr;
1.48 oster 997: RF_ComponentLabel_t *clabel;
1.209 oster 998: RF_ComponentLabel_t *ci_label;
1.48 oster 999: RF_ComponentLabel_t **clabel_ptr;
1.12 oster 1000: RF_SingleComponent_t *sparePtr,*componentPtr;
1001: RF_SingleComponent_t component;
1.83 oster 1002: RF_ProgressInfo_t progressInfo, **progressInfoPtr;
1.41 oster 1003: int i, j, d;
1.102 fvdl 1004: #ifdef __HAVE_OLD_DISKLABEL
1005: struct disklabel newlabel;
1006: #endif
1.213 christos 1007: struct dkwedge_info *dkw;
1.1 oster 1008:
1.300 christos 1009: if ((rs = raidget(unit)) == NULL)
1010: return ENXIO;
1011: raidPtr = &rs->sc_r;
1.1 oster 1012:
1.276 mrg 1013: db1_printf(("raidioctl: %d %d %d %lu\n", (int) dev,
1014: (int) DISKPART(dev), (int) unit, cmd));
1.1 oster 1015:
1016: /* Must be open for writes for these commands... */
1017: switch (cmd) {
1.213 christos 1018: #ifdef DIOCGSECTORSIZE
1019: case DIOCGSECTORSIZE:
1020: *(u_int *)data = raidPtr->bytesPerSector;
1021: return 0;
1022: case DIOCGMEDIASIZE:
1023: *(off_t *)data =
1024: (off_t)raidPtr->totalSectors * raidPtr->bytesPerSector;
1025: return 0;
1026: #endif
1.1 oster 1027: case DIOCSDINFO:
1028: case DIOCWDINFO:
1.102 fvdl 1029: #ifdef __HAVE_OLD_DISKLABEL
1030: case ODIOCWDINFO:
1031: case ODIOCSDINFO:
1032: #endif
1.1 oster 1033: case DIOCWLABEL:
1.213 christos 1034: case DIOCAWEDGE:
1035: case DIOCDWEDGE:
1.298 buhrow 1036: case DIOCSSTRATEGY:
1.1 oster 1037: if ((flag & FWRITE) == 0)
1038: return (EBADF);
1039: }
1040:
1041: /* Must be initialized for these... */
1042: switch (cmd) {
1043: case DIOCGDINFO:
1044: case DIOCSDINFO:
1045: case DIOCWDINFO:
1.102 fvdl 1046: #ifdef __HAVE_OLD_DISKLABEL
1047: case ODIOCGDINFO:
1048: case ODIOCWDINFO:
1049: case ODIOCSDINFO:
1050: case ODIOCGDEFLABEL:
1051: #endif
1.1 oster 1052: case DIOCGPART:
1053: case DIOCWLABEL:
1054: case DIOCGDEFLABEL:
1.213 christos 1055: case DIOCAWEDGE:
1056: case DIOCDWEDGE:
1057: case DIOCLWEDGES:
1.252 oster 1058: case DIOCCACHESYNC:
1.1 oster 1059: case RAIDFRAME_SHUTDOWN:
1060: case RAIDFRAME_REWRITEPARITY:
1061: case RAIDFRAME_GET_INFO:
1062: case RAIDFRAME_RESET_ACCTOTALS:
1063: case RAIDFRAME_GET_ACCTOTALS:
1064: case RAIDFRAME_KEEP_ACCTOTALS:
1065: case RAIDFRAME_GET_SIZE:
1066: case RAIDFRAME_FAIL_DISK:
1067: case RAIDFRAME_COPYBACK:
1.37 oster 1068: case RAIDFRAME_CHECK_RECON_STATUS:
1.83 oster 1069: case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1.11 oster 1070: case RAIDFRAME_GET_COMPONENT_LABEL:
1071: case RAIDFRAME_SET_COMPONENT_LABEL:
1072: case RAIDFRAME_ADD_HOT_SPARE:
1073: case RAIDFRAME_REMOVE_HOT_SPARE:
1074: case RAIDFRAME_INIT_LABELS:
1.12 oster 1075: case RAIDFRAME_REBUILD_IN_PLACE:
1.23 oster 1076: case RAIDFRAME_CHECK_PARITY:
1.37 oster 1077: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1.83 oster 1078: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1.37 oster 1079: case RAIDFRAME_CHECK_COPYBACK_STATUS:
1.83 oster 1080: case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1.48 oster 1081: case RAIDFRAME_SET_AUTOCONFIG:
1082: case RAIDFRAME_SET_ROOT:
1.73 oster 1083: case RAIDFRAME_DELETE_COMPONENT:
1084: case RAIDFRAME_INCORPORATE_HOT_SPARE:
1.269 jld 1085: case RAIDFRAME_PARITYMAP_STATUS:
1086: case RAIDFRAME_PARITYMAP_GET_DISABLE:
1087: case RAIDFRAME_PARITYMAP_SET_DISABLE:
1088: case RAIDFRAME_PARITYMAP_SET_PARAMS:
1.298 buhrow 1089: case DIOCGSTRATEGY:
1090: case DIOCSSTRATEGY:
1.1 oster 1091: if ((rs->sc_flags & RAIDF_INITED) == 0)
1092: return (ENXIO);
1093: }
1.9 oster 1094:
1.1 oster 1095: switch (cmd) {
1.254 christos 1096: #ifdef COMPAT_50
1097: case RAIDFRAME_GET_INFO50:
1098: return rf_get_info50(raidPtr, data);
1099:
1100: case RAIDFRAME_CONFIGURE50:
1101: if ((retcode = rf_config50(raidPtr, unit, data, &k_cfg)) != 0)
1102: return retcode;
1103: goto config;
1104: #endif
1.1 oster 1105: /* configure the system */
1106: case RAIDFRAME_CONFIGURE:
1.48 oster 1107:
1108: if (raidPtr->valid) {
1109: /* There is a valid RAID set running on this unit! */
1110: printf("raid%d: Device already configured!\n",unit);
1.66 oster 1111: return(EINVAL);
1.48 oster 1112: }
1113:
1.1 oster 1114: /* copy-in the configuration information */
1115: /* data points to a pointer to the configuration structure */
1.43 oster 1116:
1.9 oster 1117: u_cfg = *((RF_Config_t **) data);
1118: RF_Malloc(k_cfg, sizeof(RF_Config_t), (RF_Config_t *));
1.1 oster 1119: if (k_cfg == NULL) {
1.9 oster 1120: return (ENOMEM);
1.1 oster 1121: }
1.156 dsl 1122: retcode = copyin(u_cfg, k_cfg, sizeof(RF_Config_t));
1.1 oster 1123: if (retcode) {
1.33 oster 1124: RF_Free(k_cfg, sizeof(RF_Config_t));
1.46 oster 1125: db1_printf(("rf_ioctl: retcode=%d copyin.1\n",
1.9 oster 1126: retcode));
1127: return (retcode);
1.1 oster 1128: }
1.254 christos 1129: goto config;
1130: config:
1.9 oster 1131: /* allocate a buffer for the layout-specific data, and copy it
1132: * in */
1.1 oster 1133: if (k_cfg->layoutSpecificSize) {
1.9 oster 1134: if (k_cfg->layoutSpecificSize > 10000) {
1.1 oster 1135: /* sanity check */
1.33 oster 1136: RF_Free(k_cfg, sizeof(RF_Config_t));
1.9 oster 1137: return (EINVAL);
1.1 oster 1138: }
1.9 oster 1139: RF_Malloc(specific_buf, k_cfg->layoutSpecificSize,
1140: (u_char *));
1.1 oster 1141: if (specific_buf == NULL) {
1.9 oster 1142: RF_Free(k_cfg, sizeof(RF_Config_t));
1143: return (ENOMEM);
1.1 oster 1144: }
1.156 dsl 1145: retcode = copyin(k_cfg->layoutSpecific, specific_buf,
1.9 oster 1146: k_cfg->layoutSpecificSize);
1.1 oster 1147: if (retcode) {
1.33 oster 1148: RF_Free(k_cfg, sizeof(RF_Config_t));
1.186 perry 1149: RF_Free(specific_buf,
1.42 oster 1150: k_cfg->layoutSpecificSize);
1.46 oster 1151: db1_printf(("rf_ioctl: retcode=%d copyin.2\n",
1.9 oster 1152: retcode));
1153: return (retcode);
1.1 oster 1154: }
1.9 oster 1155: } else
1156: specific_buf = NULL;
1.1 oster 1157: k_cfg->layoutSpecific = specific_buf;
1.9 oster 1158:
1159: /* should do some kind of sanity check on the configuration.
1160: * Store the sum of all the bytes in the last byte? */
1.1 oster 1161:
1162: /* configure the system */
1163:
1.48 oster 1164: /*
1165: * Clear the entire RAID descriptor, just to make sure
1.186 perry 1166: * there is no stale data left in the case of a
1167: * reconfiguration
1.48 oster 1168: */
1.277 christos 1169: memset(raidPtr, 0, sizeof(*raidPtr));
1.302 christos 1170: raidPtr->softc = rs;
1.42 oster 1171: raidPtr->raidid = unit;
1.20 oster 1172:
1.48 oster 1173: retcode = rf_Configure(raidPtr, k_cfg, NULL);
1.1 oster 1174:
1.40 oster 1175: if (retcode == 0) {
1.37 oster 1176:
1.186 perry 1177: /* allow this many simultaneous IO's to
1.40 oster 1178: this RAID device */
1.42 oster 1179: raidPtr->openings = RAIDOUTSTANDING;
1.186 perry 1180:
1.300 christos 1181: raidinit(rs);
1.59 oster 1182: rf_markalldirty(raidPtr);
1.9 oster 1183: }
1.1 oster 1184: /* free the buffers. No return code here. */
1185: if (k_cfg->layoutSpecificSize) {
1.9 oster 1186: RF_Free(specific_buf, k_cfg->layoutSpecificSize);
1.1 oster 1187: }
1.9 oster 1188: RF_Free(k_cfg, sizeof(RF_Config_t));
1189:
1190: return (retcode);
1191:
1192: /* shutdown the system */
1.1 oster 1193: case RAIDFRAME_SHUTDOWN:
1.9 oster 1194:
1.266 dyoung 1195: part = DISKPART(dev);
1196: pmask = (1 << part);
1197:
1.9 oster 1198: if ((error = raidlock(rs)) != 0)
1199: return (error);
1.1 oster 1200:
1.9 oster 1201: if ((rs->sc_dkdev.dk_openmask & ~pmask) ||
1202: ((rs->sc_dkdev.dk_bopenmask & pmask) &&
1.266 dyoung 1203: (rs->sc_dkdev.dk_copenmask & pmask)))
1204: retcode = EBUSY;
1205: else {
1206: rs->sc_flags |= RAIDF_SHUTDOWN;
1207: rs->sc_dkdev.dk_copenmask &= ~pmask;
1208: rs->sc_dkdev.dk_bopenmask &= ~pmask;
1209: rs->sc_dkdev.dk_openmask &= ~pmask;
1210: retcode = 0;
1.9 oster 1211: }
1.11 oster 1212:
1.266 dyoung 1213: raidunlock(rs);
1.1 oster 1214:
1.266 dyoung 1215: if (retcode != 0)
1216: return retcode;
1.16 oster 1217:
1.217 oster 1218: /* free the pseudo device attach bits */
1219:
1220: cf = device_cfdata(rs->sc_dev);
1.266 dyoung 1221: if ((retcode = config_detach(rs->sc_dev, DETACH_QUIET)) == 0)
1222: free(cf, M_RAIDFRAME);
1.1 oster 1223:
1.9 oster 1224: return (retcode);
1.11 oster 1225: case RAIDFRAME_GET_COMPONENT_LABEL:
1.48 oster 1226: clabel_ptr = (RF_ComponentLabel_t **) data;
1.11 oster 1227: /* need to read the component label for the disk indicated
1.48 oster 1228: by row,column in clabel */
1.11 oster 1229:
1.269 jld 1230: /*
1231: * Perhaps there should be an option to skip the in-core
1232: * copy and hit the disk, as with disklabel(8).
1233: */
1234: RF_Malloc(clabel, sizeof(*clabel), (RF_ComponentLabel_t *));
1.11 oster 1235:
1.277 christos 1236: retcode = copyin(*clabel_ptr, clabel, sizeof(*clabel));
1.11 oster 1237:
1238: if (retcode) {
1.277 christos 1239: RF_Free(clabel, sizeof(*clabel));
1240: return retcode;
1.11 oster 1241: }
1242:
1.166 oster 1243: clabel->row = 0; /* Don't allow looking at anything else.*/
1244:
1.48 oster 1245: column = clabel->column;
1.26 oster 1246:
1.166 oster 1247: if ((column < 0) || (column >= raidPtr->numCol +
1.277 christos 1248: raidPtr->numSpare)) {
1249: RF_Free(clabel, sizeof(*clabel));
1250: return EINVAL;
1.11 oster 1251: }
1252:
1.269 jld 1253: RF_Free(clabel, sizeof(*clabel));
1254:
1255: clabel = raidget_component_label(raidPtr, column);
1.11 oster 1256:
1.277 christos 1257: return copyout(clabel, *clabel_ptr, sizeof(**clabel_ptr));
1.11 oster 1258:
1.269 jld 1259: #if 0
1.11 oster 1260: case RAIDFRAME_SET_COMPONENT_LABEL:
1.48 oster 1261: clabel = (RF_ComponentLabel_t *) data;
1.11 oster 1262:
1263: /* XXX check the label for valid stuff... */
1264: /* Note that some things *should not* get modified --
1.186 perry 1265: the user should be re-initing the labels instead of
1.11 oster 1266: trying to patch things.
1267: */
1268:
1.123 oster 1269: raidid = raidPtr->raidid;
1.224 oster 1270: #ifdef DEBUG
1.123 oster 1271: printf("raid%d: Got component label:\n", raidid);
1272: printf("raid%d: Version: %d\n", raidid, clabel->version);
1273: printf("raid%d: Serial Number: %d\n", raidid, clabel->serial_number);
1274: printf("raid%d: Mod counter: %d\n", raidid, clabel->mod_counter);
1275: printf("raid%d: Column: %d\n", raidid, clabel->column);
1276: printf("raid%d: Num Columns: %d\n", raidid, clabel->num_columns);
1277: printf("raid%d: Clean: %d\n", raidid, clabel->clean);
1278: printf("raid%d: Status: %d\n", raidid, clabel->status);
1.174 oster 1279: #endif
1.166 oster 1280: clabel->row = 0;
1.48 oster 1281: column = clabel->column;
1.12 oster 1282:
1.166 oster 1283: if ((column < 0) || (column >= raidPtr->numCol)) {
1.12 oster 1284: return(EINVAL);
1.11 oster 1285: }
1.12 oster 1286:
1287: /* XXX this isn't allowed to do anything for now :-) */
1.48 oster 1288:
1289: /* XXX and before it is, we need to fill in the rest
1290: of the fields!?!?!?! */
1.269 jld 1291: memcpy(raidget_component_label(raidPtr, column),
1292: clabel, sizeof(*clabel));
1293: raidflush_component_label(raidPtr, column);
1294: return (0);
1.12 oster 1295: #endif
1.11 oster 1296:
1.186 perry 1297: case RAIDFRAME_INIT_LABELS:
1.48 oster 1298: clabel = (RF_ComponentLabel_t *) data;
1.186 perry 1299: /*
1.11 oster 1300: we only want the serial number from
1301: the above. We get all the rest of the information
1302: from the config that was used to create this RAID
1.186 perry 1303: set.
1.11 oster 1304: */
1.12 oster 1305:
1.48 oster 1306: raidPtr->serial_number = clabel->serial_number;
1.186 perry 1307:
1.166 oster 1308: for(column=0;column<raidPtr->numCol;column++) {
1309: diskPtr = &raidPtr->Disks[column];
1310: if (!RF_DEAD_DISK(diskPtr->status)) {
1.269 jld 1311: ci_label = raidget_component_label(raidPtr,
1312: column);
1313: /* Zeroing this is important. */
1314: memset(ci_label, 0, sizeof(*ci_label));
1315: raid_init_component_label(raidPtr, ci_label);
1316: ci_label->serial_number =
1317: raidPtr->serial_number;
1318: ci_label->row = 0; /* we dont' pretend to support more */
1.282 enami 1319: rf_component_label_set_partitionsize(ci_label,
1320: diskPtr->partitionSize);
1.209 oster 1321: ci_label->column = column;
1.269 jld 1322: raidflush_component_label(raidPtr, column);
1.11 oster 1323: }
1.269 jld 1324: /* XXXjld what about the spares? */
1.11 oster 1325: }
1.209 oster 1326:
1.11 oster 1327: return (retcode);
1.48 oster 1328: case RAIDFRAME_SET_AUTOCONFIG:
1.78 minoura 1329: d = rf_set_autoconfig(raidPtr, *(int *) data);
1.186 perry 1330: printf("raid%d: New autoconfig value is: %d\n",
1.123 oster 1331: raidPtr->raidid, d);
1.78 minoura 1332: *(int *) data = d;
1.48 oster 1333: return (retcode);
1334:
1335: case RAIDFRAME_SET_ROOT:
1.78 minoura 1336: d = rf_set_rootpartition(raidPtr, *(int *) data);
1.186 perry 1337: printf("raid%d: New rootpartition value is: %d\n",
1.123 oster 1338: raidPtr->raidid, d);
1.78 minoura 1339: *(int *) data = d;
1.48 oster 1340: return (retcode);
1.9 oster 1341:
1.1 oster 1342: /* initialize all parity */
1343: case RAIDFRAME_REWRITEPARITY:
1344:
1.42 oster 1345: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.17 oster 1346: /* Parity for RAID 0 is trivially correct */
1.42 oster 1347: raidPtr->parity_good = RF_RAID_CLEAN;
1.17 oster 1348: return(0);
1349: }
1.186 perry 1350:
1.42 oster 1351: if (raidPtr->parity_rewrite_in_progress == 1) {
1.37 oster 1352: /* Re-write is already in progress! */
1353: return(EINVAL);
1354: }
1.27 oster 1355:
1.42 oster 1356: retcode = RF_CREATE_THREAD(raidPtr->parity_rewrite_thread,
1.37 oster 1357: rf_RewriteParityThread,
1.42 oster 1358: raidPtr,"raid_parity");
1.9 oster 1359: return (retcode);
1360:
1.11 oster 1361:
1362: case RAIDFRAME_ADD_HOT_SPARE:
1.12 oster 1363: sparePtr = (RF_SingleComponent_t *) data;
1.209 oster 1364: memcpy( &component, sparePtr, sizeof(RF_SingleComponent_t));
1365: retcode = rf_add_hot_spare(raidPtr, &component);
1.11 oster 1366: return(retcode);
1367:
1368: case RAIDFRAME_REMOVE_HOT_SPARE:
1.73 oster 1369: return(retcode);
1370:
1371: case RAIDFRAME_DELETE_COMPONENT:
1372: componentPtr = (RF_SingleComponent_t *)data;
1.186 perry 1373: memcpy( &component, componentPtr,
1.73 oster 1374: sizeof(RF_SingleComponent_t));
1375: retcode = rf_delete_component(raidPtr, &component);
1376: return(retcode);
1377:
1378: case RAIDFRAME_INCORPORATE_HOT_SPARE:
1379: componentPtr = (RF_SingleComponent_t *)data;
1.186 perry 1380: memcpy( &component, componentPtr,
1.73 oster 1381: sizeof(RF_SingleComponent_t));
1382: retcode = rf_incorporate_hot_spare(raidPtr, &component);
1.11 oster 1383: return(retcode);
1384:
1.12 oster 1385: case RAIDFRAME_REBUILD_IN_PLACE:
1.24 oster 1386:
1.42 oster 1387: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.24 oster 1388: /* Can't do this on a RAID 0!! */
1389: return(EINVAL);
1390: }
1391:
1.42 oster 1392: if (raidPtr->recon_in_progress == 1) {
1.37 oster 1393: /* a reconstruct is already in progress! */
1394: return(EINVAL);
1395: }
1396:
1.12 oster 1397: componentPtr = (RF_SingleComponent_t *) data;
1.186 perry 1398: memcpy( &component, componentPtr,
1.12 oster 1399: sizeof(RF_SingleComponent_t));
1.166 oster 1400: component.row = 0; /* we don't support any more */
1.12 oster 1401: column = component.column;
1.147 oster 1402:
1.166 oster 1403: if ((column < 0) || (column >= raidPtr->numCol)) {
1.12 oster 1404: return(EINVAL);
1405: }
1.37 oster 1406:
1.291 mrg 1407: rf_lock_mutex2(raidPtr->mutex);
1.166 oster 1408: if ((raidPtr->Disks[column].status == rf_ds_optimal) &&
1.186 perry 1409: (raidPtr->numFailures > 0)) {
1.149 oster 1410: /* XXX 0 above shouldn't be constant!!! */
1411: /* some component other than this has failed.
1412: Let's not make things worse than they already
1413: are... */
1414: printf("raid%d: Unable to reconstruct to disk at:\n",
1415: raidPtr->raidid);
1.166 oster 1416: printf("raid%d: Col: %d Too many failures.\n",
1417: raidPtr->raidid, column);
1.291 mrg 1418: rf_unlock_mutex2(raidPtr->mutex);
1.149 oster 1419: return (EINVAL);
1420: }
1.186 perry 1421: if (raidPtr->Disks[column].status ==
1.149 oster 1422: rf_ds_reconstructing) {
1423: printf("raid%d: Unable to reconstruct to disk at:\n",
1424: raidPtr->raidid);
1.299 oster 1425: printf("raid%d: Col: %d Reconstruction already occurring!\n", raidPtr->raidid, column);
1.186 perry 1426:
1.291 mrg 1427: rf_unlock_mutex2(raidPtr->mutex);
1.149 oster 1428: return (EINVAL);
1429: }
1.166 oster 1430: if (raidPtr->Disks[column].status == rf_ds_spared) {
1.291 mrg 1431: rf_unlock_mutex2(raidPtr->mutex);
1.149 oster 1432: return (EINVAL);
1433: }
1.291 mrg 1434: rf_unlock_mutex2(raidPtr->mutex);
1.149 oster 1435:
1.37 oster 1436: RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1.38 oster 1437: if (rrcopy == NULL)
1438: return(ENOMEM);
1.37 oster 1439:
1.42 oster 1440: rrcopy->raidPtr = (void *) raidPtr;
1.37 oster 1441: rrcopy->col = column;
1442:
1.42 oster 1443: retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1.37 oster 1444: rf_ReconstructInPlaceThread,
1445: rrcopy,"raid_reconip");
1.12 oster 1446: return(retcode);
1447:
1.1 oster 1448: case RAIDFRAME_GET_INFO:
1.42 oster 1449: if (!raidPtr->valid)
1.41 oster 1450: return (ENODEV);
1451: ucfgp = (RF_DeviceConfig_t **) data;
1452: RF_Malloc(d_cfg, sizeof(RF_DeviceConfig_t),
1453: (RF_DeviceConfig_t *));
1454: if (d_cfg == NULL)
1455: return (ENOMEM);
1.166 oster 1456: d_cfg->rows = 1; /* there is only 1 row now */
1.42 oster 1457: d_cfg->cols = raidPtr->numCol;
1.166 oster 1458: d_cfg->ndevs = raidPtr->numCol;
1.41 oster 1459: if (d_cfg->ndevs >= RF_MAX_DISKS) {
1460: RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1461: return (ENOMEM);
1462: }
1.42 oster 1463: d_cfg->nspares = raidPtr->numSpare;
1.41 oster 1464: if (d_cfg->nspares >= RF_MAX_DISKS) {
1465: RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1466: return (ENOMEM);
1467: }
1.42 oster 1468: d_cfg->maxqdepth = raidPtr->maxQueueDepth;
1.41 oster 1469: d = 0;
1.166 oster 1470: for (j = 0; j < d_cfg->cols; j++) {
1471: d_cfg->devs[d] = raidPtr->Disks[j];
1472: d++;
1.41 oster 1473: }
1474: for (j = d_cfg->cols, i = 0; i < d_cfg->nspares; i++, j++) {
1.166 oster 1475: d_cfg->spares[i] = raidPtr->Disks[j];
1.41 oster 1476: }
1.156 dsl 1477: retcode = copyout(d_cfg, *ucfgp, sizeof(RF_DeviceConfig_t));
1.41 oster 1478: RF_Free(d_cfg, sizeof(RF_DeviceConfig_t));
1479:
1480: return (retcode);
1.9 oster 1481:
1.22 oster 1482: case RAIDFRAME_CHECK_PARITY:
1.42 oster 1483: *(int *) data = raidPtr->parity_good;
1.22 oster 1484: return (0);
1.41 oster 1485:
1.269 jld 1486: case RAIDFRAME_PARITYMAP_STATUS:
1.273 jld 1487: if (rf_paritymap_ineligible(raidPtr))
1488: return EINVAL;
1.269 jld 1489: rf_paritymap_status(raidPtr->parity_map,
1490: (struct rf_pmstat *)data);
1491: return 0;
1492:
1493: case RAIDFRAME_PARITYMAP_SET_PARAMS:
1.273 jld 1494: if (rf_paritymap_ineligible(raidPtr))
1495: return EINVAL;
1.269 jld 1496: if (raidPtr->parity_map == NULL)
1497: return ENOENT; /* ??? */
1498: if (0 != rf_paritymap_set_params(raidPtr->parity_map,
1499: (struct rf_pmparams *)data, 1))
1500: return EINVAL;
1501: return 0;
1502:
1503: case RAIDFRAME_PARITYMAP_GET_DISABLE:
1.273 jld 1504: if (rf_paritymap_ineligible(raidPtr))
1505: return EINVAL;
1.269 jld 1506: *(int *) data = rf_paritymap_get_disable(raidPtr);
1507: return 0;
1508:
1509: case RAIDFRAME_PARITYMAP_SET_DISABLE:
1.273 jld 1510: if (rf_paritymap_ineligible(raidPtr))
1511: return EINVAL;
1.269 jld 1512: rf_paritymap_set_disable(raidPtr, *(int *)data);
1513: /* XXX should errors be passed up? */
1514: return 0;
1515:
1.1 oster 1516: case RAIDFRAME_RESET_ACCTOTALS:
1.108 thorpej 1517: memset(&raidPtr->acc_totals, 0, sizeof(raidPtr->acc_totals));
1.41 oster 1518: return (0);
1.9 oster 1519:
1.1 oster 1520: case RAIDFRAME_GET_ACCTOTALS:
1.41 oster 1521: totals = (RF_AccTotals_t *) data;
1.42 oster 1522: *totals = raidPtr->acc_totals;
1.41 oster 1523: return (0);
1.9 oster 1524:
1.1 oster 1525: case RAIDFRAME_KEEP_ACCTOTALS:
1.42 oster 1526: raidPtr->keep_acc_totals = *(int *)data;
1.41 oster 1527: return (0);
1.9 oster 1528:
1.1 oster 1529: case RAIDFRAME_GET_SIZE:
1.42 oster 1530: *(int *) data = raidPtr->totalSectors;
1.9 oster 1531: return (0);
1.1 oster 1532:
1533: /* fail a disk & optionally start reconstruction */
1534: case RAIDFRAME_FAIL_DISK:
1.24 oster 1535:
1.42 oster 1536: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.24 oster 1537: /* Can't do this on a RAID 0!! */
1538: return(EINVAL);
1539: }
1540:
1.1 oster 1541: rr = (struct rf_recon_req *) data;
1.166 oster 1542: rr->row = 0;
1543: if (rr->col < 0 || rr->col >= raidPtr->numCol)
1.9 oster 1544: return (EINVAL);
1.149 oster 1545:
1546:
1.291 mrg 1547: rf_lock_mutex2(raidPtr->mutex);
1.185 oster 1548: if (raidPtr->status == rf_rs_reconstructing) {
1549: /* you can't fail a disk while we're reconstructing! */
1550: /* XXX wrong for RAID6 */
1.291 mrg 1551: rf_unlock_mutex2(raidPtr->mutex);
1.185 oster 1552: return (EINVAL);
1553: }
1.186 perry 1554: if ((raidPtr->Disks[rr->col].status ==
1555: rf_ds_optimal) && (raidPtr->numFailures > 0)) {
1.149 oster 1556: /* some other component has failed. Let's not make
1557: things worse. XXX wrong for RAID6 */
1.291 mrg 1558: rf_unlock_mutex2(raidPtr->mutex);
1.149 oster 1559: return (EINVAL);
1560: }
1.166 oster 1561: if (raidPtr->Disks[rr->col].status == rf_ds_spared) {
1.149 oster 1562: /* Can't fail a spared disk! */
1.291 mrg 1563: rf_unlock_mutex2(raidPtr->mutex);
1.149 oster 1564: return (EINVAL);
1565: }
1.291 mrg 1566: rf_unlock_mutex2(raidPtr->mutex);
1.1 oster 1567:
1.9 oster 1568: /* make a copy of the recon request so that we don't rely on
1569: * the user's buffer */
1.1 oster 1570: RF_Malloc(rrcopy, sizeof(*rrcopy), (struct rf_recon_req *));
1.38 oster 1571: if (rrcopy == NULL)
1572: return(ENOMEM);
1.118 wiz 1573: memcpy(rrcopy, rr, sizeof(*rr));
1.42 oster 1574: rrcopy->raidPtr = (void *) raidPtr;
1.1 oster 1575:
1.42 oster 1576: retcode = RF_CREATE_THREAD(raidPtr->recon_thread,
1.37 oster 1577: rf_ReconThread,
1578: rrcopy,"raid_recon");
1.9 oster 1579: return (0);
1580:
1581: /* invoke a copyback operation after recon on whatever disk
1582: * needs it, if any */
1583: case RAIDFRAME_COPYBACK:
1.24 oster 1584:
1.42 oster 1585: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.24 oster 1586: /* This makes no sense on a RAID 0!! */
1587: return(EINVAL);
1588: }
1589:
1.42 oster 1590: if (raidPtr->copyback_in_progress == 1) {
1.37 oster 1591: /* Copyback is already in progress! */
1592: return(EINVAL);
1593: }
1.27 oster 1594:
1.42 oster 1595: retcode = RF_CREATE_THREAD(raidPtr->copyback_thread,
1.37 oster 1596: rf_CopybackThread,
1.42 oster 1597: raidPtr,"raid_copyback");
1.37 oster 1598: return (retcode);
1.9 oster 1599:
1.1 oster 1600: /* return the percentage completion of reconstruction */
1.37 oster 1601: case RAIDFRAME_CHECK_RECON_STATUS:
1.42 oster 1602: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.71 oster 1603: /* This makes no sense on a RAID 0, so tell the
1604: user it's done. */
1605: *(int *) data = 100;
1606: return(0);
1.24 oster 1607: }
1.166 oster 1608: if (raidPtr->status != rf_rs_reconstructing)
1.1 oster 1609: *(int *) data = 100;
1.171 oster 1610: else {
1611: if (raidPtr->reconControl->numRUsTotal > 0) {
1612: *(int *) data = (raidPtr->reconControl->numRUsComplete * 100 / raidPtr->reconControl->numRUsTotal);
1613: } else {
1614: *(int *) data = 0;
1615: }
1616: }
1.9 oster 1617: return (0);
1.83 oster 1618: case RAIDFRAME_CHECK_RECON_STATUS_EXT:
1619: progressInfoPtr = (RF_ProgressInfo_t **) data;
1.166 oster 1620: if (raidPtr->status != rf_rs_reconstructing) {
1.83 oster 1621: progressInfo.remaining = 0;
1622: progressInfo.completed = 100;
1623: progressInfo.total = 100;
1624: } else {
1.186 perry 1625: progressInfo.total =
1.166 oster 1626: raidPtr->reconControl->numRUsTotal;
1.186 perry 1627: progressInfo.completed =
1.166 oster 1628: raidPtr->reconControl->numRUsComplete;
1.83 oster 1629: progressInfo.remaining = progressInfo.total -
1630: progressInfo.completed;
1631: }
1.156 dsl 1632: retcode = copyout(&progressInfo, *progressInfoPtr,
1.83 oster 1633: sizeof(RF_ProgressInfo_t));
1634: return (retcode);
1.9 oster 1635:
1.37 oster 1636: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS:
1.42 oster 1637: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.80 oster 1638: /* This makes no sense on a RAID 0, so tell the
1639: user it's done. */
1640: *(int *) data = 100;
1641: return(0);
1.37 oster 1642: }
1.42 oster 1643: if (raidPtr->parity_rewrite_in_progress == 1) {
1.186 perry 1644: *(int *) data = 100 *
1645: raidPtr->parity_rewrite_stripes_done /
1.83 oster 1646: raidPtr->Layout.numStripe;
1.37 oster 1647: } else {
1648: *(int *) data = 100;
1649: }
1650: return (0);
1651:
1.83 oster 1652: case RAIDFRAME_CHECK_PARITYREWRITE_STATUS_EXT:
1653: progressInfoPtr = (RF_ProgressInfo_t **) data;
1654: if (raidPtr->parity_rewrite_in_progress == 1) {
1655: progressInfo.total = raidPtr->Layout.numStripe;
1.186 perry 1656: progressInfo.completed =
1.83 oster 1657: raidPtr->parity_rewrite_stripes_done;
1658: progressInfo.remaining = progressInfo.total -
1659: progressInfo.completed;
1660: } else {
1661: progressInfo.remaining = 0;
1662: progressInfo.completed = 100;
1663: progressInfo.total = 100;
1664: }
1.156 dsl 1665: retcode = copyout(&progressInfo, *progressInfoPtr,
1.83 oster 1666: sizeof(RF_ProgressInfo_t));
1667: return (retcode);
1668:
1.37 oster 1669: case RAIDFRAME_CHECK_COPYBACK_STATUS:
1.42 oster 1670: if (raidPtr->Layout.map->faultsTolerated == 0) {
1.37 oster 1671: /* This makes no sense on a RAID 0 */
1.83 oster 1672: *(int *) data = 100;
1673: return(0);
1.37 oster 1674: }
1.42 oster 1675: if (raidPtr->copyback_in_progress == 1) {
1676: *(int *) data = 100 * raidPtr->copyback_stripes_done /
1677: raidPtr->Layout.numStripe;
1.37 oster 1678: } else {
1679: *(int *) data = 100;
1680: }
1681: return (0);
1682:
1.83 oster 1683: case RAIDFRAME_CHECK_COPYBACK_STATUS_EXT:
1.93 oster 1684: progressInfoPtr = (RF_ProgressInfo_t **) data;
1.83 oster 1685: if (raidPtr->copyback_in_progress == 1) {
1686: progressInfo.total = raidPtr->Layout.numStripe;
1.186 perry 1687: progressInfo.completed =
1.93 oster 1688: raidPtr->copyback_stripes_done;
1.83 oster 1689: progressInfo.remaining = progressInfo.total -
1690: progressInfo.completed;
1691: } else {
1692: progressInfo.remaining = 0;
1693: progressInfo.completed = 100;
1694: progressInfo.total = 100;
1695: }
1.156 dsl 1696: retcode = copyout(&progressInfo, *progressInfoPtr,
1.83 oster 1697: sizeof(RF_ProgressInfo_t));
1698: return (retcode);
1.37 oster 1699:
1.9 oster 1700: /* the sparetable daemon calls this to wait for the kernel to
1701: * need a spare table. this ioctl does not return until a
1702: * spare table is needed. XXX -- calling mpsleep here in the
1703: * ioctl code is almost certainly wrong and evil. -- XXX XXX
1704: * -- I should either compute the spare table in the kernel,
1705: * or have a different -- XXX XXX -- interface (a different
1.42 oster 1706: * character device) for delivering the table -- XXX */
1.250 oster 1707: #if 0
1.1 oster 1708: case RAIDFRAME_SPARET_WAIT:
1.287 mrg 1709: rf_lock_mutex2(rf_sparet_wait_mutex);
1.9 oster 1710: while (!rf_sparet_wait_queue)
1.287 mrg 1711: rf_wait_cond2(rf_sparet_wait_cv, rf_sparet_wait_mutex);
1.1 oster 1712: waitreq = rf_sparet_wait_queue;
1713: rf_sparet_wait_queue = rf_sparet_wait_queue->next;
1.287 mrg 1714: rf_unlock_mutex2(rf_sparet_wait_mutex);
1.9 oster 1715:
1.42 oster 1716: /* structure assignment */
1.186 perry 1717: *((RF_SparetWait_t *) data) = *waitreq;
1.9 oster 1718:
1.1 oster 1719: RF_Free(waitreq, sizeof(*waitreq));
1.9 oster 1720: return (0);
1721:
1722: /* wakes up a process waiting on SPARET_WAIT and puts an error
1723: * code in it that will cause the daemon to exit */
1.1 oster 1724: case RAIDFRAME_ABORT_SPARET_WAIT:
1725: RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1726: waitreq->fcol = -1;
1.287 mrg 1727: rf_lock_mutex2(rf_sparet_wait_mutex);
1.1 oster 1728: waitreq->next = rf_sparet_wait_queue;
1729: rf_sparet_wait_queue = waitreq;
1.287 mrg 1730: rf_broadcast_conf2(rf_sparet_wait_cv);
1731: rf_unlock_mutex2(rf_sparet_wait_mutex);
1.9 oster 1732: return (0);
1.1 oster 1733:
1.9 oster 1734: /* used by the spare table daemon to deliver a spare table
1735: * into the kernel */
1.1 oster 1736: case RAIDFRAME_SEND_SPARET:
1.9 oster 1737:
1.1 oster 1738: /* install the spare table */
1.42 oster 1739: retcode = rf_SetSpareTable(raidPtr, *(void **) data);
1.9 oster 1740:
1741: /* respond to the requestor. the return status of the spare
1742: * table installation is passed in the "fcol" field */
1.1 oster 1743: RF_Malloc(waitreq, sizeof(*waitreq), (RF_SparetWait_t *));
1744: waitreq->fcol = retcode;
1.287 mrg 1745: rf_lock_mutex2(rf_sparet_wait_mutex);
1.1 oster 1746: waitreq->next = rf_sparet_resp_queue;
1747: rf_sparet_resp_queue = waitreq;
1.287 mrg 1748: rf_broadcast_cond2(rf_sparet_resp_cv);
1749: rf_unlock_mutex2(rf_sparet_wait_mutex);
1.9 oster 1750:
1751: return (retcode);
1.1 oster 1752: #endif
1753:
1.9 oster 1754: default:
1.36 oster 1755: break; /* fall through to the os-specific code below */
1.1 oster 1756:
1757: }
1.9 oster 1758:
1.42 oster 1759: if (!raidPtr->valid)
1.9 oster 1760: return (EINVAL);
1761:
1.1 oster 1762: /*
1763: * Add support for "regular" device ioctls here.
1764: */
1.263 haad 1765:
1.264 haad 1766: error = disk_ioctl(&rs->sc_dkdev, cmd, data, flag, l);
1.263 haad 1767: if (error != EPASSTHROUGH)
1768: return (error);
1.9 oster 1769:
1.1 oster 1770: switch (cmd) {
1771: case DIOCGDINFO:
1.9 oster 1772: *(struct disklabel *) data = *(rs->sc_dkdev.dk_label);
1.1 oster 1773: break;
1.102 fvdl 1774: #ifdef __HAVE_OLD_DISKLABEL
1775: case ODIOCGDINFO:
1776: newlabel = *(rs->sc_dkdev.dk_label);
1777: if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1.103 fvdl 1778: return ENOTTY;
1.102 fvdl 1779: memcpy(data, &newlabel, sizeof (struct olddisklabel));
1780: break;
1781: #endif
1.1 oster 1782:
1783: case DIOCGPART:
1.9 oster 1784: ((struct partinfo *) data)->disklab = rs->sc_dkdev.dk_label;
1785: ((struct partinfo *) data)->part =
1.1 oster 1786: &rs->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
1787: break;
1788:
1789: case DIOCWDINFO:
1790: case DIOCSDINFO:
1.102 fvdl 1791: #ifdef __HAVE_OLD_DISKLABEL
1792: case ODIOCWDINFO:
1793: case ODIOCSDINFO:
1794: #endif
1795: {
1796: struct disklabel *lp;
1797: #ifdef __HAVE_OLD_DISKLABEL
1798: if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
1799: memset(&newlabel, 0, sizeof newlabel);
1800: memcpy(&newlabel, data, sizeof (struct olddisklabel));
1801: lp = &newlabel;
1802: } else
1803: #endif
1804: lp = (struct disklabel *)data;
1805:
1.1 oster 1806: if ((error = raidlock(rs)) != 0)
1807: return (error);
1808:
1809: rs->sc_flags |= RAIDF_LABELLING;
1810:
1811: error = setdisklabel(rs->sc_dkdev.dk_label,
1.102 fvdl 1812: lp, 0, rs->sc_dkdev.dk_cpulabel);
1.1 oster 1813: if (error == 0) {
1.102 fvdl 1814: if (cmd == DIOCWDINFO
1815: #ifdef __HAVE_OLD_DISKLABEL
1816: || cmd == ODIOCWDINFO
1817: #endif
1818: )
1.1 oster 1819: error = writedisklabel(RAIDLABELDEV(dev),
1820: raidstrategy, rs->sc_dkdev.dk_label,
1821: rs->sc_dkdev.dk_cpulabel);
1822: }
1823: rs->sc_flags &= ~RAIDF_LABELLING;
1824:
1825: raidunlock(rs);
1826:
1827: if (error)
1828: return (error);
1829: break;
1.102 fvdl 1830: }
1.1 oster 1831:
1832: case DIOCWLABEL:
1.9 oster 1833: if (*(int *) data != 0)
1.1 oster 1834: rs->sc_flags |= RAIDF_WLABEL;
1835: else
1836: rs->sc_flags &= ~RAIDF_WLABEL;
1837: break;
1838:
1839: case DIOCGDEFLABEL:
1.102 fvdl 1840: raidgetdefaultlabel(raidPtr, rs, (struct disklabel *) data);
1.1 oster 1841: break;
1.102 fvdl 1842:
1843: #ifdef __HAVE_OLD_DISKLABEL
1844: case ODIOCGDEFLABEL:
1845: raidgetdefaultlabel(raidPtr, rs, &newlabel);
1846: if (newlabel.d_npartitions > OLDMAXPARTITIONS)
1.103 fvdl 1847: return ENOTTY;
1.102 fvdl 1848: memcpy(data, &newlabel, sizeof (struct olddisklabel));
1849: break;
1850: #endif
1.1 oster 1851:
1.213 christos 1852: case DIOCAWEDGE:
1853: case DIOCDWEDGE:
1854: dkw = (void *)data;
1855:
1856: /* If the ioctl happens here, the parent is us. */
1857: (void)strcpy(dkw->dkw_parent, rs->sc_xname);
1858: return cmd == DIOCAWEDGE ? dkwedge_add(dkw) : dkwedge_del(dkw);
1859:
1860: case DIOCLWEDGES:
1861: return dkwedge_list(&rs->sc_dkdev,
1862: (struct dkwedge_list *)data, l);
1.252 oster 1863: case DIOCCACHESYNC:
1864: return rf_sync_component_caches(raidPtr);
1.298 buhrow 1865:
1866: case DIOCGSTRATEGY:
1867: {
1868: struct disk_strategy *dks = (void *)data;
1869:
1870: s = splbio();
1871: strlcpy(dks->dks_name, bufq_getstrategyname(rs->buf_queue),
1872: sizeof(dks->dks_name));
1873: splx(s);
1874: dks->dks_paramlen = 0;
1875:
1876: return 0;
1877: }
1878:
1879: case DIOCSSTRATEGY:
1880: {
1881: struct disk_strategy *dks = (void *)data;
1882: struct bufq_state *new;
1883: struct bufq_state *old;
1884:
1885: if (dks->dks_param != NULL) {
1886: return EINVAL;
1887: }
1888: dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
1889: error = bufq_alloc(&new, dks->dks_name,
1890: BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
1891: if (error) {
1892: return error;
1893: }
1894: s = splbio();
1895: old = rs->buf_queue;
1896: bufq_move(new, old);
1897: rs->buf_queue = new;
1898: splx(s);
1899: bufq_free(old);
1900:
1901: return 0;
1902: }
1903:
1.1 oster 1904: default:
1.39 oster 1905: retcode = ENOTTY;
1.1 oster 1906: }
1.9 oster 1907: return (retcode);
1.1 oster 1908:
1909: }
1910:
1911:
1.9 oster 1912: /* raidinit -- complete the rest of the initialization for the
1.1 oster 1913: RAIDframe device. */
1914:
1915:
1.59 oster 1916: static void
1.300 christos 1917: raidinit(struct raid_softc *rs)
1.1 oster 1918: {
1.262 cegger 1919: cfdata_t cf;
1.59 oster 1920: int unit;
1.300 christos 1921: RF_Raid_t *raidPtr = &rs->sc_r;
1.1 oster 1922:
1.59 oster 1923: unit = raidPtr->raidid;
1.1 oster 1924:
1925:
1926: /* XXX should check return code first... */
1927: rs->sc_flags |= RAIDF_INITED;
1928:
1.179 itojun 1929: /* XXX doesn't check bounds. */
1930: snprintf(rs->sc_xname, sizeof(rs->sc_xname), "raid%d", unit);
1.1 oster 1931:
1.217 oster 1932: /* attach the pseudo device */
1933: cf = malloc(sizeof(*cf), M_RAIDFRAME, M_WAITOK);
1934: cf->cf_name = raid_cd.cd_name;
1935: cf->cf_atname = raid_cd.cd_name;
1936: cf->cf_unit = unit;
1937: cf->cf_fstate = FSTATE_STAR;
1938:
1939: rs->sc_dev = config_attach_pseudo(cf);
1940:
1.270 christos 1941: if (rs->sc_dev == NULL) {
1.217 oster 1942: printf("raid%d: config_attach_pseudo failed\n",
1.270 christos 1943: raidPtr->raidid);
1.265 pooka 1944: rs->sc_flags &= ~RAIDF_INITED;
1945: free(cf, M_RAIDFRAME);
1946: return;
1.217 oster 1947: }
1948:
1.1 oster 1949: /* disk_attach actually creates space for the CPU disklabel, among
1.9 oster 1950: * other things, so it's critical to call this *BEFORE* we try putzing
1951: * with disklabels. */
1.11 oster 1952:
1.235 oster 1953: disk_init(&rs->sc_dkdev, rs->sc_xname, &rf_dkdriver);
1.219 oster 1954: disk_attach(&rs->sc_dkdev);
1.275 mrg 1955: disk_blocksize(&rs->sc_dkdev, raidPtr->bytesPerSector);
1.1 oster 1956:
1957: /* XXX There may be a weird interaction here between this, and
1.9 oster 1958: * protectedSectors, as used in RAIDframe. */
1.11 oster 1959:
1.9 oster 1960: rs->sc_size = raidPtr->totalSectors;
1.234 oster 1961:
1962: dkwedge_discover(&rs->sc_dkdev);
1963:
1964: rf_set_properties(rs, raidPtr);
1965:
1.1 oster 1966: }
#if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
/*
 * rf_GetSpareTableFromDaemon -- wake up the daemon and tell it to get us
 * a spare table.
 *
 * The request is pushed onto the head of rf_sparet_wait_queue and the
 * waiters on rf_sparet_wait_cv are woken.  The caller then sleeps until
 * rf_sparet_resp_queue is non-empty, pops the head of that queue, frees it
 * and returns its "fcol" field as the status.
 *
 * XXX
 * the entries in the queues should be tagged with the raidPtr
 * so that in the extremely rare case that two recons happen at once,
 * we know for which device we're requesting a spare table
 * XXX
 *
 * XXX This code is not currently used. GO
 */
int
rf_GetSpareTableFromDaemon(RF_SparetWait_t *req)
{
	RF_SparetWait_t *resp;
	int status;

	rf_lock_mutex2(rf_sparet_wait_mutex);

	/* post the request and notify whoever is waiting for one */
	req->next = rf_sparet_wait_queue;
	rf_sparet_wait_queue = req;
	rf_broadcast_cond2(rf_sparet_wait_cv);

	/*
	 * Sleep until a response shows up.  The mutex is handed to
	 * rf_wait_cond2(); presumably it is released while sleeping and
	 * re-taken before returning -- confirm against its definition.
	 */
	while (!rf_sparet_resp_queue) {
		rf_wait_cond2(rf_sparet_resp_cv, rf_sparet_wait_mutex);
	}

	/* dequeue the response at the head of the response queue */
	resp = rf_sparet_resp_queue;
	rf_sparet_resp_queue = resp->next;
	rf_unlock_mutex2(rf_sparet_wait_mutex);

	status = resp->fcol;
	/* this is the response entry, not the request we were handed */
	RF_Free(resp, sizeof(*resp));
	return (status);
}
#endif
1.39 oster 2001:
1.186 perry 2002: /* a wrapper around rf_DoAccess that extracts appropriate info from the
1.11 oster 2003: * bp & passes it down.
1.1 oster 2004: * any calls originating in the kernel must use non-blocking I/O
2005: * do some extra sanity checking to return "appropriate" error values for
2006: * certain conditions (to make some standard utilities work)
1.186 perry 2007: *
1.34 oster 2008: * Formerly known as: rf_DoAccessKernel
1.1 oster 2009: */
1.34 oster 2010: void
1.169 oster 2011: raidstart(RF_Raid_t *raidPtr)
1.1 oster 2012: {
2013: RF_SectorCount_t num_blocks, pb, sum;
2014: RF_RaidAddr_t raid_addr;
2015: struct partition *pp;
1.9 oster 2016: daddr_t blocknum;
1.1 oster 2017: struct raid_softc *rs;
1.9 oster 2018: int do_async;
1.34 oster 2019: struct buf *bp;
1.180 oster 2020: int rc;
1.1 oster 2021:
1.300 christos 2022: rs = raidPtr->softc;
1.56 oster 2023: /* quick check to see if anything has died recently */
1.291 mrg 2024: rf_lock_mutex2(raidPtr->mutex);
1.56 oster 2025: if (raidPtr->numNewFailures > 0) {
1.291 mrg 2026: rf_unlock_mutex2(raidPtr->mutex);
1.186 perry 2027: rf_update_component_labels(raidPtr,
1.91 oster 2028: RF_NORMAL_COMPONENT_UPDATE);
1.291 mrg 2029: rf_lock_mutex2(raidPtr->mutex);
1.56 oster 2030: raidPtr->numNewFailures--;
2031: }
2032:
1.34 oster 2033: /* Check to see if we're at the limit... */
2034: while (raidPtr->openings > 0) {
1.291 mrg 2035: rf_unlock_mutex2(raidPtr->mutex);
1.34 oster 2036:
2037: /* get the next item, if any, from the queue */
1.253 yamt 2038: if ((bp = bufq_get(rs->buf_queue)) == NULL) {
1.34 oster 2039: /* nothing more to do */
2040: return;
2041: }
2042:
2043: /* Ok, for the bp we have here, bp->b_blkno is relative to the
1.186 perry 2044: * partition.. Need to make it absolute to the underlying
1.34 oster 2045: * device.. */
1.1 oster 2046:
1.275 mrg 2047: blocknum = bp->b_blkno << DEV_BSHIFT >> raidPtr->logBytesPerSector;
1.34 oster 2048: if (DISKPART(bp->b_dev) != RAW_PART) {
2049: pp = &rs->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
2050: blocknum += pp->p_offset;
2051: }
1.1 oster 2052:
1.186 perry 2053: db1_printf(("Blocks: %d, %d\n", (int) bp->b_blkno,
1.34 oster 2054: (int) blocknum));
1.186 perry 2055:
1.34 oster 2056: db1_printf(("bp->b_bcount = %d\n", (int) bp->b_bcount));
2057: db1_printf(("bp->b_resid = %d\n", (int) bp->b_resid));
1.186 perry 2058:
2059: /* *THIS* is where we adjust what block we're going to...
1.34 oster 2060: * but DO NOT TOUCH bp->b_blkno!!! */
2061: raid_addr = blocknum;
1.186 perry 2062:
1.34 oster 2063: num_blocks = bp->b_bcount >> raidPtr->logBytesPerSector;
2064: pb = (bp->b_bcount & raidPtr->sectorMask) ? 1 : 0;
2065: sum = raid_addr + num_blocks + pb;
2066: if (1 || rf_debugKernelAccess) {
2067: db1_printf(("raid_addr=%d sum=%d num_blocks=%d(+%d) (%d)\n",
2068: (int) raid_addr, (int) sum, (int) num_blocks,
2069: (int) pb, (int) bp->b_resid));
2070: }
2071: if ((sum > raidPtr->totalSectors) || (sum < raid_addr)
2072: || (sum < num_blocks) || (sum < pb)) {
2073: bp->b_error = ENOSPC;
2074: bp->b_resid = bp->b_bcount;
2075: biodone(bp);
1.291 mrg 2076: rf_lock_mutex2(raidPtr->mutex);
1.34 oster 2077: continue;
2078: }
2079: /*
2080: * XXX rf_DoAccess() should do this, not just DoAccessKernel()
2081: */
1.186 perry 2082:
1.34 oster 2083: if (bp->b_bcount & raidPtr->sectorMask) {
2084: bp->b_error = EINVAL;
2085: bp->b_resid = bp->b_bcount;
2086: biodone(bp);
1.291 mrg 2087: rf_lock_mutex2(raidPtr->mutex);
1.34 oster 2088: continue;
1.186 perry 2089:
1.34 oster 2090: }
2091: db1_printf(("Calling DoAccess..\n"));
1.186 perry 2092:
1.1 oster 2093:
1.291 mrg 2094: rf_lock_mutex2(raidPtr->mutex);
1.34 oster 2095: raidPtr->openings--;
1.291 mrg 2096: rf_unlock_mutex2(raidPtr->mutex);
1.1 oster 2097:
1.34 oster 2098: /*
2099: * Everything is async.
2100: */
2101: do_async = 1;
1.186 perry 2102:
1.99 oster 2103: disk_busy(&rs->sc_dkdev);
2104:
1.186 perry 2105: /* XXX we're still at splbio() here... do we *really*
1.34 oster 2106: need to be? */
1.20 oster 2107:
1.186 perry 2108: /* don't ever condition on bp->b_flags & B_WRITE.
1.99 oster 2109: * always condition on B_READ instead */
1.186 perry 2110:
1.180 oster 2111: rc = rf_DoAccess(raidPtr, (bp->b_flags & B_READ) ?
2112: RF_IO_TYPE_READ : RF_IO_TYPE_WRITE,
2113: do_async, raid_addr, num_blocks,
2114: bp->b_data, bp, RF_DAG_NONBLOCKING_IO);
1.151 oster 2115:
1.180 oster 2116: if (rc) {
2117: bp->b_error = rc;
2118: bp->b_resid = bp->b_bcount;
2119: biodone(bp);
2120: /* continue loop */
1.186 perry 2121: }
1.20 oster 2122:
1.291 mrg 2123: rf_lock_mutex2(raidPtr->mutex);
1.20 oster 2124: }
1.291 mrg 2125: rf_unlock_mutex2(raidPtr->mutex);
1.34 oster 2126: }
1.20 oster 2127:
2128:
1.7 explorer 2129:
2130:
1.1 oster 2131: /* invoke an I/O from kernel mode. Disk queue should be locked upon entry */
2132:
1.186 perry 2133: int
1.169 oster 2134: rf_DispatchKernelIO(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req)
1.1 oster 2135: {
1.9 oster 2136: int op = (req->type == RF_IO_TYPE_READ) ? B_READ : B_WRITE;
1.1 oster 2137: struct buf *bp;
1.9 oster 2138:
1.1 oster 2139: req->queue = queue;
2140: bp = req->bp;
2141:
2142: switch (req->type) {
1.9 oster 2143: case RF_IO_TYPE_NOP: /* used primarily to unlock a locked queue */
1.1 oster 2144: /* XXX need to do something extra here.. */
1.9 oster 2145: /* I'm leaving this in, as I've never actually seen it used,
2146: * and I'd like folks to report it... GO */
1.1 oster 2147: printf(("WAKEUP CALLED\n"));
2148: queue->numOutstanding++;
2149:
1.197 oster 2150: bp->b_flags = 0;
1.207 simonb 2151: bp->b_private = req;
1.1 oster 2152:
1.194 oster 2153: KernelWakeupFunc(bp);
1.1 oster 2154: break;
1.9 oster 2155:
1.1 oster 2156: case RF_IO_TYPE_READ:
2157: case RF_IO_TYPE_WRITE:
1.175 oster 2158: #if RF_ACC_TRACE > 0
1.1 oster 2159: if (req->tracerec) {
2160: RF_ETIMER_START(req->tracerec->timer);
2161: }
1.175 oster 2162: #endif
1.194 oster 2163: InitBP(bp, queue->rf_cinfo->ci_vp,
1.197 oster 2164: op, queue->rf_cinfo->ci_dev,
1.9 oster 2165: req->sectorOffset, req->numSector,
2166: req->buf, KernelWakeupFunc, (void *) req,
2167: queue->raidPtr->logBytesPerSector, req->b_proc);
1.1 oster 2168:
2169: if (rf_debugKernelAccess) {
1.9 oster 2170: db1_printf(("dispatch: bp->b_blkno = %ld\n",
2171: (long) bp->b_blkno));
1.1 oster 2172: }
2173: queue->numOutstanding++;
2174: queue->last_deq_sector = req->sectorOffset;
1.9 oster 2175: /* acc wouldn't have been let in if there were any pending
2176: * reqs at any other priority */
1.1 oster 2177: queue->curPriority = req->priority;
2178:
1.166 oster 2179: db1_printf(("Going for %c to unit %d col %d\n",
1.186 perry 2180: req->type, queue->raidPtr->raidid,
1.166 oster 2181: queue->col));
1.1 oster 2182: db1_printf(("sector %d count %d (%d bytes) %d\n",
1.9 oster 2183: (int) req->sectorOffset, (int) req->numSector,
2184: (int) (req->numSector <<
2185: queue->raidPtr->logBytesPerSector),
2186: (int) queue->raidPtr->logBytesPerSector));
1.256 oster 2187:
2188: /*
2189: * XXX: drop lock here since this can block at
2190: * least with backing SCSI devices. Retake it
2191: * to minimize fuss with calling interfaces.
2192: */
2193:
2194: RF_UNLOCK_QUEUE_MUTEX(queue, "unusedparam");
1.247 oster 2195: bdev_strategy(bp);
1.256 oster 2196: RF_LOCK_QUEUE_MUTEX(queue, "unusedparam");
1.1 oster 2197: break;
1.9 oster 2198:
1.1 oster 2199: default:
2200: panic("bad req->type in rf_DispatchKernelIO");
2201: }
2202: db1_printf(("Exiting from DispatchKernelIO\n"));
1.134 oster 2203:
1.9 oster 2204: return (0);
1.1 oster 2205: }
1.9 oster 2206: /* this is the callback function associated with a I/O invoked from
1.1 oster 2207: kernel code.
2208: */
1.186 perry 2209: static void
1.194 oster 2210: KernelWakeupFunc(struct buf *bp)
1.9 oster 2211: {
2212: RF_DiskQueueData_t *req = NULL;
2213: RF_DiskQueue_t *queue;
2214:
2215: db1_printf(("recovering the request queue:\n"));
1.285 mrg 2216:
1.207 simonb 2217: req = bp->b_private;
1.1 oster 2218:
1.9 oster 2219: queue = (RF_DiskQueue_t *) req->queue;
1.1 oster 2220:
1.286 mrg 2221: rf_lock_mutex2(queue->raidPtr->iodone_lock);
1.285 mrg 2222:
1.175 oster 2223: #if RF_ACC_TRACE > 0
1.9 oster 2224: if (req->tracerec) {
2225: RF_ETIMER_STOP(req->tracerec->timer);
2226: RF_ETIMER_EVAL(req->tracerec->timer);
1.288 mrg 2227: rf_lock_mutex2(rf_tracing_mutex);
1.9 oster 2228: req->tracerec->diskwait_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2229: req->tracerec->phys_io_us += RF_ETIMER_VAL_US(req->tracerec->timer);
2230: req->tracerec->num_phys_ios++;
1.288 mrg 2231: rf_unlock_mutex2(rf_tracing_mutex);
1.9 oster 2232: }
1.175 oster 2233: #endif
1.1 oster 2234:
1.230 ad 2235: /* XXX Ok, let's get aggressive... If b_error is set, let's go
1.9 oster 2236: * ballistic, and mark the component as hosed... */
1.36 oster 2237:
1.230 ad 2238: if (bp->b_error != 0) {
1.9 oster 2239: /* Mark the disk as dead */
2240: /* but only mark it once... */
1.186 perry 2241: /* and only if it wouldn't leave this RAID set
1.183 oster 2242: completely broken */
1.193 oster 2243: if (((queue->raidPtr->Disks[queue->col].status ==
2244: rf_ds_optimal) ||
2245: (queue->raidPtr->Disks[queue->col].status ==
2246: rf_ds_used_spare)) &&
2247: (queue->raidPtr->numFailures <
1.204 simonb 2248: queue->raidPtr->Layout.map->faultsTolerated)) {
1.9 oster 2249: printf("raid%d: IO Error. Marking %s as failed.\n",
1.136 oster 2250: queue->raidPtr->raidid,
1.166 oster 2251: queue->raidPtr->Disks[queue->col].devname);
2252: queue->raidPtr->Disks[queue->col].status =
1.9 oster 2253: rf_ds_failed;
1.166 oster 2254: queue->raidPtr->status = rf_rs_degraded;
1.9 oster 2255: queue->raidPtr->numFailures++;
1.56 oster 2256: queue->raidPtr->numNewFailures++;
1.9 oster 2257: } else { /* Disk is already dead... */
2258: /* printf("Disk already marked as dead!\n"); */
2259: }
1.4 oster 2260:
1.9 oster 2261: }
1.4 oster 2262:
1.143 oster 2263: /* Fill in the error value */
1.230 ad 2264: req->error = bp->b_error;
1.143 oster 2265:
2266: /* Drop this one on the "finished" queue... */
2267: TAILQ_INSERT_TAIL(&(queue->raidPtr->iodone), req, iodone_entries);
2268:
2269: /* Let the raidio thread know there is work to be done. */
1.286 mrg 2270: rf_signal_cond2(queue->raidPtr->iodone_cv);
1.143 oster 2271:
1.286 mrg 2272: rf_unlock_mutex2(queue->raidPtr->iodone_lock);
1.1 oster 2273: }
2274:
2275:
2276: /*
2277: * initialize a buf structure for doing an I/O in the kernel.
2278: */
1.186 perry 2279: static void
1.169 oster 2280: InitBP(struct buf *bp, struct vnode *b_vp, unsigned rw_flag, dev_t dev,
1.225 christos 2281: RF_SectorNum_t startSect, RF_SectorCount_t numSect, void *bf,
1.169 oster 2282: void (*cbFunc) (struct buf *), void *cbArg, int logBytesPerSector,
2283: struct proc *b_proc)
1.9 oster 2284: {
2285: /* bp->b_flags = B_PHYS | rw_flag; */
1.242 ad 2286: bp->b_flags = rw_flag; /* XXX need B_PHYS here too??? */
2287: bp->b_oflags = 0;
2288: bp->b_cflags = 0;
1.9 oster 2289: bp->b_bcount = numSect << logBytesPerSector;
2290: bp->b_bufsize = bp->b_bcount;
2291: bp->b_error = 0;
2292: bp->b_dev = dev;
1.187 christos 2293: bp->b_data = bf;
1.275 mrg 2294: bp->b_blkno = startSect << logBytesPerSector >> DEV_BSHIFT;
1.9 oster 2295: bp->b_resid = bp->b_bcount; /* XXX is this right!??!?!! */
1.1 oster 2296: if (bp->b_bcount == 0) {
1.141 provos 2297: panic("bp->b_bcount is zero in InitBP!!");
1.1 oster 2298: }
1.161 fvdl 2299: bp->b_proc = b_proc;
1.9 oster 2300: bp->b_iodone = cbFunc;
1.207 simonb 2301: bp->b_private = cbArg;
1.1 oster 2302: }
2303:
2304: static void
1.186 perry 2305: raidgetdefaultlabel(RF_Raid_t *raidPtr, struct raid_softc *rs,
1.169 oster 2306: struct disklabel *lp)
1.1 oster 2307: {
1.108 thorpej 2308: memset(lp, 0, sizeof(*lp));
1.1 oster 2309:
2310: /* fabricate a label... */
2311: lp->d_secperunit = raidPtr->totalSectors;
2312: lp->d_secsize = raidPtr->bytesPerSector;
1.45 oster 2313: lp->d_nsectors = raidPtr->Layout.dataSectorsPerStripe;
1.105 oster 2314: lp->d_ntracks = 4 * raidPtr->numCol;
1.186 perry 2315: lp->d_ncylinders = raidPtr->totalSectors /
1.45 oster 2316: (lp->d_nsectors * lp->d_ntracks);
1.1 oster 2317: lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
2318:
2319: strncpy(lp->d_typename, "raid", sizeof(lp->d_typename));
1.9 oster 2320: lp->d_type = DTYPE_RAID;
1.1 oster 2321: strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
2322: lp->d_rpm = 3600;
2323: lp->d_interleave = 1;
2324: lp->d_flags = 0;
2325:
2326: lp->d_partitions[RAW_PART].p_offset = 0;
2327: lp->d_partitions[RAW_PART].p_size = raidPtr->totalSectors;
2328: lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
2329: lp->d_npartitions = RAW_PART + 1;
2330:
2331: lp->d_magic = DISKMAGIC;
2332: lp->d_magic2 = DISKMAGIC;
2333: lp->d_checksum = dkcksum(rs->sc_dkdev.dk_label);
2334:
2335: }
2336: /*
2337: * Read the disklabel from the raid device. If one is not present, fake one
2338: * up.
2339: */
2340: static void
1.169 oster 2341: raidgetdisklabel(dev_t dev)
1.1 oster 2342: {
1.9 oster 2343: int unit = raidunit(dev);
1.300 christos 2344: struct raid_softc *rs;
1.158 dsl 2345: const char *errstring;
1.300 christos 2346: struct disklabel *lp;
2347: struct cpu_disklabel *clp;
1.1 oster 2348: RF_Raid_t *raidPtr;
2349:
1.300 christos 2350: if ((rs = raidget(unit)) == NULL)
2351: return;
2352:
2353: lp = rs->sc_dkdev.dk_label;
2354: clp = rs->sc_dkdev.dk_cpulabel;
2355:
1.1 oster 2356: db1_printf(("Getting the disklabel...\n"));
2357:
1.108 thorpej 2358: memset(clp, 0, sizeof(*clp));
1.1 oster 2359:
1.300 christos 2360: raidPtr = &rs->sc_r;
1.1 oster 2361:
2362: raidgetdefaultlabel(raidPtr, rs, lp);
2363:
2364: /*
2365: * Call the generic disklabel extraction routine.
2366: */
2367: errstring = readdisklabel(RAIDLABELDEV(dev), raidstrategy,
2368: rs->sc_dkdev.dk_label, rs->sc_dkdev.dk_cpulabel);
1.9 oster 2369: if (errstring)
1.1 oster 2370: raidmakedisklabel(rs);
2371: else {
1.9 oster 2372: int i;
1.1 oster 2373: struct partition *pp;
2374:
2375: /*
2376: * Sanity check whether the found disklabel is valid.
2377: *
2378: * This is necessary since total size of the raid device
2379: * may vary when an interleave is changed even though exactly
1.211 oster 2380: * same components are used, and old disklabel may used
1.1 oster 2381: * if that is found.
2382: */
2383: if (lp->d_secperunit != rs->sc_size)
1.123 oster 2384: printf("raid%d: WARNING: %s: "
1.260 sborrill 2385: "total sector size in disklabel (%" PRIu32 ") != "
2386: "the size of raid (%" PRIu64 ")\n", unit, rs->sc_xname,
2387: lp->d_secperunit, rs->sc_size);
1.1 oster 2388: for (i = 0; i < lp->d_npartitions; i++) {
2389: pp = &lp->d_partitions[i];
2390: if (pp->p_offset + pp->p_size > rs->sc_size)
1.123 oster 2391: printf("raid%d: WARNING: %s: end of partition `%c' "
1.260 sborrill 2392: "exceeds the size of raid (%" PRIu64 ")\n",
2393: unit, rs->sc_xname, 'a' + i, rs->sc_size);
1.1 oster 2394: }
2395: }
2396:
2397: }
2398: /*
2399: * Take care of things one might want to take care of in the event
2400: * that a disklabel isn't present.
2401: */
2402: static void
1.169 oster 2403: raidmakedisklabel(struct raid_softc *rs)
1.1 oster 2404: {
2405: struct disklabel *lp = rs->sc_dkdev.dk_label;
2406: db1_printf(("Making a label..\n"));
2407:
2408: /*
2409: * For historical reasons, if there's no disklabel present
2410: * the raw partition must be marked FS_BSDFFS.
2411: */
2412:
2413: lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
2414:
2415: strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
2416:
2417: lp->d_checksum = dkcksum(lp);
2418: }
2419: /*
2420: * Wait interruptibly for an exclusive lock.
2421: *
2422: * XXX
2423: * Several drivers do this; it should be abstracted and made MP-safe.
2424: * (Hmm... where have we seen this warning before :-> GO )
2425: */
2426: static int
1.169 oster 2427: raidlock(struct raid_softc *rs)
1.1 oster 2428: {
1.9 oster 2429: int error;
1.1 oster 2430:
2431: while ((rs->sc_flags & RAIDF_LOCKED) != 0) {
2432: rs->sc_flags |= RAIDF_WANTED;
1.9 oster 2433: if ((error =
2434: tsleep(rs, PRIBIO | PCATCH, "raidlck", 0)) != 0)
1.1 oster 2435: return (error);
2436: }
2437: rs->sc_flags |= RAIDF_LOCKED;
2438: return (0);
2439: }
2440: /*
2441: * Unlock and wake up any waiters.
2442: */
2443: static void
1.169 oster 2444: raidunlock(struct raid_softc *rs)
1.1 oster 2445: {
2446:
2447: rs->sc_flags &= ~RAIDF_LOCKED;
2448: if ((rs->sc_flags & RAIDF_WANTED) != 0) {
2449: rs->sc_flags &= ~RAIDF_WANTED;
2450: wakeup(rs);
2451: }
1.11 oster 2452: }
1.186 perry 2453:
1.11 oster 2454:
/* byte offset at which the component info area is read and written */
#define RF_COMPONENT_INFO_OFFSET  16384 /* bytes */
/* smallest size used for the component info area */
#define RF_COMPONENT_INFO_SIZE  1024 /* bytes */
/* smallest size used for the parity map area */
#define RF_PARITY_MAP_SIZE   RF_PARITYMAP_NBYTE
1.11 oster 2458:
1.276 mrg 2459: static daddr_t
2460: rf_component_info_offset(void)
2461: {
2462:
2463: return RF_COMPONENT_INFO_OFFSET;
2464: }
2465:
2466: static daddr_t
2467: rf_component_info_size(unsigned secsize)
2468: {
2469: daddr_t info_size;
2470:
2471: KASSERT(secsize);
2472: if (secsize > RF_COMPONENT_INFO_SIZE)
2473: info_size = secsize;
2474: else
2475: info_size = RF_COMPONENT_INFO_SIZE;
2476:
2477: return info_size;
2478: }
2479:
2480: static daddr_t
2481: rf_parity_map_offset(RF_Raid_t *raidPtr)
2482: {
2483: daddr_t map_offset;
2484:
2485: KASSERT(raidPtr->bytesPerSector);
2486: if (raidPtr->bytesPerSector > RF_COMPONENT_INFO_SIZE)
2487: map_offset = raidPtr->bytesPerSector;
2488: else
2489: map_offset = RF_COMPONENT_INFO_SIZE;
2490: map_offset += rf_component_info_offset();
2491:
2492: return map_offset;
2493: }
2494:
2495: static daddr_t
2496: rf_parity_map_size(RF_Raid_t *raidPtr)
2497: {
2498: daddr_t map_size;
2499:
2500: if (raidPtr->bytesPerSector > RF_PARITY_MAP_SIZE)
2501: map_size = raidPtr->bytesPerSector;
2502: else
2503: map_size = RF_PARITY_MAP_SIZE;
2504:
2505: return map_size;
2506: }
2507:
1.186 perry 2508: int
1.269 jld 2509: raidmarkclean(RF_Raid_t *raidPtr, RF_RowCol_t col)
1.12 oster 2510: {
1.269 jld 2511: RF_ComponentLabel_t *clabel;
2512:
2513: clabel = raidget_component_label(raidPtr, col);
2514: clabel->clean = RF_RAID_CLEAN;
2515: raidflush_component_label(raidPtr, col);
1.12 oster 2516: return(0);
2517: }
2518:
2519:
1.186 perry 2520: int
1.269 jld 2521: raidmarkdirty(RF_Raid_t *raidPtr, RF_RowCol_t col)
1.11 oster 2522: {
1.269 jld 2523: RF_ComponentLabel_t *clabel;
2524:
2525: clabel = raidget_component_label(raidPtr, col);
2526: clabel->clean = RF_RAID_DIRTY;
2527: raidflush_component_label(raidPtr, col);
1.11 oster 2528: return(0);
2529: }
2530:
2531: int
1.269 jld 2532: raidfetch_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2533: {
1.276 mrg 2534: KASSERT(raidPtr->bytesPerSector);
2535: return raidread_component_label(raidPtr->bytesPerSector,
2536: raidPtr->Disks[col].dev,
1.269 jld 2537: raidPtr->raid_cinfo[col].ci_vp,
2538: &raidPtr->raid_cinfo[col].ci_label);
2539: }
2540:
2541: RF_ComponentLabel_t *
2542: raidget_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2543: {
2544: return &raidPtr->raid_cinfo[col].ci_label;
2545: }
2546:
2547: int
2548: raidflush_component_label(RF_Raid_t *raidPtr, RF_RowCol_t col)
2549: {
2550: RF_ComponentLabel_t *label;
2551:
2552: label = &raidPtr->raid_cinfo[col].ci_label;
2553: label->mod_counter = raidPtr->mod_counter;
2554: #ifndef RF_NO_PARITY_MAP
2555: label->parity_map_modcount = label->mod_counter;
2556: #endif
1.276 mrg 2557: return raidwrite_component_label(raidPtr->bytesPerSector,
2558: raidPtr->Disks[col].dev,
1.269 jld 2559: raidPtr->raid_cinfo[col].ci_vp, label);
2560: }
2561:
2562:
2563: static int
1.276 mrg 2564: raidread_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
1.269 jld 2565: RF_ComponentLabel_t *clabel)
2566: {
2567: return raidread_component_area(dev, b_vp, clabel,
2568: sizeof(RF_ComponentLabel_t),
1.276 mrg 2569: rf_component_info_offset(),
2570: rf_component_info_size(secsize));
1.269 jld 2571: }
2572:
2573: /* ARGSUSED */
2574: static int
2575: raidread_component_area(dev_t dev, struct vnode *b_vp, void *data,
2576: size_t msize, daddr_t offset, daddr_t dsize)
1.11 oster 2577: {
2578: struct buf *bp;
1.130 gehenna 2579: const struct bdevsw *bdev;
1.11 oster 2580: int error;
1.186 perry 2581:
1.11 oster 2582: /* XXX should probably ensure that we don't try to do this if
1.186 perry 2583: someone has changed rf_protected_sectors. */
1.11 oster 2584:
1.98 oster 2585: if (b_vp == NULL) {
2586: /* For whatever reason, this component is not valid.
2587: Don't try to read a component label from it. */
2588: return(EINVAL);
2589: }
2590:
1.11 oster 2591: /* get a block of the appropriate size... */
1.269 jld 2592: bp = geteblk((int)dsize);
1.11 oster 2593: bp->b_dev = dev;
2594:
2595: /* get our ducks in a row for the read */
1.269 jld 2596: bp->b_blkno = offset / DEV_BSIZE;
2597: bp->b_bcount = dsize;
1.100 chs 2598: bp->b_flags |= B_READ;
1.269 jld 2599: bp->b_resid = dsize;
1.11 oster 2600:
1.130 gehenna 2601: bdev = bdevsw_lookup(bp->b_dev);
2602: if (bdev == NULL)
2603: return (ENXIO);
2604: (*bdev->d_strategy)(bp);
1.11 oster 2605:
1.186 perry 2606: error = biowait(bp);
1.11 oster 2607:
2608: if (!error) {
1.269 jld 2609: memcpy(data, bp->b_data, msize);
1.204 simonb 2610: }
1.11 oster 2611:
1.233 ad 2612: brelse(bp, 0);
1.11 oster 2613: return(error);
2614: }
1.269 jld 2615:
2616:
2617: static int
1.276 mrg 2618: raidwrite_component_label(unsigned secsize, dev_t dev, struct vnode *b_vp,
2619: RF_ComponentLabel_t *clabel)
1.269 jld 2620: {
2621: return raidwrite_component_area(dev, b_vp, clabel,
2622: sizeof(RF_ComponentLabel_t),
1.276 mrg 2623: rf_component_info_offset(),
2624: rf_component_info_size(secsize), 0);
1.269 jld 2625: }
2626:
1.11 oster 2627: /* ARGSUSED */
1.269 jld 2628: static int
2629: raidwrite_component_area(dev_t dev, struct vnode *b_vp, void *data,
2630: size_t msize, daddr_t offset, daddr_t dsize, int asyncp)
1.11 oster 2631: {
2632: struct buf *bp;
1.130 gehenna 2633: const struct bdevsw *bdev;
1.11 oster 2634: int error;
2635:
2636: /* get a block of the appropriate size... */
1.269 jld 2637: bp = geteblk((int)dsize);
1.11 oster 2638: bp->b_dev = dev;
2639:
2640: /* get our ducks in a row for the write */
1.269 jld 2641: bp->b_blkno = offset / DEV_BSIZE;
2642: bp->b_bcount = dsize;
2643: bp->b_flags |= B_WRITE | (asyncp ? B_ASYNC : 0);
2644: bp->b_resid = dsize;
1.11 oster 2645:
1.269 jld 2646: memset(bp->b_data, 0, dsize);
2647: memcpy(bp->b_data, data, msize);
1.11 oster 2648:
1.130 gehenna 2649: bdev = bdevsw_lookup(bp->b_dev);
2650: if (bdev == NULL)
2651: return (ENXIO);
2652: (*bdev->d_strategy)(bp);
1.269 jld 2653: if (asyncp)
2654: return 0;
1.186 perry 2655: error = biowait(bp);
1.233 ad 2656: brelse(bp, 0);
1.11 oster 2657: if (error) {
1.48 oster 2658: #if 1
1.11 oster 2659: printf("Failed to write RAID component info!\n");
1.48 oster 2660: #endif
1.11 oster 2661: }
2662:
2663: return(error);
1.1 oster 2664: }
1.12 oster 2665:
1.186 perry 2666: void
1.269 jld 2667: rf_paritymap_kern_write(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2668: {
2669: int c;
2670:
2671: for (c = 0; c < raidPtr->numCol; c++) {
2672: /* Skip dead disks. */
2673: if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2674: continue;
2675: /* XXXjld: what if an error occurs here? */
2676: raidwrite_component_area(raidPtr->Disks[c].dev,
2677: raidPtr->raid_cinfo[c].ci_vp, map,
2678: RF_PARITYMAP_NBYTE,
1.276 mrg 2679: rf_parity_map_offset(raidPtr),
2680: rf_parity_map_size(raidPtr), 0);
1.269 jld 2681: }
2682: }
2683:
2684: void
2685: rf_paritymap_kern_read(RF_Raid_t *raidPtr, struct rf_paritymap_ondisk *map)
2686: {
2687: struct rf_paritymap_ondisk tmp;
1.272 oster 2688: int c,first;
1.269 jld 2689:
1.272 oster 2690: first=1;
1.269 jld 2691: for (c = 0; c < raidPtr->numCol; c++) {
2692: /* Skip dead disks. */
2693: if (RF_DEAD_DISK(raidPtr->Disks[c].status))
2694: continue;
2695: raidread_component_area(raidPtr->Disks[c].dev,
2696: raidPtr->raid_cinfo[c].ci_vp, &tmp,
2697: RF_PARITYMAP_NBYTE,
1.276 mrg 2698: rf_parity_map_offset(raidPtr),
2699: rf_parity_map_size(raidPtr));
1.272 oster 2700: if (first) {
1.269 jld 2701: memcpy(map, &tmp, sizeof(*map));
1.272 oster 2702: first = 0;
1.269 jld 2703: } else {
2704: rf_paritymap_merge(map, &tmp);
2705: }
2706: }
2707: }
2708:
2709: void
1.169 oster 2710: rf_markalldirty(RF_Raid_t *raidPtr)
1.12 oster 2711: {
1.269 jld 2712: RF_ComponentLabel_t *clabel;
1.146 oster 2713: int sparecol;
1.166 oster 2714: int c;
2715: int j;
2716: int scol = -1;
1.12 oster 2717:
2718: raidPtr->mod_counter++;
1.166 oster 2719: for (c = 0; c < raidPtr->numCol; c++) {
2720: /* we don't want to touch (at all) a disk that has
2721: failed */
2722: if (!RF_DEAD_DISK(raidPtr->Disks[c].status)) {
1.269 jld 2723: clabel = raidget_component_label(raidPtr, c);
2724: if (clabel->status == rf_ds_spared) {
1.186 perry 2725: /* XXX do something special...
2726: but whatever you do, don't
1.166 oster 2727: try to access it!! */
2728: } else {
1.269 jld 2729: raidmarkdirty(raidPtr, c);
1.12 oster 2730: }
1.166 oster 2731: }
1.186 perry 2732: }
1.146 oster 2733:
1.12 oster 2734: for( c = 0; c < raidPtr->numSpare ; c++) {
2735: sparecol = raidPtr->numCol + c;
1.166 oster 2736: if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.186 perry 2737: /*
2738:
2739: we claim this disk is "optimal" if it's
2740: rf_ds_used_spare, as that means it should be
2741: directly substitutable for the disk it replaced.
1.12 oster 2742: We note that too...
2743:
2744: */
2745:
1.166 oster 2746: for(j=0;j<raidPtr->numCol;j++) {
2747: if (raidPtr->Disks[j].spareCol == sparecol) {
2748: scol = j;
2749: break;
1.12 oster 2750: }
2751: }
1.186 perry 2752:
1.269 jld 2753: clabel = raidget_component_label(raidPtr, sparecol);
1.12 oster 2754: /* make sure status is noted */
1.146 oster 2755:
1.269 jld 2756: raid_init_component_label(raidPtr, clabel);
1.146 oster 2757:
1.269 jld 2758: clabel->row = 0;
2759: clabel->column = scol;
1.146 oster 2760: /* Note: we *don't* change status from rf_ds_used_spare
2761: to rf_ds_optimal */
2762: /* clabel.status = rf_ds_optimal; */
1.186 perry 2763:
1.269 jld 2764: raidmarkdirty(raidPtr, sparecol);
1.12 oster 2765: }
2766: }
2767: }
2768:
1.13 oster 2769:
2770: void
1.169 oster 2771: rf_update_component_labels(RF_Raid_t *raidPtr, int final)
1.13 oster 2772: {
1.269 jld 2773: RF_ComponentLabel_t *clabel;
1.13 oster 2774: int sparecol;
1.166 oster 2775: int c;
2776: int j;
2777: int scol;
1.13 oster 2778:
2779: scol = -1;
2780:
1.186 perry 2781: /* XXX should do extra checks to make sure things really are clean,
1.13 oster 2782: rather than blindly setting the clean bit... */
2783:
2784: raidPtr->mod_counter++;
2785:
1.166 oster 2786: for (c = 0; c < raidPtr->numCol; c++) {
2787: if (raidPtr->Disks[c].status == rf_ds_optimal) {
1.269 jld 2788: clabel = raidget_component_label(raidPtr, c);
1.201 oster 2789: /* make sure status is noted */
1.269 jld 2790: clabel->status = rf_ds_optimal;
1.201 oster 2791:
1.214 oster 2792: /* note what unit we are configured as */
1.269 jld 2793: clabel->last_unit = raidPtr->raidid;
1.214 oster 2794:
1.269 jld 2795: raidflush_component_label(raidPtr, c);
1.166 oster 2796: if (final == RF_FINAL_COMPONENT_UPDATE) {
2797: if (raidPtr->parity_good == RF_RAID_CLEAN) {
1.269 jld 2798: raidmarkclean(raidPtr, c);
1.91 oster 2799: }
1.166 oster 2800: }
1.186 perry 2801: }
1.166 oster 2802: /* else we don't touch it.. */
1.186 perry 2803: }
1.63 oster 2804:
2805: for( c = 0; c < raidPtr->numSpare ; c++) {
2806: sparecol = raidPtr->numCol + c;
1.110 oster 2807: /* Need to ensure that the reconstruct actually completed! */
1.166 oster 2808: if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.186 perry 2809: /*
2810:
2811: we claim this disk is "optimal" if it's
2812: rf_ds_used_spare, as that means it should be
2813: directly substitutable for the disk it replaced.
1.63 oster 2814: We note that too...
2815:
2816: */
2817:
1.166 oster 2818: for(j=0;j<raidPtr->numCol;j++) {
2819: if (raidPtr->Disks[j].spareCol == sparecol) {
2820: scol = j;
2821: break;
1.63 oster 2822: }
2823: }
1.186 perry 2824:
1.63 oster 2825: /* XXX shouldn't *really* need this... */
1.269 jld 2826: clabel = raidget_component_label(raidPtr, sparecol);
1.63 oster 2827: /* make sure status is noted */
2828:
1.269 jld 2829: raid_init_component_label(raidPtr, clabel);
2830:
2831: clabel->column = scol;
2832: clabel->status = rf_ds_optimal;
2833: clabel->last_unit = raidPtr->raidid;
1.63 oster 2834:
1.269 jld 2835: raidflush_component_label(raidPtr, sparecol);
1.91 oster 2836: if (final == RF_FINAL_COMPONENT_UPDATE) {
1.13 oster 2837: if (raidPtr->parity_good == RF_RAID_CLEAN) {
1.269 jld 2838: raidmarkclean(raidPtr, sparecol);
1.13 oster 2839: }
2840: }
2841: }
2842: }
1.68 oster 2843: }
2844:
2845: void
1.169 oster 2846: rf_close_component(RF_Raid_t *raidPtr, struct vnode *vp, int auto_configured)
1.69 oster 2847: {
2848:
2849: if (vp != NULL) {
2850: if (auto_configured == 1) {
1.96 oster 2851: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.238 pooka 2852: VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
1.69 oster 2853: vput(vp);
1.186 perry 2854:
2855: } else {
1.244 ad 2856: (void) vn_close(vp, FREAD | FWRITE, curlwp->l_cred);
1.69 oster 2857: }
1.186 perry 2858: }
1.69 oster 2859: }
2860:
2861:
2862: void
1.169 oster 2863: rf_UnconfigureVnodes(RF_Raid_t *raidPtr)
1.68 oster 2864: {
1.186 perry 2865: int r,c;
1.69 oster 2866: struct vnode *vp;
2867: int acd;
1.68 oster 2868:
2869:
2870: /* We take this opportunity to close the vnodes like we should.. */
2871:
1.166 oster 2872: for (c = 0; c < raidPtr->numCol; c++) {
2873: vp = raidPtr->raid_cinfo[c].ci_vp;
2874: acd = raidPtr->Disks[c].auto_configured;
2875: rf_close_component(raidPtr, vp, acd);
2876: raidPtr->raid_cinfo[c].ci_vp = NULL;
2877: raidPtr->Disks[c].auto_configured = 0;
1.68 oster 2878: }
1.166 oster 2879:
1.68 oster 2880: for (r = 0; r < raidPtr->numSpare; r++) {
1.166 oster 2881: vp = raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp;
2882: acd = raidPtr->Disks[raidPtr->numCol + r].auto_configured;
1.69 oster 2883: rf_close_component(raidPtr, vp, acd);
1.166 oster 2884: raidPtr->raid_cinfo[raidPtr->numCol + r].ci_vp = NULL;
2885: raidPtr->Disks[raidPtr->numCol + r].auto_configured = 0;
1.68 oster 2886: }
1.37 oster 2887: }
1.63 oster 2888:
1.37 oster 2889:
1.186 perry 2890: void
1.169 oster 2891: rf_ReconThread(struct rf_recon_req *req)
1.37 oster 2892: {
2893: int s;
2894: RF_Raid_t *raidPtr;
2895:
2896: s = splbio();
2897: raidPtr = (RF_Raid_t *) req->raidPtr;
2898: raidPtr->recon_in_progress = 1;
2899:
1.166 oster 2900: rf_FailDisk((RF_Raid_t *) req->raidPtr, req->col,
1.37 oster 2901: ((req->flags & RF_FDFLAGS_RECON) ? 1 : 0));
2902:
2903: RF_Free(req, sizeof(*req));
2904:
2905: raidPtr->recon_in_progress = 0;
2906: splx(s);
2907:
2908: /* That's all... */
1.204 simonb 2909: kthread_exit(0); /* does not return */
1.37 oster 2910: }
2911:
2912: void
1.169 oster 2913: rf_RewriteParityThread(RF_Raid_t *raidPtr)
1.37 oster 2914: {
2915: int retcode;
2916: int s;
2917:
1.184 oster 2918: raidPtr->parity_rewrite_stripes_done = 0;
1.37 oster 2919: raidPtr->parity_rewrite_in_progress = 1;
2920: s = splbio();
2921: retcode = rf_RewriteParity(raidPtr);
2922: splx(s);
2923: if (retcode) {
1.279 christos 2924: printf("raid%d: Error re-writing parity (%d)!\n",
2925: raidPtr->raidid, retcode);
1.37 oster 2926: } else {
2927: /* set the clean bit! If we shutdown correctly,
2928: the clean bit on each component label will get
2929: set */
2930: raidPtr->parity_good = RF_RAID_CLEAN;
2931: }
2932: raidPtr->parity_rewrite_in_progress = 0;
1.85 oster 2933:
2934: /* Anyone waiting for us to stop? If so, inform them... */
2935: if (raidPtr->waitShutdown) {
2936: wakeup(&raidPtr->parity_rewrite_in_progress);
2937: }
1.37 oster 2938:
2939: /* That's all... */
1.204 simonb 2940: kthread_exit(0); /* does not return */
1.37 oster 2941: }
2942:
2943:
2944: void
1.169 oster 2945: rf_CopybackThread(RF_Raid_t *raidPtr)
1.37 oster 2946: {
2947: int s;
2948:
2949: raidPtr->copyback_in_progress = 1;
2950: s = splbio();
2951: rf_CopybackReconstructedData(raidPtr);
2952: splx(s);
2953: raidPtr->copyback_in_progress = 0;
2954:
2955: /* That's all... */
1.204 simonb 2956: kthread_exit(0); /* does not return */
1.37 oster 2957: }
2958:
2959:
2960: void
1.169 oster 2961: rf_ReconstructInPlaceThread(struct rf_recon_req *req)
1.37 oster 2962: {
2963: int s;
2964: RF_Raid_t *raidPtr;
1.186 perry 2965:
1.37 oster 2966: s = splbio();
2967: raidPtr = req->raidPtr;
2968: raidPtr->recon_in_progress = 1;
1.166 oster 2969: rf_ReconstructInPlace(raidPtr, req->col);
1.37 oster 2970: RF_Free(req, sizeof(*req));
2971: raidPtr->recon_in_progress = 0;
2972: splx(s);
2973:
2974: /* That's all... */
1.204 simonb 2975: kthread_exit(0); /* does not return */
1.48 oster 2976: }
2977:
1.213 christos 2978: static RF_AutoConfig_t *
2979: rf_get_component(RF_AutoConfig_t *ac_list, dev_t dev, struct vnode *vp,
1.276 mrg 2980: const char *cname, RF_SectorCount_t size, uint64_t numsecs,
2981: unsigned secsize)
1.213 christos 2982: {
2983: int good_one = 0;
2984: RF_ComponentLabel_t *clabel;
2985: RF_AutoConfig_t *ac;
2986:
2987: clabel = malloc(sizeof(RF_ComponentLabel_t), M_RAIDFRAME, M_NOWAIT);
2988: if (clabel == NULL) {
2989: oomem:
2990: while(ac_list) {
2991: ac = ac_list;
2992: if (ac->clabel)
2993: free(ac->clabel, M_RAIDFRAME);
2994: ac_list = ac_list->next;
2995: free(ac, M_RAIDFRAME);
2996: }
2997: printf("RAID auto config: out of memory!\n");
2998: return NULL; /* XXX probably should panic? */
2999: }
3000:
1.276 mrg 3001: if (!raidread_component_label(secsize, dev, vp, clabel)) {
3002: /* Got the label. Does it look reasonable? */
1.284 mrg 3003: if (rf_reasonable_label(clabel, numsecs) &&
1.282 enami 3004: (rf_component_label_partitionsize(clabel) <= size)) {
1.224 oster 3005: #ifdef DEBUG
1.276 mrg 3006: printf("Component on: %s: %llu\n",
1.213 christos 3007: cname, (unsigned long long)size);
1.276 mrg 3008: rf_print_component_label(clabel);
1.213 christos 3009: #endif
1.276 mrg 3010: /* if it's reasonable, add it, else ignore it. */
3011: ac = malloc(sizeof(RF_AutoConfig_t), M_RAIDFRAME,
1.213 christos 3012: M_NOWAIT);
1.276 mrg 3013: if (ac == NULL) {
3014: free(clabel, M_RAIDFRAME);
3015: goto oomem;
3016: }
3017: strlcpy(ac->devname, cname, sizeof(ac->devname));
3018: ac->dev = dev;
3019: ac->vp = vp;
3020: ac->clabel = clabel;
3021: ac->next = ac_list;
3022: ac_list = ac;
3023: good_one = 1;
3024: }
1.213 christos 3025: }
3026: if (!good_one) {
3027: /* cleanup */
3028: free(clabel, M_RAIDFRAME);
3029: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.238 pooka 3030: VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
1.213 christos 3031: vput(vp);
3032: }
3033: return ac_list;
3034: }
3035:
1.48 oster 3036: RF_AutoConfig_t *
1.259 cegger 3037: rf_find_raid_components(void)
1.48 oster 3038: {
3039: struct vnode *vp;
3040: struct disklabel label;
1.261 dyoung 3041: device_t dv;
1.268 dyoung 3042: deviter_t di;
1.48 oster 3043: dev_t dev;
1.296 buhrow 3044: int bmajor, bminor, wedge, rf_part_found;
1.48 oster 3045: int error;
3046: int i;
3047: RF_AutoConfig_t *ac_list;
1.276 mrg 3048: uint64_t numsecs;
3049: unsigned secsize;
1.48 oster 3050:
3051: /* initialize the AutoConfig list */
3052: ac_list = NULL;
3053:
3054: /* we begin by trolling through *all* the devices on the system */
3055:
1.268 dyoung 3056: for (dv = deviter_first(&di, DEVITER_F_ROOT_FIRST); dv != NULL;
3057: dv = deviter_next(&di)) {
1.48 oster 3058:
3059: /* we are only interested in disks... */
1.200 thorpej 3060: if (device_class(dv) != DV_DISK)
1.48 oster 3061: continue;
3062:
3063: /* we don't care about floppies... */
1.206 thorpej 3064: if (device_is_a(dv, "fd")) {
1.119 leo 3065: continue;
3066: }
1.129 oster 3067:
3068: /* we don't care about CD's... */
1.206 thorpej 3069: if (device_is_a(dv, "cd")) {
1.129 oster 3070: continue;
3071: }
3072:
1.248 oster 3073: /* we don't care about md's... */
3074: if (device_is_a(dv, "md")) {
3075: continue;
3076: }
3077:
1.120 leo 3078: /* hdfd is the Atari/Hades floppy driver */
1.206 thorpej 3079: if (device_is_a(dv, "hdfd")) {
1.121 leo 3080: continue;
3081: }
1.206 thorpej 3082:
1.121 leo 3083: /* fdisa is the Atari/Milan floppy driver */
1.206 thorpej 3084: if (device_is_a(dv, "fdisa")) {
1.48 oster 3085: continue;
3086: }
1.186 perry 3087:
1.48 oster 3088: /* need to find the device_name_to_block_device_major stuff */
1.245 cegger 3089: bmajor = devsw_name2blk(device_xname(dv), NULL, 0);
1.48 oster 3090:
1.296 buhrow 3091: rf_part_found = 0; /*No raid partition as yet*/
3092:
1.48 oster 3093: /* get a vnode for the raw partition of this disk */
3094:
1.213 christos 3095: wedge = device_is_a(dv, "dk");
3096: bminor = minor(device_unit(dv));
3097: dev = wedge ? makedev(bmajor, bminor) :
3098: MAKEDISKDEV(bmajor, bminor, RAW_PART);
1.48 oster 3099: if (bdevvp(dev, &vp))
3100: panic("RAID can't alloc vnode");
3101:
1.293 jmcneill 3102: error = VOP_OPEN(vp, FREAD | FSILENT, NOCRED);
1.48 oster 3103:
3104: if (error) {
1.186 perry 3105: /* "Who cares." Continue looking
1.48 oster 3106: for something that exists*/
3107: vput(vp);
3108: continue;
3109: }
3110:
1.276 mrg 3111: error = getdisksize(vp, &numsecs, &secsize);
3112: if (error) {
3113: vput(vp);
3114: continue;
3115: }
1.213 christos 3116: if (wedge) {
3117: struct dkwedge_info dkw;
3118: error = VOP_IOCTL(vp, DIOCGWEDGEINFO, &dkw, FREAD,
1.238 pooka 3119: NOCRED);
1.213 christos 3120: if (error) {
3121: printf("RAIDframe: can't get wedge info for "
1.245 cegger 3122: "dev %s (%d)\n", device_xname(dv), error);
1.241 oster 3123: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3124: VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3125: vput(vp);
1.213 christos 3126: continue;
3127: }
3128:
1.241 oster 3129: if (strcmp(dkw.dkw_ptype, DKW_PTYPE_RAIDFRAME) != 0) {
3130: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
3131: VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
3132: vput(vp);
1.228 christos 3133: continue;
1.241 oster 3134: }
1.213 christos 3135:
3136: ac_list = rf_get_component(ac_list, dev, vp,
1.276 mrg 3137: device_xname(dv), dkw.dkw_size, numsecs, secsize);
1.296 buhrow 3138: rf_part_found = 1; /*There is a raid component on this disk*/
1.213 christos 3139: continue;
3140: }
3141:
1.48 oster 3142: /* Ok, the disk exists. Go get the disklabel. */
1.238 pooka 3143: error = VOP_IOCTL(vp, DIOCGDINFO, &label, FREAD, NOCRED);
1.48 oster 3144: if (error) {
3145: /*
3146: * XXX can't happen - open() would
3147: * have errored out (or faked up one)
3148: */
1.181 thorpej 3149: if (error != ENOTTY)
3150: printf("RAIDframe: can't get label for dev "
1.245 cegger 3151: "%s (%d)\n", device_xname(dv), error);
1.48 oster 3152: }
3153:
3154: /* don't need this any more. We'll allocate it again
3155: a little later if we really do... */
1.96 oster 3156: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1.238 pooka 3157: VOP_CLOSE(vp, FREAD | FWRITE, NOCRED);
1.48 oster 3158: vput(vp);
3159:
1.181 thorpej 3160: if (error)
3161: continue;
3162:
1.296 buhrow 3163: rf_part_found = 0; /*No raid partitions yet*/
1.213 christos 3164: for (i = 0; i < label.d_npartitions; i++) {
3165: char cname[sizeof(ac_list->devname)];
3166:
1.48 oster 3167: /* We only support partitions marked as RAID */
3168: if (label.d_partitions[i].p_fstype != FS_RAID)
3169: continue;
3170:
1.206 thorpej 3171: dev = MAKEDISKDEV(bmajor, device_unit(dv), i);
1.48 oster 3172: if (bdevvp(dev, &vp))
3173: panic("RAID can't alloc vnode");
3174:
1.238 pooka 3175: error = VOP_OPEN(vp, FREAD, NOCRED);
1.48 oster 3176: if (error) {
3177: /* Whatever... */
3178: vput(vp);
3179: continue;
3180: }
1.213 christos 3181: snprintf(cname, sizeof(cname), "%s%c",
1.245 cegger 3182: device_xname(dv), 'a' + i);
1.213 christos 3183: ac_list = rf_get_component(ac_list, dev, vp, cname,
1.276 mrg 3184: label.d_partitions[i].p_size, numsecs, secsize);
1.296 buhrow 3185: rf_part_found = 1; /*There is at least one raid partition on this disk*/
3186: }
3187:
3188: /*
3189: *If there is no raid component on this disk, either in a
3190: *disklabel or inside a wedge, check the raw partition as well,
3191: *as it is possible to configure raid components on raw disk
3192: *devices.
3193: */
3194:
3195: if (!rf_part_found) {
3196: char cname[sizeof(ac_list->devname)];
3197:
3198: dev = MAKEDISKDEV(bmajor, device_unit(dv), RAW_PART);
3199: if (bdevvp(dev, &vp))
3200: panic("RAID can't alloc vnode");
3201:
3202: error = VOP_OPEN(vp, FREAD, NOCRED);
3203: if (error) {
3204: /* Whatever... */
3205: vput(vp);
3206: continue;
3207: }
3208: snprintf(cname, sizeof(cname), "%s%c",
3209: device_xname(dv), 'a' + RAW_PART);
3210: ac_list = rf_get_component(ac_list, dev, vp, cname,
3211: label.d_partitions[RAW_PART].p_size, numsecs, secsize);
1.48 oster 3212: }
3213: }
1.268 dyoung 3214: deviter_release(&di);
1.213 christos 3215: return ac_list;
1.48 oster 3216: }
1.186 perry 3217:
1.213 christos 3218:
1.292 oster 3219: int
1.284 mrg 3220: rf_reasonable_label(RF_ComponentLabel_t *clabel, uint64_t numsecs)
1.48 oster 3221: {
1.186 perry 3222:
1.48 oster 3223: if (((clabel->version==RF_COMPONENT_LABEL_VERSION_1) ||
3224: (clabel->version==RF_COMPONENT_LABEL_VERSION)) &&
3225: ((clabel->clean == RF_RAID_CLEAN) ||
3226: (clabel->clean == RF_RAID_DIRTY)) &&
1.186 perry 3227: clabel->row >=0 &&
3228: clabel->column >= 0 &&
1.48 oster 3229: clabel->num_rows > 0 &&
3230: clabel->num_columns > 0 &&
1.186 perry 3231: clabel->row < clabel->num_rows &&
1.48 oster 3232: clabel->column < clabel->num_columns &&
3233: clabel->blockSize > 0 &&
1.282 enami 3234: /*
3235: * numBlocksHi may contain garbage, but it is ok since
3236: * the type is unsigned. If it is really garbage,
3237: * rf_fix_old_label_size() will fix it.
3238: */
3239: rf_component_label_numblocks(clabel) > 0) {
1.284 mrg 3240: /*
3241: * label looks reasonable enough...
3242: * let's make sure it has no old garbage.
3243: */
1.292 oster 3244: if (numsecs)
3245: rf_fix_old_label_size(clabel, numsecs);
1.48 oster 3246: return(1);
3247: }
3248: return(0);
3249: }
3250:
3251:
1.278 mrg 3252: /*
3253: * For reasons yet unknown, some old component labels have garbage in
3254: * the newer numBlocksHi region, and this causes lossage. Since those
3255: * disks will also have numsecs set to less than 32 bits of sectors,
1.299 oster 3256: * we can determine when this corruption has occurred, and fix it.
1.284 mrg 3257: *
3258: * The exact same problem, with the same unknown reason, happens to
3259: * the partitionSizeHi member as well.
1.278 mrg 3260: */
3261: static void
3262: rf_fix_old_label_size(RF_ComponentLabel_t *clabel, uint64_t numsecs)
3263: {
3264:
1.284 mrg 3265: if (numsecs < ((uint64_t)1 << 32)) {
3266: if (clabel->numBlocksHi) {
3267: printf("WARNING: total sectors < 32 bits, yet "
3268: "numBlocksHi set\n"
3269: "WARNING: resetting numBlocksHi to zero.\n");
3270: clabel->numBlocksHi = 0;
3271: }
3272:
3273: if (clabel->partitionSizeHi) {
3274: printf("WARNING: total sectors < 32 bits, yet "
3275: "partitionSizeHi set\n"
3276: "WARNING: resetting partitionSizeHi to zero.\n");
3277: clabel->partitionSizeHi = 0;
3278: }
1.278 mrg 3279: }
3280: }
3281:
3282:
#ifdef DEBUG
/*
 * Debug aid: dump the interesting fields of a component label to the
 * console.  Only built when DEBUG is defined.
 */
void
rf_print_component_label(RF_ComponentLabel_t *clabel)
{
	const uint64_t total_blocks = rf_component_label_numblocks(clabel);

	/* position within the set */
	printf(" Row: %d Column: %d Num Rows: %d Num Columns: %d\n",
	    clabel->row, clabel->column,
	    clabel->num_rows, clabel->num_columns);

	/* identity and state */
	printf(" Version: %d Serial Number: %d Mod Counter: %d\n",
	    clabel->version, clabel->serial_number,
	    clabel->mod_counter);
	printf(" Clean: %s Status: %d\n",
	    clabel->clean ? "Yes" : "No", clabel->status);

	/* layout */
	printf(" sectPerSU: %d SUsPerPU: %d SUsPerRU: %d\n",
	    clabel->sectPerSU, clabel->SUsPerPU, clabel->SUsPerRU);
	printf(" RAID Level: %c blocksize: %d numBlocks: %"PRIu64"\n",
	    (char) clabel->parityConfig, clabel->blockSize, total_blocks);

	/* configuration hints */
	printf(" Autoconfig: %s\n", clabel->autoconfigure ? "Yes" : "No");
	printf(" Contains root partition: %s\n",
	    clabel->root_partition ? "Yes" : "No");
	printf(" Last configured as: raid%d\n", clabel->last_unit);
#if 0
	printf(" Config order: %d\n", clabel->config_order);
#endif

}
#endif
1.48 oster 3313:
3314: RF_ConfigSet_t *
1.169 oster 3315: rf_create_auto_sets(RF_AutoConfig_t *ac_list)
1.48 oster 3316: {
3317: RF_AutoConfig_t *ac;
3318: RF_ConfigSet_t *config_sets;
3319: RF_ConfigSet_t *cset;
3320: RF_AutoConfig_t *ac_next;
3321:
3322:
3323: config_sets = NULL;
3324:
3325: /* Go through the AutoConfig list, and figure out which components
3326: belong to what sets. */
3327: ac = ac_list;
3328: while(ac!=NULL) {
3329: /* we're going to putz with ac->next, so save it here
3330: for use at the end of the loop */
3331: ac_next = ac->next;
3332:
3333: if (config_sets == NULL) {
3334: /* will need at least this one... */
3335: config_sets = (RF_ConfigSet_t *)
1.186 perry 3336: malloc(sizeof(RF_ConfigSet_t),
1.48 oster 3337: M_RAIDFRAME, M_NOWAIT);
3338: if (config_sets == NULL) {
1.141 provos 3339: panic("rf_create_auto_sets: No memory!");
1.48 oster 3340: }
3341: /* this one is easy :) */
3342: config_sets->ac = ac;
3343: config_sets->next = NULL;
1.51 oster 3344: config_sets->rootable = 0;
1.48 oster 3345: ac->next = NULL;
3346: } else {
3347: /* which set does this component fit into? */
3348: cset = config_sets;
3349: while(cset!=NULL) {
1.49 oster 3350: if (rf_does_it_fit(cset, ac)) {
1.86 oster 3351: /* looks like it matches... */
3352: ac->next = cset->ac;
3353: cset->ac = ac;
1.48 oster 3354: break;
3355: }
3356: cset = cset->next;
3357: }
3358: if (cset==NULL) {
3359: /* didn't find a match above... new set..*/
3360: cset = (RF_ConfigSet_t *)
1.186 perry 3361: malloc(sizeof(RF_ConfigSet_t),
1.48 oster 3362: M_RAIDFRAME, M_NOWAIT);
3363: if (cset == NULL) {
1.141 provos 3364: panic("rf_create_auto_sets: No memory!");
1.48 oster 3365: }
3366: cset->ac = ac;
3367: ac->next = NULL;
3368: cset->next = config_sets;
1.51 oster 3369: cset->rootable = 0;
1.48 oster 3370: config_sets = cset;
3371: }
3372: }
3373: ac = ac_next;
3374: }
3375:
3376:
3377: return(config_sets);
3378: }
3379:
3380: static int
1.169 oster 3381: rf_does_it_fit(RF_ConfigSet_t *cset, RF_AutoConfig_t *ac)
1.48 oster 3382: {
3383: RF_ComponentLabel_t *clabel1, *clabel2;
3384:
3385: /* If this one matches the *first* one in the set, that's good
3386: enough, since the other members of the set would have been
3387: through here too... */
1.60 oster 3388: /* note that we are not checking partitionSize here..
3389:
3390: Note that we are also not checking the mod_counters here.
1.299 oster 3391: If everything else matches except the mod_counter, that's
1.60 oster 3392: good enough for this test. We will deal with the mod_counters
1.186 perry 3393: a little later in the autoconfiguration process.
1.60 oster 3394:
3395: (clabel1->mod_counter == clabel2->mod_counter) &&
1.81 oster 3396:
3397: The reason we don't check for this is that failed disks
3398: will have lower modification counts. If those disks are
3399: not added to the set they used to belong to, then they will
3400: form their own set, which may result in 2 different sets,
3401: for example, competing to be configured at raid0, and
3402: perhaps competing to be the root filesystem set. If the
3403: wrong ones get configured, or both attempt to become /,
3404: weird behaviour and or serious lossage will occur. Thus we
3405: need to bring them into the fold here, and kick them out at
3406: a later point.
1.60 oster 3407:
3408: */
1.48 oster 3409:
3410: clabel1 = cset->ac->clabel;
3411: clabel2 = ac->clabel;
3412: if ((clabel1->version == clabel2->version) &&
3413: (clabel1->serial_number == clabel2->serial_number) &&
3414: (clabel1->num_rows == clabel2->num_rows) &&
3415: (clabel1->num_columns == clabel2->num_columns) &&
3416: (clabel1->sectPerSU == clabel2->sectPerSU) &&
3417: (clabel1->SUsPerPU == clabel2->SUsPerPU) &&
3418: (clabel1->SUsPerRU == clabel2->SUsPerRU) &&
3419: (clabel1->parityConfig == clabel2->parityConfig) &&
3420: (clabel1->maxOutstanding == clabel2->maxOutstanding) &&
3421: (clabel1->blockSize == clabel2->blockSize) &&
1.282 enami 3422: rf_component_label_numblocks(clabel1) ==
3423: rf_component_label_numblocks(clabel2) &&
1.48 oster 3424: (clabel1->autoconfigure == clabel2->autoconfigure) &&
3425: (clabel1->root_partition == clabel2->root_partition) &&
3426: (clabel1->last_unit == clabel2->last_unit) &&
3427: (clabel1->config_order == clabel2->config_order)) {
3428: /* if it get's here, it almost *has* to be a match */
3429: } else {
1.186 perry 3430: /* it's not consistent with somebody in the set..
1.48 oster 3431: punt */
3432: return(0);
3433: }
3434: /* all was fine.. it must fit... */
3435: return(1);
3436: }
3437:
3438: int
1.169 oster 3439: rf_have_enough_components(RF_ConfigSet_t *cset)
1.48 oster 3440: {
1.51 oster 3441: RF_AutoConfig_t *ac;
3442: RF_AutoConfig_t *auto_config;
3443: RF_ComponentLabel_t *clabel;
1.166 oster 3444: int c;
1.51 oster 3445: int num_cols;
3446: int num_missing;
1.86 oster 3447: int mod_counter;
1.87 oster 3448: int mod_counter_found;
1.88 oster 3449: int even_pair_failed;
3450: char parity_type;
1.186 perry 3451:
1.51 oster 3452:
1.48 oster 3453: /* check to see that we have enough 'live' components
3454: of this set. If so, we can configure it if necessary */
3455:
1.51 oster 3456: num_cols = cset->ac->clabel->num_columns;
1.88 oster 3457: parity_type = cset->ac->clabel->parityConfig;
1.51 oster 3458:
3459: /* XXX Check for duplicate components!?!?!? */
3460:
1.86 oster 3461: /* Determine what the mod_counter is supposed to be for this set. */
3462:
1.87 oster 3463: mod_counter_found = 0;
1.101 oster 3464: mod_counter = 0;
1.86 oster 3465: ac = cset->ac;
3466: while(ac!=NULL) {
1.87 oster 3467: if (mod_counter_found==0) {
1.86 oster 3468: mod_counter = ac->clabel->mod_counter;
1.87 oster 3469: mod_counter_found = 1;
3470: } else {
3471: if (ac->clabel->mod_counter > mod_counter) {
3472: mod_counter = ac->clabel->mod_counter;
3473: }
1.86 oster 3474: }
3475: ac = ac->next;
3476: }
3477:
1.51 oster 3478: num_missing = 0;
3479: auto_config = cset->ac;
3480:
1.166 oster 3481: even_pair_failed = 0;
3482: for(c=0; c<num_cols; c++) {
3483: ac = auto_config;
3484: while(ac!=NULL) {
1.186 perry 3485: if ((ac->clabel->column == c) &&
1.166 oster 3486: (ac->clabel->mod_counter == mod_counter)) {
3487: /* it's this one... */
1.224 oster 3488: #ifdef DEBUG
1.166 oster 3489: printf("Found: %s at %d\n",
3490: ac->devname,c);
1.51 oster 3491: #endif
1.166 oster 3492: break;
1.51 oster 3493: }
1.166 oster 3494: ac=ac->next;
3495: }
3496: if (ac==NULL) {
1.51 oster 3497: /* Didn't find one here! */
1.88 oster 3498: /* special case for RAID 1, especially
3499: where there are more than 2
3500: components (where RAIDframe treats
3501: things a little differently :( ) */
1.166 oster 3502: if (parity_type == '1') {
3503: if (c%2 == 0) { /* even component */
3504: even_pair_failed = 1;
3505: } else { /* odd component. If
3506: we're failed, and
3507: so is the even
3508: component, it's
3509: "Good Night, Charlie" */
3510: if (even_pair_failed == 1) {
3511: return(0);
1.88 oster 3512: }
3513: }
1.166 oster 3514: } else {
3515: /* normal accounting */
3516: num_missing++;
1.88 oster 3517: }
1.166 oster 3518: }
3519: if ((parity_type == '1') && (c%2 == 1)) {
1.88 oster 3520: /* Just did an even component, and we didn't
1.186 perry 3521: bail.. reset the even_pair_failed flag,
1.88 oster 3522: and go on to the next component.... */
1.166 oster 3523: even_pair_failed = 0;
1.51 oster 3524: }
3525: }
3526:
3527: clabel = cset->ac->clabel;
3528:
3529: if (((clabel->parityConfig == '0') && (num_missing > 0)) ||
3530: ((clabel->parityConfig == '4') && (num_missing > 1)) ||
3531: ((clabel->parityConfig == '5') && (num_missing > 1))) {
3532: /* XXX this needs to be made *much* more general */
3533: /* Too many failures */
3534: return(0);
3535: }
3536: /* otherwise, all is well, and we've got enough to take a kick
3537: at autoconfiguring this set */
3538: return(1);
1.48 oster 3539: }
3540:
3541: void
1.169 oster 3542: rf_create_configuration(RF_AutoConfig_t *ac, RF_Config_t *config,
1.222 christos 3543: RF_Raid_t *raidPtr)
1.48 oster 3544: {
3545: RF_ComponentLabel_t *clabel;
1.77 oster 3546: int i;
1.48 oster 3547:
3548: clabel = ac->clabel;
3549:
3550: /* 1. Fill in the common stuff */
1.166 oster 3551: config->numRow = clabel->num_rows = 1;
1.48 oster 3552: config->numCol = clabel->num_columns;
3553: config->numSpare = 0; /* XXX should this be set here? */
3554: config->sectPerSU = clabel->sectPerSU;
3555: config->SUsPerPU = clabel->SUsPerPU;
3556: config->SUsPerRU = clabel->SUsPerRU;
3557: config->parityConfig = clabel->parityConfig;
3558: /* XXX... */
3559: strcpy(config->diskQueueType,"fifo");
3560: config->maxOutstandingDiskReqs = clabel->maxOutstanding;
3561: config->layoutSpecificSize = 0; /* XXX ?? */
3562:
3563: while(ac!=NULL) {
3564: /* row/col values will be in range due to the checks
3565: in reasonable_label() */
1.166 oster 3566: strcpy(config->devnames[0][ac->clabel->column],
1.48 oster 3567: ac->devname);
3568: ac = ac->next;
3569: }
3570:
1.77 oster 3571: for(i=0;i<RF_MAXDBGV;i++) {
1.163 fvdl 3572: config->debugVars[i][0] = 0;
1.77 oster 3573: }
1.48 oster 3574: }
3575:
3576: int
1.169 oster 3577: rf_set_autoconfig(RF_Raid_t *raidPtr, int new_value)
1.48 oster 3578: {
1.269 jld 3579: RF_ComponentLabel_t *clabel;
1.166 oster 3580: int column;
1.148 oster 3581: int sparecol;
1.48 oster 3582:
1.54 oster 3583: raidPtr->autoconfigure = new_value;
1.166 oster 3584:
3585: for(column=0; column<raidPtr->numCol; column++) {
3586: if (raidPtr->Disks[column].status == rf_ds_optimal) {
1.269 jld 3587: clabel = raidget_component_label(raidPtr, column);
3588: clabel->autoconfigure = new_value;
3589: raidflush_component_label(raidPtr, column);
1.48 oster 3590: }
3591: }
1.148 oster 3592: for(column = 0; column < raidPtr->numSpare ; column++) {
3593: sparecol = raidPtr->numCol + column;
1.166 oster 3594: if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.269 jld 3595: clabel = raidget_component_label(raidPtr, sparecol);
3596: clabel->autoconfigure = new_value;
3597: raidflush_component_label(raidPtr, sparecol);
1.148 oster 3598: }
3599: }
1.48 oster 3600: return(new_value);
3601: }
3602:
3603: int
1.169 oster 3604: rf_set_rootpartition(RF_Raid_t *raidPtr, int new_value)
1.48 oster 3605: {
1.269 jld 3606: RF_ComponentLabel_t *clabel;
1.166 oster 3607: int column;
1.148 oster 3608: int sparecol;
1.48 oster 3609:
1.54 oster 3610: raidPtr->root_partition = new_value;
1.166 oster 3611: for(column=0; column<raidPtr->numCol; column++) {
3612: if (raidPtr->Disks[column].status == rf_ds_optimal) {
1.269 jld 3613: clabel = raidget_component_label(raidPtr, column);
3614: clabel->root_partition = new_value;
3615: raidflush_component_label(raidPtr, column);
1.148 oster 3616: }
3617: }
3618: for(column = 0; column < raidPtr->numSpare ; column++) {
3619: sparecol = raidPtr->numCol + column;
1.166 oster 3620: if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
1.269 jld 3621: clabel = raidget_component_label(raidPtr, sparecol);
3622: clabel->root_partition = new_value;
3623: raidflush_component_label(raidPtr, sparecol);
1.48 oster 3624: }
3625: }
3626: return(new_value);
3627: }
3628:
3629: void
1.169 oster 3630: rf_release_all_vps(RF_ConfigSet_t *cset)
1.48 oster 3631: {
3632: RF_AutoConfig_t *ac;
1.186 perry 3633:
1.48 oster 3634: ac = cset->ac;
3635: while(ac!=NULL) {
3636: /* Close the vp, and give it back */
3637: if (ac->vp) {
1.96 oster 3638: vn_lock(ac->vp, LK_EXCLUSIVE | LK_RETRY);
1.238 pooka 3639: VOP_CLOSE(ac->vp, FREAD, NOCRED);
1.48 oster 3640: vput(ac->vp);
1.86 oster 3641: ac->vp = NULL;
1.48 oster 3642: }
3643: ac = ac->next;
3644: }
3645: }
3646:
3647:
3648: void
1.169 oster 3649: rf_cleanup_config_set(RF_ConfigSet_t *cset)
1.48 oster 3650: {
3651: RF_AutoConfig_t *ac;
3652: RF_AutoConfig_t *next_ac;
1.186 perry 3653:
1.48 oster 3654: ac = cset->ac;
3655: while(ac!=NULL) {
3656: next_ac = ac->next;
3657: /* nuke the label */
3658: free(ac->clabel, M_RAIDFRAME);
3659: /* cleanup the config structure */
3660: free(ac, M_RAIDFRAME);
3661: /* "next.." */
3662: ac = next_ac;
3663: }
3664: /* and, finally, nuke the config set */
3665: free(cset, M_RAIDFRAME);
3666: }
3667:
3668:
3669: void
1.169 oster 3670: raid_init_component_label(RF_Raid_t *raidPtr, RF_ComponentLabel_t *clabel)
1.48 oster 3671: {
3672: /* current version number */
1.186 perry 3673: clabel->version = RF_COMPONENT_LABEL_VERSION;
1.57 oster 3674: clabel->serial_number = raidPtr->serial_number;
1.48 oster 3675: clabel->mod_counter = raidPtr->mod_counter;
1.269 jld 3676:
1.166 oster 3677: clabel->num_rows = 1;
1.48 oster 3678: clabel->num_columns = raidPtr->numCol;
3679: clabel->clean = RF_RAID_DIRTY; /* not clean */
3680: clabel->status = rf_ds_optimal; /* "It's good!" */
1.186 perry 3681:
1.48 oster 3682: clabel->sectPerSU = raidPtr->Layout.sectorsPerStripeUnit;
3683: clabel->SUsPerPU = raidPtr->Layout.SUsPerPU;
3684: clabel->SUsPerRU = raidPtr->Layout.SUsPerRU;
1.54 oster 3685:
3686: clabel->blockSize = raidPtr->bytesPerSector;
1.282 enami 3687: rf_component_label_set_numblocks(clabel, raidPtr->sectorsPerDisk);
1.54 oster 3688:
1.48 oster 3689: /* XXX not portable */
3690: clabel->parityConfig = raidPtr->Layout.map->parityConfig;
1.54 oster 3691: clabel->maxOutstanding = raidPtr->maxOutstanding;
3692: clabel->autoconfigure = raidPtr->autoconfigure;
3693: clabel->root_partition = raidPtr->root_partition;
1.48 oster 3694: clabel->last_unit = raidPtr->raidid;
1.54 oster 3695: clabel->config_order = raidPtr->config_order;
1.269 jld 3696:
3697: #ifndef RF_NO_PARITY_MAP
3698: rf_paritymap_init_label(raidPtr->parity_map, clabel);
3699: #endif
1.51 oster 3700: }
3701:
1.300 christos 3702: struct raid_softc *
3703: rf_auto_config_set(RF_ConfigSet_t *cset)
1.51 oster 3704: {
3705: RF_Raid_t *raidPtr;
3706: RF_Config_t *config;
3707: int raidID;
1.300 christos 3708: struct raid_softc *sc;
1.51 oster 3709:
1.224 oster 3710: #ifdef DEBUG
1.72 oster 3711: printf("RAID autoconfigure\n");
1.127 oster 3712: #endif
1.51 oster 3713:
3714: /* 1. Create a config structure */
1.300 christos 3715: config = malloc(sizeof(*config), M_RAIDFRAME, M_NOWAIT|M_ZERO);
3716: if (config == NULL) {
1.51 oster 3717: printf("Out of mem!?!?\n");
3718: /* XXX do something more intelligent here. */
1.300 christos 3719: return NULL;
1.51 oster 3720: }
1.77 oster 3721:
1.186 perry 3722: /*
3723: 2. Figure out what RAID ID this one is supposed to live at
1.51 oster 3724: See if we can get the same RAID dev that it was configured
1.186 perry 3725: on last time..
1.51 oster 3726: */
3727:
3728: raidID = cset->ac->clabel->last_unit;
1.300 christos 3729: for (sc = raidget(raidID); sc->sc_r.valid != 0; sc = raidget(++raidID))
3730: continue;
1.224 oster 3731: #ifdef DEBUG
1.72 oster 3732: printf("Configuring raid%d:\n",raidID);
1.127 oster 3733: #endif
3734:
1.300 christos 3735: raidPtr = &sc->sc_r;
1.51 oster 3736:
3737: /* XXX all this stuff should be done SOMEWHERE ELSE! */
1.302 christos 3738: raidPtr->softc = sc;
1.51 oster 3739: raidPtr->raidid = raidID;
3740: raidPtr->openings = RAIDOUTSTANDING;
3741:
3742: /* 3. Build the configuration structure */
3743: rf_create_configuration(cset->ac, config, raidPtr);
3744:
3745: /* 4. Do the configuration */
1.300 christos 3746: if (rf_Configure(raidPtr, config, cset->ac) == 0) {
3747: raidinit(sc);
1.186 perry 3748:
1.300 christos 3749: rf_markalldirty(raidPtr);
3750: raidPtr->autoconfigure = 1; /* XXX do this here? */
1.51 oster 3751: if (cset->ac->clabel->root_partition==1) {
3752: /* everything configured just fine. Make a note
3753: that this set is eligible to be root. */
3754: cset->rootable = 1;
1.54 oster 3755: /* XXX do this here? */
1.300 christos 3756: raidPtr->root_partition = 1;
1.51 oster 3757: }
1.300 christos 3758: } else {
3759: raidput(sc);
3760: sc = NULL;
1.51 oster 3761: }
3762:
3763: /* 5. Cleanup */
3764: free(config, M_RAIDFRAME);
1.300 christos 3765: return sc;
1.99 oster 3766: }
3767:
3768: void
1.169 oster 3769: rf_disk_unbusy(RF_RaidAccessDesc_t *desc)
1.99 oster 3770: {
3771: struct buf *bp;
1.300 christos 3772: struct raid_softc *rs;
1.99 oster 3773:
3774: bp = (struct buf *)desc->bp;
1.300 christos 3775: rs = desc->raidPtr->softc;
3776: disk_unbusy(&rs->sc_dkdev, (bp->b_bcount - bp->b_resid),
3777: (bp->b_flags & B_READ));
1.13 oster 3778: }
1.177 oster 3779:
3780: void
1.187 christos 3781: rf_pool_init(struct pool *p, size_t size, const char *w_chan,
3782: size_t xmin, size_t xmax)
1.177 oster 3783: {
1.227 ad 3784: pool_init(p, size, 0, 0, 0, w_chan, NULL, IPL_BIO);
1.187 christos 3785: pool_sethiwat(p, xmax);
3786: pool_prime(p, xmin);
3787: pool_setlowat(p, xmin);
1.177 oster 3788: }
1.190 oster 3789:
3790: /*
1.300 christos 3791: * rf_buf_queue_check(RF_Raid_t raidPtr) -- looks into the buf_queue to see
1.190 oster 3792: * if there is IO pending and if that IO could possibly be done for a
3793: * given RAID set. Returns 0 if IO is waiting and can be done, 1
3794: * otherwise.
3795: *
3796: */
3797:
3798: int
1.300 christos 3799: rf_buf_queue_check(RF_Raid_t *raidPtr)
1.190 oster 3800: {
1.300 christos 3801: struct raid_softc *rs = raidPtr->softc;
3802: if ((bufq_peek(rs->buf_queue) != NULL) && raidPtr->openings > 0) {
1.190 oster 3803: /* there is work to do */
3804: return 0;
3805: }
3806: /* default is nothing to do */
3807: return 1;
3808: }
1.213 christos 3809:
3810: int
1.294 oster 3811: rf_getdisksize(struct vnode *vp, RF_RaidDisk_t *diskPtr)
1.213 christos 3812: {
1.275 mrg 3813: uint64_t numsecs;
3814: unsigned secsize;
1.213 christos 3815: int error;
3816:
1.275 mrg 3817: error = getdisksize(vp, &numsecs, &secsize);
1.213 christos 3818: if (error == 0) {
1.275 mrg 3819: diskPtr->blockSize = secsize;
3820: diskPtr->numBlocks = numsecs - rf_protectedSectors;
3821: diskPtr->partitionSize = numsecs;
1.213 christos 3822: return 0;
3823: }
3824: return error;
3825: }
1.217 oster 3826:
3827: static int
1.261 dyoung 3828: raid_match(device_t self, cfdata_t cfdata, void *aux)
1.217 oster 3829: {
3830: return 1;
3831: }
3832:
3833: static void
1.261 dyoung 3834: raid_attach(device_t parent, device_t self, void *aux)
1.217 oster 3835: {
3836:
3837: }
3838:
3839:
3840: static int
1.261 dyoung 3841: raid_detach(device_t self, int flags)
1.217 oster 3842: {
1.266 dyoung 3843: int error;
1.303 ! christos 3844: struct raid_softc *rs = raidget(device_unit(self));
! 3845:
! 3846: if (rs == NULL)
! 3847: return ENXIO;
1.266 dyoung 3848:
3849: if ((error = raidlock(rs)) != 0)
3850: return (error);
1.217 oster 3851:
1.266 dyoung 3852: error = raid_detach_unlocked(rs);
3853:
3854: raidunlock(rs);
1.217 oster 3855:
1.303 ! christos 3856: /* XXXkd: raidput(rs) ??? */
! 3857:
1.266 dyoung 3858: return error;
1.217 oster 3859: }
3860:
1.234 oster 3861: static void
3862: rf_set_properties(struct raid_softc *rs, RF_Raid_t *raidPtr)
3863: {
3864: prop_dictionary_t disk_info, odisk_info, geom;
3865: disk_info = prop_dictionary_create();
3866: geom = prop_dictionary_create();
3867: prop_dictionary_set_uint64(geom, "sectors-per-unit",
3868: raidPtr->totalSectors);
3869: prop_dictionary_set_uint32(geom, "sector-size",
3870: raidPtr->bytesPerSector);
3871:
3872: prop_dictionary_set_uint16(geom, "sectors-per-track",
3873: raidPtr->Layout.dataSectorsPerStripe);
3874: prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
3875: 4 * raidPtr->numCol);
3876:
3877: prop_dictionary_set_uint64(geom, "cylinders-per-unit",
3878: raidPtr->totalSectors / (raidPtr->Layout.dataSectorsPerStripe *
3879: (4 * raidPtr->numCol)));
3880:
3881: prop_dictionary_set(disk_info, "geometry", geom);
3882: prop_object_release(geom);
3883: prop_dictionary_set(device_properties(rs->sc_dev),
3884: "disk-info", disk_info);
3885: odisk_info = rs->sc_dkdev.dk_info;
3886: rs->sc_dkdev.dk_info = disk_info;
3887: if (odisk_info)
3888: prop_object_release(odisk_info);
3889: }
1.252 oster 3890:
3891: /*
3892: * Implement forwarding of the DIOCCACHESYNC ioctl to each of the components.
3893: * We end up returning whatever error was returned by the first cache flush
3894: * that fails.
3895: */
3896:
1.269 jld 3897: int
1.252 oster 3898: rf_sync_component_caches(RF_Raid_t *raidPtr)
3899: {
3900: int c, sparecol;
3901: int e,error;
3902: int force = 1;
3903:
3904: error = 0;
3905: for (c = 0; c < raidPtr->numCol; c++) {
3906: if (raidPtr->Disks[c].status == rf_ds_optimal) {
3907: e = VOP_IOCTL(raidPtr->raid_cinfo[c].ci_vp, DIOCCACHESYNC,
3908: &force, FWRITE, NOCRED);
3909: if (e) {
1.255 oster 3910: if (e != ENODEV)
3911: printf("raid%d: cache flush to component %s failed.\n",
3912: raidPtr->raidid, raidPtr->Disks[c].devname);
1.252 oster 3913: if (error == 0) {
3914: error = e;
3915: }
3916: }
3917: }
3918: }
3919:
3920: for( c = 0; c < raidPtr->numSpare ; c++) {
3921: sparecol = raidPtr->numCol + c;
3922: /* Need to ensure that the reconstruct actually completed! */
3923: if (raidPtr->Disks[sparecol].status == rf_ds_used_spare) {
3924: e = VOP_IOCTL(raidPtr->raid_cinfo[sparecol].ci_vp,
3925: DIOCCACHESYNC, &force, FWRITE, NOCRED);
3926: if (e) {
1.255 oster 3927: if (e != ENODEV)
3928: printf("raid%d: cache flush to component %s failed.\n",
3929: raidPtr->raidid, raidPtr->Disks[sparecol].devname);
1.252 oster 3930: if (error == 0) {
3931: error = e;
3932: }
3933: }
3934: }
3935: }
3936: return error;
3937: }
CVSweb <webmaster@jp.NetBSD.org>