Annotation of src/sys/ufs/ufs/ufs_dirhash.c, Revision 1.1.2.3
1.1.2.3 ! skrll 1: /* $NetBSD: ufs_dirhash.c,v 1.1.2.2 2005/03/04 16:55:00 skrll Exp $ */
1.1.2.2 skrll 2:
3: /*
4: * Copyright (c) 2001, 2002 Ian Dowse. All rights reserved.
5: *
6: * Redistribution and use in source and binary forms, with or without
7: * modification, are permitted provided that the following conditions
8: * are met:
9: * 1. Redistributions of source code must retain the above copyright
10: * notice, this list of conditions and the following disclaimer.
11: * 2. Redistributions in binary form must reproduce the above copyright
12: * notice, this list of conditions and the following disclaimer in the
13: * documentation and/or other materials provided with the distribution.
14: *
15: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25: * SUCH DAMAGE.
26: *
27: * $FreeBSD: src/sys/ufs/ufs/ufs_dirhash.c,v 1.3.2.8 2004/12/08 11:54:13 dwmalone Exp $
28: */
29:
30: /*
31: * This implements a hash-based lookup scheme for UFS directories.
32: */
33:
34: #ifdef UFS_DIRHASH
35:
36: #include <sys/param.h>
37: #include <sys/systm.h>
38: #include <sys/kernel.h>
39: #include <sys/malloc.h>
40: #include <sys/types.h>
41: #include <sys/hash.h>
42: #include <sys/proc.h>
43: #include <sys/buf.h>
44: #include <sys/vnode.h>
45: #include <sys/mount.h>
46: #include <sys/pool.h>
47: #include <sys/sysctl.h>
48:
49: #include <ufs/ufs/quota.h>
50: #include <ufs/ufs/inode.h>
51: #include <ufs/ufs/dir.h>
52: #include <ufs/ufs/dirhash.h>
53: #include <ufs/ufs/ufsmount.h>
54: #include <ufs/ufs/ufs_bswap.h>
55: #include <ufs/ufs/ufs_extern.h>
56:
57: #define WRAPINCR(val, limit) (((val) + 1 == (limit)) ? 0 : ((val) + 1))
58: #define WRAPDECR(val, limit) (((val) == 0) ? ((limit) - 1) : ((val) - 1))
59: #define OFSFMT(ip) ((ip)->i_ump->um_maxsymlinklen <= 0)
60: #define BLKFREE2IDX(n) ((n) > DH_NFSTATS ? DH_NFSTATS : (n))
61:
62: static MALLOC_DEFINE(M_DIRHASH, "UFS dirhash", "UFS directory hash tables");
63:
64: static int ufs_dirhashminblks = 5;
65: static int ufs_dirhashmaxmem = 2 * 1024 * 1024;
1.1.2.3 ! skrll 66: static int ufs_dirhashmem;
1.1.2.2 skrll 67: static int ufs_dirhashcheck = 0;
68:
69: static int ufsdirhash_hash(struct dirhash *dh, const char *name, int namelen);
70: static void ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff,
71: int dirblksiz);
72: static void ufsdirhash_delslot(struct dirhash *dh, int slot);
73: static int ufsdirhash_findslot(struct dirhash *dh, const char *name,
74: int namelen, doff_t offset);
75: static doff_t ufsdirhash_getprev(struct direct *dp, doff_t offset,
76: int dirblksiz);
77: static int ufsdirhash_recycle(int wanted);
78:
79: POOL_INIT(ufsdirhash_pool, DH_NBLKOFF * sizeof(daddr_t), 0, 0, 0, "ufsdirhash",
80: &pool_allocator_nointr);
81:
82: #define DIRHASHLIST_LOCK() do { } while (0)
83: #define DIRHASHLIST_UNLOCK() do { } while (0)
84: #define DIRHASH_LOCK(dh) do { } while (0)
85: #define DIRHASH_UNLOCK(dh) do { } while (0)
86: #define DIRHASH_BLKALLOC_WAITOK() pool_get(&ufsdirhash_pool, PR_WAITOK)
87: #define DIRHASH_BLKFREE(ptr) pool_put(&ufsdirhash_pool, ptr)
88:
89: /* Dirhash list; recently-used entries are near the tail. */
90: static TAILQ_HEAD(, dirhash) ufsdirhash_list;
91:
92: /*
93: * Attempt to build up a hash table for the directory contents in
94: * inode 'ip'. Returns 0 on success, or -1 of the operation failed.
95: */
96: int
97: ufsdirhash_build(struct inode *ip)
98: {
99: struct dirhash *dh;
100: struct buf *bp = NULL;
101: struct direct *ep;
102: struct vnode *vp;
103: doff_t bmask, pos;
104: int dirblocks, i, j, memreqd, nblocks, narrays, nslots, slot;
1.1.2.3 ! skrll 105: const int needswap = UFS_MPNEEDSWAP(ip->i_ump);
1.1.2.2 skrll 106: int dirblksiz = ip->i_ump->um_dirblksiz;
107:
108: /* Check if we can/should use dirhash. */
109: if (ip->i_dirhash == NULL) {
110: if (ip->i_size < (ufs_dirhashminblks * dirblksiz) || OFSFMT(ip))
111: return (-1);
112: } else {
113: /* Hash exists, but sysctls could have changed. */
114: if (ip->i_size < (ufs_dirhashminblks * dirblksiz) ||
115: ufs_dirhashmem > ufs_dirhashmaxmem) {
116: ufsdirhash_free(ip);
117: return (-1);
118: }
119: /* Check if hash exists and is intact (note: unlocked read). */
120: if (ip->i_dirhash->dh_hash != NULL)
121: return (0);
122: /* Free the old, recycled hash and build a new one. */
123: ufsdirhash_free(ip);
124: }
125:
126: /* Don't hash removed directories. */
127: if (ip->i_ffs_effnlink == 0)
128: return (-1);
129:
130: vp = ip->i_vnode;
131: /* Allocate 50% more entries than this dir size could ever need. */
132: KASSERT(ip->i_size >= dirblksiz);
133: nslots = ip->i_size / DIRECTSIZ(1);
134: nslots = (nslots * 3 + 1) / 2;
135: narrays = howmany(nslots, DH_NBLKOFF);
136: nslots = narrays * DH_NBLKOFF;
137: dirblocks = howmany(ip->i_size, dirblksiz);
138: nblocks = (dirblocks * 3 + 1) / 2;
139:
140: memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) +
141: narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
142: nblocks * sizeof(*dh->dh_blkfree);
143: DIRHASHLIST_LOCK();
144: if (memreqd + ufs_dirhashmem > ufs_dirhashmaxmem) {
145: DIRHASHLIST_UNLOCK();
146: if (memreqd > ufs_dirhashmaxmem / 2)
147: return (-1);
148:
149: /* Try to free some space. */
150: if (ufsdirhash_recycle(memreqd) != 0)
151: return (-1);
152: /* Enough was freed, and list has been locked. */
153: }
154: ufs_dirhashmem += memreqd;
155: DIRHASHLIST_UNLOCK();
156:
157: /*
158: * Use non-blocking mallocs so that we will revert to a linear
159: * lookup on failure rather than potentially blocking forever.
160: */
161: MALLOC(dh, struct dirhash *, sizeof *dh, M_DIRHASH, M_NOWAIT | M_ZERO);
162: if (dh == NULL) {
163: DIRHASHLIST_LOCK();
164: ufs_dirhashmem -= memreqd;
165: DIRHASHLIST_UNLOCK();
166: return (-1);
167: }
168: MALLOC(dh->dh_hash, doff_t **, narrays * sizeof(dh->dh_hash[0]),
169: M_DIRHASH, M_NOWAIT | M_ZERO);
170: MALLOC(dh->dh_blkfree, u_int8_t *, nblocks * sizeof(dh->dh_blkfree[0]),
171: M_DIRHASH, M_NOWAIT);
172: if (dh->dh_hash == NULL || dh->dh_blkfree == NULL)
173: goto fail;
174: for (i = 0; i < narrays; i++) {
175: if ((dh->dh_hash[i] = DIRHASH_BLKALLOC_WAITOK()) == NULL)
176: goto fail;
177: for (j = 0; j < DH_NBLKOFF; j++)
178: dh->dh_hash[i][j] = DIRHASH_EMPTY;
179: }
180:
181: /* Initialise the hash table and block statistics. */
182: dh->dh_narrays = narrays;
183: dh->dh_hlen = nslots;
184: dh->dh_nblk = nblocks;
185: dh->dh_dirblks = dirblocks;
186: for (i = 0; i < dirblocks; i++)
187: dh->dh_blkfree[i] = dirblksiz / DIRALIGN;
188: for (i = 0; i < DH_NFSTATS; i++)
189: dh->dh_firstfree[i] = -1;
190: dh->dh_firstfree[DH_NFSTATS] = 0;
191: dh->dh_seqopt = 0;
192: dh->dh_seqoff = 0;
193: dh->dh_score = DH_SCOREINIT;
194: ip->i_dirhash = dh;
195:
196: bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
197: pos = 0;
198: while (pos < ip->i_size) {
199: /* If necessary, get the next directory block. */
200: if ((pos & bmask) == 0) {
201: if (bp != NULL)
202: brelse(bp);
203: if (VOP_BLKATOFF(vp, (off_t)pos, NULL, &bp) != 0)
204: goto fail;
205: }
206:
207: /* Add this entry to the hash. */
208: ep = (struct direct *)((char *)bp->b_data + (pos & bmask));
209: if (ep->d_reclen == 0 || ep->d_reclen >
210: dirblksiz - (pos & (dirblksiz - 1))) {
211: /* Corrupted directory. */
212: brelse(bp);
213: goto fail;
214: }
215: if (ep->d_ino != 0) {
216: /* Add the entry (simplified ufsdirhash_add). */
217: slot = ufsdirhash_hash(dh, ep->d_name, ep->d_namlen);
218: while (DH_ENTRY(dh, slot) != DIRHASH_EMPTY)
219: slot = WRAPINCR(slot, dh->dh_hlen);
220: dh->dh_hused++;
221: DH_ENTRY(dh, slot) = pos;
222: ufsdirhash_adjfree(dh, pos, -DIRSIZ(0, ep, needswap),
223: dirblksiz);
224: }
225: pos += ep->d_reclen;
226: }
227:
228: if (bp != NULL)
229: brelse(bp);
230: DIRHASHLIST_LOCK();
231: TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list);
232: dh->dh_onlist = 1;
233: DIRHASHLIST_UNLOCK();
234: return (0);
235:
236: fail:
237: if (dh->dh_hash != NULL) {
238: for (i = 0; i < narrays; i++)
239: if (dh->dh_hash[i] != NULL)
240: DIRHASH_BLKFREE(dh->dh_hash[i]);
241: FREE(dh->dh_hash, M_DIRHASH);
242: }
243: if (dh->dh_blkfree != NULL)
244: FREE(dh->dh_blkfree, M_DIRHASH);
245: FREE(dh, M_DIRHASH);
246: ip->i_dirhash = NULL;
247: DIRHASHLIST_LOCK();
248: ufs_dirhashmem -= memreqd;
249: DIRHASHLIST_UNLOCK();
250: return (-1);
251: }
252:
253: /*
254: * Free any hash table associated with inode 'ip'.
255: */
256: void
257: ufsdirhash_free(struct inode *ip)
258: {
259: struct dirhash *dh;
260: int i, mem;
261:
262: if ((dh = ip->i_dirhash) == NULL)
263: return;
264: DIRHASHLIST_LOCK();
265: DIRHASH_LOCK(dh);
266: if (dh->dh_onlist)
267: TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
268: DIRHASH_UNLOCK(dh);
269: DIRHASHLIST_UNLOCK();
270:
271: /* The dirhash pointed to by 'dh' is exclusively ours now. */
272:
273: mem = sizeof(*dh);
274: if (dh->dh_hash != NULL) {
275: for (i = 0; i < dh->dh_narrays; i++)
276: DIRHASH_BLKFREE(dh->dh_hash[i]);
277: FREE(dh->dh_hash, M_DIRHASH);
278: FREE(dh->dh_blkfree, M_DIRHASH);
279: mem += dh->dh_narrays * sizeof(*dh->dh_hash) +
280: dh->dh_narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
281: dh->dh_nblk * sizeof(*dh->dh_blkfree);
282: }
283: FREE(dh, M_DIRHASH);
284: ip->i_dirhash = NULL;
285:
286: DIRHASHLIST_LOCK();
287: ufs_dirhashmem -= mem;
288: DIRHASHLIST_UNLOCK();
289: }
290:
291: /*
292: * Find the offset of the specified name within the given inode.
293: * Returns 0 on success, ENOENT if the entry does not exist, or
294: * EJUSTRETURN if the caller should revert to a linear search.
295: *
296: * If successful, the directory offset is stored in *offp, and a
297: * pointer to a struct buf containing the entry is stored in *bpp. If
298: * prevoffp is non-NULL, the offset of the previous entry within
299: * the DIRBLKSIZ-sized block is stored in *prevoffp (if the entry
300: * is the first in a block, the start of the block is used).
301: */
302: int
303: ufsdirhash_lookup(struct inode *ip, const char *name, int namelen, doff_t *offp,
304: struct buf **bpp, doff_t *prevoffp)
305: {
306: struct dirhash *dh, *dh_next;
307: struct direct *dp;
308: struct vnode *vp;
309: struct buf *bp;
310: doff_t blkoff, bmask, offset, prevoff;
311: int i, slot;
1.1.2.3 ! skrll 312: const int needswap = UFS_MPNEEDSWAP(ip->i_ump);
1.1.2.2 skrll 313: int dirblksiz = ip->i_ump->um_dirblksiz;
314:
315: if ((dh = ip->i_dirhash) == NULL)
316: return (EJUSTRETURN);
317: /*
318: * Move this dirhash towards the end of the list if it has a
319: * score higher than the next entry, and acquire the dh_mtx.
320: * Optimise the case where it's already the last by performing
321: * an unlocked read of the TAILQ_NEXT pointer.
322: *
323: * In both cases, end up holding just dh_mtx.
324: */
325: if (TAILQ_NEXT(dh, dh_list) != NULL) {
326: DIRHASHLIST_LOCK();
327: DIRHASH_LOCK(dh);
328: /*
329: * If the new score will be greater than that of the next
330: * entry, then move this entry past it. With both mutexes
331: * held, dh_next won't go away, but its dh_score could
332: * change; that's not important since it is just a hint.
333: */
334: if (dh->dh_hash != NULL &&
335: (dh_next = TAILQ_NEXT(dh, dh_list)) != NULL &&
336: dh->dh_score >= dh_next->dh_score) {
337: KASSERT(dh->dh_onlist);
338: TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
339: TAILQ_INSERT_AFTER(&ufsdirhash_list, dh_next, dh,
340: dh_list);
341: }
342: DIRHASHLIST_UNLOCK();
343: } else {
344: /* Already the last, though that could change as we wait. */
345: DIRHASH_LOCK(dh);
346: }
347: if (dh->dh_hash == NULL) {
348: DIRHASH_UNLOCK(dh);
349: ufsdirhash_free(ip);
350: return (EJUSTRETURN);
351: }
352:
353: /* Update the score. */
354: if (dh->dh_score < DH_SCOREMAX)
355: dh->dh_score++;
356:
357: vp = ip->i_vnode;
358: bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
359: blkoff = -1;
360: bp = NULL;
361: restart:
362: slot = ufsdirhash_hash(dh, name, namelen);
363:
364: if (dh->dh_seqopt) {
365: /*
366: * Sequential access optimisation. dh_seqoff contains the
367: * offset of the directory entry immediately following
368: * the last entry that was looked up. Check if this offset
369: * appears in the hash chain for the name we are looking for.
370: */
371: for (i = slot; (offset = DH_ENTRY(dh, i)) != DIRHASH_EMPTY;
372: i = WRAPINCR(i, dh->dh_hlen))
373: if (offset == dh->dh_seqoff)
374: break;
375: if (offset == dh->dh_seqoff) {
376: /*
377: * We found an entry with the expected offset. This
378: * is probably the entry we want, but if not, the
379: * code below will turn off seqoff and retry.
1.1.2.3 ! skrll 380: */
1.1.2.2 skrll 381: slot = i;
1.1.2.3 ! skrll 382: } else
1.1.2.2 skrll 383: dh->dh_seqopt = 0;
384: }
385:
386: for (; (offset = DH_ENTRY(dh, slot)) != DIRHASH_EMPTY;
387: slot = WRAPINCR(slot, dh->dh_hlen)) {
388: if (offset == DIRHASH_DEL)
389: continue;
390: DIRHASH_UNLOCK(dh);
391:
392: if (offset < 0 || offset >= ip->i_size)
393: panic("ufsdirhash_lookup: bad offset in hash array");
394: if ((offset & ~bmask) != blkoff) {
395: if (bp != NULL)
396: brelse(bp);
397: blkoff = offset & ~bmask;
398: if (VOP_BLKATOFF(vp, (off_t)blkoff, NULL, &bp) != 0)
399: return (EJUSTRETURN);
400: }
401: dp = (struct direct *)(bp->b_data + (offset & bmask));
402: if (dp->d_reclen == 0 || dp->d_reclen >
403: dirblksiz - (offset & (dirblksiz - 1))) {
404: /* Corrupted directory. */
405: brelse(bp);
406: return (EJUSTRETURN);
407: }
408: if (dp->d_namlen == namelen &&
409: bcmp(dp->d_name, name, namelen) == 0) {
410: /* Found. Get the prev offset if needed. */
411: if (prevoffp != NULL) {
412: if (offset & (dirblksiz - 1)) {
413: prevoff = ufsdirhash_getprev(dp,
414: offset, dirblksiz);
415: if (prevoff == -1) {
416: brelse(bp);
417: return (EJUSTRETURN);
418: }
419: } else
420: prevoff = offset;
421: *prevoffp = prevoff;
422: }
423:
424: /* Check for sequential access, and update offset. */
425: if (dh->dh_seqopt == 0 && dh->dh_seqoff == offset)
426: dh->dh_seqopt = 1;
427: dh->dh_seqoff = offset + DIRSIZ(0, dp, needswap);
428:
429: *bpp = bp;
430: *offp = offset;
431: return (0);
432: }
433:
434: DIRHASH_LOCK(dh);
435: if (dh->dh_hash == NULL) {
436: DIRHASH_UNLOCK(dh);
437: if (bp != NULL)
438: brelse(bp);
439: ufsdirhash_free(ip);
440: return (EJUSTRETURN);
441: }
442: /*
443: * When the name doesn't match in the seqopt case, go back
444: * and search normally.
445: */
446: if (dh->dh_seqopt) {
447: dh->dh_seqopt = 0;
448: goto restart;
449: }
450: }
451: DIRHASH_UNLOCK(dh);
452: if (bp != NULL)
453: brelse(bp);
454: return (ENOENT);
455: }
456:
457: /*
458: * Find a directory block with room for 'slotneeded' bytes. Returns
459: * the offset of the directory entry that begins the free space.
460: * This will either be the offset of an existing entry that has free
461: * space at the end, or the offset of an entry with d_ino == 0 at
462: * the start of a DIRBLKSIZ block.
463: *
464: * To use the space, the caller may need to compact existing entries in
465: * the directory. The total number of bytes in all of the entries involved
466: * in the compaction is stored in *slotsize. In other words, all of
467: * the entries that must be compacted are exactly contained in the
468: * region beginning at the returned offset and spanning *slotsize bytes.
469: *
470: * Returns -1 if no space was found, indicating that the directory
471: * must be extended.
472: */
473: doff_t
474: ufsdirhash_findfree(struct inode *ip, int slotneeded, int *slotsize)
475: {
476: struct direct *dp;
477: struct dirhash *dh;
478: struct buf *bp;
479: doff_t pos, slotstart;
480: int dirblock, error, freebytes, i;
1.1.2.3 ! skrll 481: const int needswap = UFS_MPNEEDSWAP(ip->i_ump);
1.1.2.2 skrll 482: int dirblksiz = ip->i_ump->um_dirblksiz;
483:
484: if ((dh = ip->i_dirhash) == NULL)
485: return (-1);
486: DIRHASH_LOCK(dh);
487: if (dh->dh_hash == NULL) {
488: DIRHASH_UNLOCK(dh);
489: ufsdirhash_free(ip);
490: return (-1);
491: }
492:
493: /* Find a directory block with the desired free space. */
494: dirblock = -1;
495: for (i = howmany(slotneeded, DIRALIGN); i <= DH_NFSTATS; i++)
496: if ((dirblock = dh->dh_firstfree[i]) != -1)
497: break;
498: if (dirblock == -1) {
499: DIRHASH_UNLOCK(dh);
500: return (-1);
501: }
502:
503: KASSERT(dirblock < dh->dh_nblk &&
504: dh->dh_blkfree[dirblock] >= howmany(slotneeded, DIRALIGN));
505: DIRHASH_UNLOCK(dh);
506: pos = dirblock * dirblksiz;
507: error = VOP_BLKATOFF(ip->i_vnode, (off_t)pos, (void *)&dp, &bp);
508: if (error)
509: return (-1);
510: /* Find the first entry with free space. */
511: for (i = 0; i < dirblksiz; ) {
512: if (dp->d_reclen == 0) {
513: brelse(bp);
514: return (-1);
515: }
516: if (dp->d_ino == 0 || dp->d_reclen > DIRSIZ(0, dp, needswap))
517: break;
518: i += dp->d_reclen;
519: dp = (struct direct *)((char *)dp + dp->d_reclen);
520: }
521: if (i > dirblksiz) {
522: brelse(bp);
523: return (-1);
524: }
525: slotstart = pos + i;
526:
527: /* Find the range of entries needed to get enough space */
528: freebytes = 0;
529: while (i < dirblksiz && freebytes < slotneeded) {
530: freebytes += dp->d_reclen;
531: if (dp->d_ino != 0)
532: freebytes -= DIRSIZ(0, dp, needswap);
533: if (dp->d_reclen == 0) {
534: brelse(bp);
535: return (-1);
536: }
537: i += dp->d_reclen;
538: dp = (struct direct *)((char *)dp + dp->d_reclen);
539: }
540: if (i > dirblksiz) {
541: brelse(bp);
542: return (-1);
543: }
544: if (freebytes < slotneeded)
545: panic("ufsdirhash_findfree: free mismatch");
546: brelse(bp);
547: *slotsize = pos + i - slotstart;
548: return (slotstart);
549: }
550:
551: /*
552: * Return the start of the unused space at the end of a directory, or
553: * -1 if there are no trailing unused blocks.
554: */
555: doff_t
556: ufsdirhash_enduseful(struct inode *ip)
557: {
558: struct dirhash *dh;
559: int i;
560: int dirblksiz = ip->i_ump->um_dirblksiz;
561:
562: if ((dh = ip->i_dirhash) == NULL)
563: return (-1);
564: DIRHASH_LOCK(dh);
565: if (dh->dh_hash == NULL) {
566: DIRHASH_UNLOCK(dh);
567: ufsdirhash_free(ip);
568: return (-1);
569: }
570:
571: if (dh->dh_blkfree[dh->dh_dirblks - 1] != dirblksiz / DIRALIGN) {
572: DIRHASH_UNLOCK(dh);
573: return (-1);
574: }
575:
576: for (i = dh->dh_dirblks - 1; i >= 0; i--)
577: if (dh->dh_blkfree[i] != dirblksiz / DIRALIGN)
578: break;
579: DIRHASH_UNLOCK(dh);
580: return ((doff_t)(i + 1) * dirblksiz);
581: }
582:
583: /*
584: * Insert information into the hash about a new directory entry. dirp
585: * points to a struct direct containing the entry, and offset specifies
586: * the offset of this entry.
587: */
588: void
589: ufsdirhash_add(struct inode *ip, struct direct *dirp, doff_t offset)
590: {
591: struct dirhash *dh;
592: int slot;
593: const int needswap = UFS_MPNEEDSWAP(ip->i_ump);
594: int dirblksiz = ip->i_ump->um_dirblksiz;
595:
596: if ((dh = ip->i_dirhash) == NULL)
597: return;
598: DIRHASH_LOCK(dh);
599: if (dh->dh_hash == NULL) {
600: DIRHASH_UNLOCK(dh);
601: ufsdirhash_free(ip);
602: return;
603: }
604:
605: KASSERT(offset < dh->dh_dirblks * dirblksiz);
606: /*
607: * Normal hash usage is < 66%. If the usage gets too high then
608: * remove the hash entirely and let it be rebuilt later.
609: */
610: if (dh->dh_hused >= (dh->dh_hlen * 3) / 4) {
611: DIRHASH_UNLOCK(dh);
612: ufsdirhash_free(ip);
613: return;
614: }
615:
616: /* Find a free hash slot (empty or deleted), and add the entry. */
617: slot = ufsdirhash_hash(dh, dirp->d_name, dirp->d_namlen);
618: while (DH_ENTRY(dh, slot) >= 0)
619: slot = WRAPINCR(slot, dh->dh_hlen);
620: if (DH_ENTRY(dh, slot) == DIRHASH_EMPTY)
621: dh->dh_hused++;
622: DH_ENTRY(dh, slot) = offset;
623:
624: /* Update the per-block summary info. */
625: ufsdirhash_adjfree(dh, offset, -DIRSIZ(0, dirp, needswap), dirblksiz);
626: DIRHASH_UNLOCK(dh);
627: }
628:
629: /*
630: * Remove the specified directory entry from the hash. The entry to remove
631: * is defined by the name in `dirp', which must exist at the specified
632: * `offset' within the directory.
633: */
634: void
635: ufsdirhash_remove(struct inode *ip, struct direct *dirp, doff_t offset)
636: {
637: struct dirhash *dh;
638: int slot;
639: const int needswap = UFS_MPNEEDSWAP(ip->i_ump);
640: int dirblksiz = ip->i_ump->um_dirblksiz;
641:
642: if ((dh = ip->i_dirhash) == NULL)
643: return;
644: DIRHASH_LOCK(dh);
645: if (dh->dh_hash == NULL) {
646: DIRHASH_UNLOCK(dh);
647: ufsdirhash_free(ip);
648: return;
649: }
650:
651: KASSERT(offset < dh->dh_dirblks * dirblksiz);
652: /* Find the entry */
653: slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, offset);
654:
655: /* Remove the hash entry. */
656: ufsdirhash_delslot(dh, slot);
657:
658: /* Update the per-block summary info. */
659: ufsdirhash_adjfree(dh, offset, DIRSIZ(0, dirp, needswap), dirblksiz);
660: DIRHASH_UNLOCK(dh);
661: }
662:
663: /*
664: * Change the offset associated with a directory entry in the hash. Used
665: * when compacting directory blocks.
666: */
667: void
668: ufsdirhash_move(struct inode *ip, struct direct *dirp, doff_t oldoff,
669: doff_t newoff)
670: {
671: struct dirhash *dh;
672: int slot;
673:
674: if ((dh = ip->i_dirhash) == NULL)
675: return;
676: DIRHASH_LOCK(dh);
677: if (dh->dh_hash == NULL) {
678: DIRHASH_UNLOCK(dh);
679: ufsdirhash_free(ip);
680: return;
681: }
682:
683: KASSERT(oldoff < dh->dh_dirblks * ip->i_ump->um_dirblksiz &&
684: newoff < dh->dh_dirblks * ip->i_ump->um_dirblksiz);
685: /* Find the entry, and update the offset. */
686: slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, oldoff);
687: DH_ENTRY(dh, slot) = newoff;
688: DIRHASH_UNLOCK(dh);
689: }
690:
691: /*
692: * Inform dirhash that the directory has grown by one block that
693: * begins at offset (i.e. the new length is offset + DIRBLKSIZ).
694: */
695: void
696: ufsdirhash_newblk(struct inode *ip, doff_t offset)
697: {
698: struct dirhash *dh;
699: int block;
700: int dirblksiz = ip->i_ump->um_dirblksiz;
701:
702: if ((dh = ip->i_dirhash) == NULL)
703: return;
704: DIRHASH_LOCK(dh);
705: if (dh->dh_hash == NULL) {
706: DIRHASH_UNLOCK(dh);
707: ufsdirhash_free(ip);
708: return;
709: }
710:
711: KASSERT(offset == dh->dh_dirblks * dirblksiz);
712: block = offset / dirblksiz;
713: if (block >= dh->dh_nblk) {
714: /* Out of space; must rebuild. */
715: DIRHASH_UNLOCK(dh);
716: ufsdirhash_free(ip);
717: return;
718: }
719: dh->dh_dirblks = block + 1;
720:
721: /* Account for the new free block. */
722: dh->dh_blkfree[block] = dirblksiz / DIRALIGN;
723: if (dh->dh_firstfree[DH_NFSTATS] == -1)
724: dh->dh_firstfree[DH_NFSTATS] = block;
725: DIRHASH_UNLOCK(dh);
726: }
727:
728: /*
729: * Inform dirhash that the directory is being truncated.
730: */
731: void
732: ufsdirhash_dirtrunc(struct inode *ip, doff_t offset)
733: {
734: struct dirhash *dh;
735: int block, i;
736: int dirblksiz = ip->i_ump->um_dirblksiz;
737:
738: if ((dh = ip->i_dirhash) == NULL)
739: return;
740: DIRHASH_LOCK(dh);
741: if (dh->dh_hash == NULL) {
742: DIRHASH_UNLOCK(dh);
743: ufsdirhash_free(ip);
744: return;
745: }
746:
747: KASSERT(offset <= dh->dh_dirblks * dirblksiz);
748: block = howmany(offset, dirblksiz);
749: /*
750: * If the directory shrinks to less than 1/8 of dh_nblk blocks
751: * (about 20% of its original size due to the 50% extra added in
752: * ufsdirhash_build) then free it, and let the caller rebuild
753: * if necessary.
754: */
755: if (block < dh->dh_nblk / 8 && dh->dh_narrays > 1) {
756: DIRHASH_UNLOCK(dh);
757: ufsdirhash_free(ip);
758: return;
759: }
760:
761: /*
762: * Remove any `first free' information pertaining to the
763: * truncated blocks. All blocks we're removing should be
764: * completely unused.
765: */
766: if (dh->dh_firstfree[DH_NFSTATS] >= block)
767: dh->dh_firstfree[DH_NFSTATS] = -1;
768: for (i = block; i < dh->dh_dirblks; i++)
769: if (dh->dh_blkfree[i] != dirblksiz / DIRALIGN)
770: panic("ufsdirhash_dirtrunc: blocks in use");
771: for (i = 0; i < DH_NFSTATS; i++)
772: if (dh->dh_firstfree[i] >= block)
773: panic("ufsdirhash_dirtrunc: first free corrupt");
774: dh->dh_dirblks = block;
775: DIRHASH_UNLOCK(dh);
776: }
777:
778: /*
779: * Debugging function to check that the dirhash information about
780: * a directory block matches its actual contents. Panics if a mismatch
781: * is detected.
782: *
783: * On entry, `buf' should point to the start of an in-core
784: * DIRBLKSIZ-sized directory block, and `offset' should contain the
785: * offset from the start of the directory of that block.
786: */
787: void
788: ufsdirhash_checkblock(struct inode *ip, char *buf, doff_t offset)
789: {
790: struct dirhash *dh;
791: struct direct *dp;
792: int block, ffslot, i, nfree;
793: const int needswap = UFS_MPNEEDSWAP(ip->i_ump);
794: int dirblksiz = ip->i_ump->um_dirblksiz;
795:
796: if (!ufs_dirhashcheck)
797: return;
798: if ((dh = ip->i_dirhash) == NULL)
799: return;
800: DIRHASH_LOCK(dh);
801: if (dh->dh_hash == NULL) {
802: DIRHASH_UNLOCK(dh);
803: ufsdirhash_free(ip);
804: return;
805: }
806:
807: block = offset / dirblksiz;
808: if ((offset & (dirblksiz - 1)) != 0 || block >= dh->dh_dirblks)
809: panic("ufsdirhash_checkblock: bad offset");
810:
811: nfree = 0;
812: for (i = 0; i < dirblksiz; i += dp->d_reclen) {
813: dp = (struct direct *)(buf + i);
814: if (dp->d_reclen == 0 || i + dp->d_reclen > dirblksiz)
815: panic("ufsdirhash_checkblock: bad dir");
816:
817: if (dp->d_ino == 0) {
818: #if 0
819: /*
820: * XXX entries with d_ino == 0 should only occur
821: * at the start of a DIRBLKSIZ block. However the
822: * ufs code is tolerant of such entries at other
823: * offsets, and fsck does not fix them.
824: */
825: if (i != 0)
826: panic("ufsdirhash_checkblock: bad dir inode");
827: #endif
828: nfree += dp->d_reclen;
829: continue;
830: }
831:
832: /* Check that the entry exists (will panic if it doesn't). */
833: ufsdirhash_findslot(dh, dp->d_name, dp->d_namlen, offset + i);
834:
835: nfree += dp->d_reclen - DIRSIZ(0, dp, needswap);
836: }
837: if (i != dirblksiz)
838: panic("ufsdirhash_checkblock: bad dir end");
839:
840: if (dh->dh_blkfree[block] * DIRALIGN != nfree)
841: panic("ufsdirhash_checkblock: bad free count");
842:
843: ffslot = BLKFREE2IDX(nfree / DIRALIGN);
844: for (i = 0; i <= DH_NFSTATS; i++)
845: if (dh->dh_firstfree[i] == block && i != ffslot)
846: panic("ufsdirhash_checkblock: bad first-free");
847: if (dh->dh_firstfree[ffslot] == -1)
848: panic("ufsdirhash_checkblock: missing first-free entry");
849: DIRHASH_UNLOCK(dh);
850: }
851:
852: /*
853: * Hash the specified filename into a dirhash slot.
854: */
855: static int
856: ufsdirhash_hash(struct dirhash *dh, const char *name, int namelen)
857: {
858: u_int32_t hash;
859:
860: /*
861: * We hash the name and then some other bit of data that is
862: * invariant over the dirhash's lifetime. Otherwise names
863: * differing only in the last byte are placed close to one
864: * another in the table, which is bad for linear probing.
865: */
866: hash = hash32_buf(name, namelen, HASH32_BUF_INIT);
867: hash = hash32_buf(&dh, sizeof(dh), hash);
868: return (hash % dh->dh_hlen);
869: }
870:
871: /*
872: * Adjust the number of free bytes in the block containing `offset'
873: * by the value specified by `diff'.
874: *
875: * The caller must ensure we have exclusive access to `dh'; normally
876: * that means that dh_mtx should be held, but this is also called
877: * from ufsdirhash_build() where exclusive access can be assumed.
878: */
879: static void
880: ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff, int dirblksiz)
881: {
882: int block, i, nfidx, ofidx;
883:
884: /* Update the per-block summary info. */
885: block = offset / dirblksiz;
886: KASSERT(block < dh->dh_nblk && block < dh->dh_dirblks);
887: ofidx = BLKFREE2IDX(dh->dh_blkfree[block]);
888: dh->dh_blkfree[block] = (int)dh->dh_blkfree[block] + (diff / DIRALIGN);
889: nfidx = BLKFREE2IDX(dh->dh_blkfree[block]);
890:
891: /* Update the `first free' list if necessary. */
892: if (ofidx != nfidx) {
893: /* If removing, scan forward for the next block. */
894: if (dh->dh_firstfree[ofidx] == block) {
895: for (i = block + 1; i < dh->dh_dirblks; i++)
896: if (BLKFREE2IDX(dh->dh_blkfree[i]) == ofidx)
897: break;
898: dh->dh_firstfree[ofidx] = (i < dh->dh_dirblks) ? i : -1;
899: }
900:
901: /* Make this the new `first free' if necessary */
902: if (dh->dh_firstfree[nfidx] > block ||
903: dh->dh_firstfree[nfidx] == -1)
904: dh->dh_firstfree[nfidx] = block;
905: }
906: }
907:
908: /*
909: * Find the specified name which should have the specified offset.
910: * Returns a slot number, and panics on failure.
911: *
912: * `dh' must be locked on entry and remains so on return.
913: */
914: static int
915: ufsdirhash_findslot(struct dirhash *dh, const char *name, int namelen,
916: doff_t offset)
917: {
918: int slot;
919:
920: /* Find the entry. */
921: KASSERT(dh->dh_hused < dh->dh_hlen);
922: slot = ufsdirhash_hash(dh, name, namelen);
923: while (DH_ENTRY(dh, slot) != offset &&
924: DH_ENTRY(dh, slot) != DIRHASH_EMPTY)
925: slot = WRAPINCR(slot, dh->dh_hlen);
926: if (DH_ENTRY(dh, slot) != offset)
927: panic("ufsdirhash_findslot: '%.*s' not found", namelen, name);
928:
929: return (slot);
930: }
931:
932: /*
933: * Remove the entry corresponding to the specified slot from the hash array.
934: *
935: * `dh' must be locked on entry and remains so on return.
936: */
937: static void
938: ufsdirhash_delslot(struct dirhash *dh, int slot)
939: {
940: int i;
941:
942: /* Mark the entry as deleted. */
943: DH_ENTRY(dh, slot) = DIRHASH_DEL;
944:
945: /* If this is the end of a chain of DIRHASH_DEL slots, remove them. */
946: for (i = slot; DH_ENTRY(dh, i) == DIRHASH_DEL; )
947: i = WRAPINCR(i, dh->dh_hlen);
948: if (DH_ENTRY(dh, i) == DIRHASH_EMPTY) {
949: i = WRAPDECR(i, dh->dh_hlen);
950: while (DH_ENTRY(dh, i) == DIRHASH_DEL) {
951: DH_ENTRY(dh, i) = DIRHASH_EMPTY;
952: dh->dh_hused--;
953: i = WRAPDECR(i, dh->dh_hlen);
954: }
955: KASSERT(dh->dh_hused >= 0);
956: }
957: }
958:
959: /*
960: * Given a directory entry and its offset, find the offset of the
961: * previous entry in the same DIRBLKSIZ-sized block. Returns an
962: * offset, or -1 if there is no previous entry in the block or some
963: * other problem occurred.
964: */
965: static doff_t
966: ufsdirhash_getprev(struct direct *dirp, doff_t offset, int dirblksiz)
967: {
968: struct direct *dp;
969: char *blkbuf;
970: doff_t blkoff, prevoff;
971: int entrypos, i;
972:
973: blkoff = offset & ~(dirblksiz - 1); /* offset of start of block */
974: entrypos = offset & (dirblksiz - 1); /* entry relative to block */
975: blkbuf = (char *)dirp - entrypos;
976: prevoff = blkoff;
977:
978: /* If `offset' is the start of a block, there is no previous entry. */
979: if (entrypos == 0)
980: return (-1);
981:
982: /* Scan from the start of the block until we get to the entry. */
983: for (i = 0; i < entrypos; i += dp->d_reclen) {
984: dp = (struct direct *)(blkbuf + i);
985: if (dp->d_reclen == 0 || i + dp->d_reclen > entrypos)
986: return (-1); /* Corrupted directory. */
987: prevoff = blkoff + i;
988: }
989: return (prevoff);
990: }
991:
992: /*
993: * Try to free up `wanted' bytes by stealing memory from existing
994: * dirhashes. Returns zero with list locked if successful.
995: */
996: static int
997: ufsdirhash_recycle(int wanted)
998: {
999: struct dirhash *dh;
1000: doff_t **hash;
1001: u_int8_t *blkfree;
1002: int i, mem, narrays;
1003:
1004: DIRHASHLIST_LOCK();
1005: while (wanted + ufs_dirhashmem > ufs_dirhashmaxmem) {
1006: /* Find a dirhash, and lock it. */
1007: if ((dh = TAILQ_FIRST(&ufsdirhash_list)) == NULL) {
1008: DIRHASHLIST_UNLOCK();
1009: return (-1);
1010: }
1011: DIRHASH_LOCK(dh);
1012: KASSERT(dh->dh_hash != NULL);
1013:
1014: /* Decrement the score; only recycle if it becomes zero. */
1015: if (--dh->dh_score > 0) {
1016: DIRHASH_UNLOCK(dh);
1017: DIRHASHLIST_UNLOCK();
1018: return (-1);
1019: }
1020:
1021: /* Remove it from the list and detach its memory. */
1022: TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
1023: dh->dh_onlist = 0;
1024: hash = dh->dh_hash;
1025: dh->dh_hash = NULL;
1026: blkfree = dh->dh_blkfree;
1027: dh->dh_blkfree = NULL;
1028: narrays = dh->dh_narrays;
1029: mem = narrays * sizeof(*dh->dh_hash) +
1030: narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
1031: dh->dh_nblk * sizeof(*dh->dh_blkfree);
1032:
1033: /* Unlock everything, free the detached memory. */
1034: DIRHASH_UNLOCK(dh);
1035: DIRHASHLIST_UNLOCK();
1036: for (i = 0; i < narrays; i++)
1037: DIRHASH_BLKFREE(hash[i]);
1038: FREE(hash, M_DIRHASH);
1039: FREE(blkfree, M_DIRHASH);
1040:
1041: /* Account for the returned memory, and repeat if necessary. */
1042: DIRHASHLIST_LOCK();
1043: ufs_dirhashmem -= mem;
1044: }
1045: /* Success; return with list locked. */
1046: return (0);
1047: }
1048:
1049: void
1050: ufsdirhash_init()
1051: {
1052: #ifdef _LKM
1053: pool_init(&ufsdirhash_pool, DH_NBLKOFF * sizeof(daddr_t), 0, 0, 0,
1054: "ufsdirhash", &pool_allocator_nointr);
1055: #endif
1056: TAILQ_INIT(&ufsdirhash_list);
1057: }
1058:
1059: void
1060: ufsdirhash_done()
1061: {
1062: KASSERT(TAILQ_EMPTY(&ufsdirhash_list));
1063: #ifdef _LKM
1064: pool_destroy(&ufsdirhash_pool);
1065: #endif
1066: }
1067:
1068: SYSCTL_SETUP(sysctl_vfs_ufs_setup, "sysctl vfs.ufs.dirhash subtree setup")
1069: {
1070: struct sysctlnode *rnode, *cnode;
1071:
1072: sysctl_createv(clog, 0, NULL, &rnode,
1073: CTLFLAG_PERMANENT,
1074: CTLTYPE_NODE, "vfs", NULL,
1075: NULL, 0, NULL, 0,
1076: CTL_VFS, CTL_EOL);
1077:
1078: sysctl_createv(clog, 0, &rnode, &rnode,
1079: CTLFLAG_PERMANENT,
1080: CTLTYPE_NODE, "ufs",
1081: SYSCTL_DESCR("ufs"),
1082: NULL, 0, NULL, 0,
1083: CTL_CREATE, CTL_EOL);
1084:
1085: sysctl_createv(clog, 0, &rnode, &rnode,
1086: CTLFLAG_PERMANENT,
1087: CTLTYPE_NODE, "dirhash",
1088: SYSCTL_DESCR("dirhash"),
1089: NULL, 0, NULL, 0,
1090: CTL_CREATE, CTL_EOL);
1091:
1092: sysctl_createv(clog, 0, &rnode, &cnode,
1093: CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1094: CTLTYPE_INT, "minblocks",
1095: SYSCTL_DESCR("minimum hashed directory size in blocks"),
1096: NULL, 0, &ufs_dirhashminblks, 0,
1097: CTL_CREATE, CTL_EOL);
1098:
1099: sysctl_createv(clog, 0, &rnode, &cnode,
1100: CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1101: CTLTYPE_INT, "maxmem",
1102: SYSCTL_DESCR("maximum dirhash memory usage"),
1103: NULL, 0, &ufs_dirhashmaxmem, 0,
1104: CTL_CREATE, CTL_EOL);
1105:
1106: sysctl_createv(clog, 0, &rnode, &cnode,
1107: CTLFLAG_PERMANENT|CTLFLAG_READONLY,
1108: CTLTYPE_INT, "memused",
1109: SYSCTL_DESCR("current dirhash memory usage"),
1110: NULL, 0, &ufs_dirhashmem, 0,
1111: CTL_CREATE, CTL_EOL);
1112:
1113: sysctl_createv(clog, 0, &rnode, &cnode,
1114: CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1115: CTLTYPE_INT, "docheck",
1116: SYSCTL_DESCR("enable extra sanity checks"),
1117: NULL, 0, &ufs_dirhashcheck, 0,
1118: CTL_CREATE, CTL_EOL);
1119: }
1120:
1121: #endif /* UFS_DIRHASH */
CVSweb <webmaster@jp.NetBSD.org>