Annotation of src/sys/ufs/ufs/ufs_dirhash.c, Revision 1.10.2.1
1.10.2.1! rpaulo 1: /* $NetBSD: ufs_dirhash.c,v 1.11 2006/03/19 17:50:42 matt Exp $ */
1.1 rumble 2:
3: /*
4: * Copyright (c) 2001, 2002 Ian Dowse. All rights reserved.
5: *
6: * Redistribution and use in source and binary forms, with or without
7: * modification, are permitted provided that the following conditions
8: * are met:
9: * 1. Redistributions of source code must retain the above copyright
10: * notice, this list of conditions and the following disclaimer.
11: * 2. Redistributions in binary form must reproduce the above copyright
12: * notice, this list of conditions and the following disclaimer in the
13: * documentation and/or other materials provided with the distribution.
14: *
15: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25: * SUCH DAMAGE.
26: *
27: * $FreeBSD: src/sys/ufs/ufs/ufs_dirhash.c,v 1.3.2.8 2004/12/08 11:54:13 dwmalone Exp $
28: */
29:
30: /*
31: * This implements a hash-based lookup scheme for UFS directories.
32: */
33:
34: #include <sys/param.h>
35: #include <sys/systm.h>
36: #include <sys/kernel.h>
37: #include <sys/malloc.h>
38: #include <sys/types.h>
39: #include <sys/hash.h>
40: #include <sys/proc.h>
41: #include <sys/buf.h>
42: #include <sys/vnode.h>
43: #include <sys/mount.h>
44: #include <sys/pool.h>
45: #include <sys/sysctl.h>
46:
47: #include <ufs/ufs/quota.h>
48: #include <ufs/ufs/inode.h>
49: #include <ufs/ufs/dir.h>
50: #include <ufs/ufs/dirhash.h>
51: #include <ufs/ufs/ufsmount.h>
52: #include <ufs/ufs/ufs_bswap.h>
53: #include <ufs/ufs/ufs_extern.h>
54:
55: #define WRAPINCR(val, limit) (((val) + 1 == (limit)) ? 0 : ((val) + 1))
56: #define WRAPDECR(val, limit) (((val) == 0) ? ((limit) - 1) : ((val) - 1))
57: #define OFSFMT(ip) ((ip)->i_ump->um_maxsymlinklen <= 0)
58: #define BLKFREE2IDX(n) ((n) > DH_NFSTATS ? DH_NFSTATS : (n))
59:
60: static MALLOC_DEFINE(M_DIRHASH, "UFS dirhash", "UFS directory hash tables");
61:
62: static int ufs_dirhashminblks = 5;
63: static int ufs_dirhashmaxmem = 2 * 1024 * 1024;
1.2 perry 64: static int ufs_dirhashmem;
1.1 rumble 65: static int ufs_dirhashcheck = 0;
66:
67: static int ufsdirhash_hash(struct dirhash *dh, const char *name, int namelen);
68: static void ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff,
69: int dirblksiz);
70: static void ufsdirhash_delslot(struct dirhash *dh, int slot);
71: static int ufsdirhash_findslot(struct dirhash *dh, const char *name,
72: int namelen, doff_t offset);
73: static doff_t ufsdirhash_getprev(struct direct *dp, doff_t offset,
74: int dirblksiz);
75: static int ufsdirhash_recycle(int wanted);
76:
1.9 yamt 77: static POOL_INIT(ufsdirhash_pool, DH_NBLKOFF * sizeof(daddr_t), 0, 0, 0,
78: "ufsdirhash", &pool_allocator_nointr);
1.1 rumble 79:
80: #define DIRHASHLIST_LOCK() do { } while (0)
81: #define DIRHASHLIST_UNLOCK() do { } while (0)
82: #define DIRHASH_LOCK(dh) do { } while (0)
83: #define DIRHASH_UNLOCK(dh) do { } while (0)
84: #define DIRHASH_BLKALLOC_WAITOK() pool_get(&ufsdirhash_pool, PR_WAITOK)
85: #define DIRHASH_BLKFREE(ptr) pool_put(&ufsdirhash_pool, ptr)
86:
87: /* Dirhash list; recently-used entries are near the tail. */
88: static TAILQ_HEAD(, dirhash) ufsdirhash_list;
89:
90: /*
91: * Attempt to build up a hash table for the directory contents in
92: * inode 'ip'. Returns 0 on success, or -1 of the operation failed.
93: */
94: int
95: ufsdirhash_build(struct inode *ip)
96: {
97: struct dirhash *dh;
98: struct buf *bp = NULL;
99: struct direct *ep;
100: struct vnode *vp;
101: doff_t bmask, pos;
102: int dirblocks, i, j, memreqd, nblocks, narrays, nslots, slot;
1.2 perry 103: const int needswap = UFS_MPNEEDSWAP(ip->i_ump);
1.1 rumble 104: int dirblksiz = ip->i_ump->um_dirblksiz;
105:
106: /* Check if we can/should use dirhash. */
107: if (ip->i_dirhash == NULL) {
108: if (ip->i_size < (ufs_dirhashminblks * dirblksiz) || OFSFMT(ip))
109: return (-1);
110: } else {
111: /* Hash exists, but sysctls could have changed. */
112: if (ip->i_size < (ufs_dirhashminblks * dirblksiz) ||
113: ufs_dirhashmem > ufs_dirhashmaxmem) {
114: ufsdirhash_free(ip);
115: return (-1);
116: }
117: /* Check if hash exists and is intact (note: unlocked read). */
118: if (ip->i_dirhash->dh_hash != NULL)
119: return (0);
120: /* Free the old, recycled hash and build a new one. */
121: ufsdirhash_free(ip);
122: }
123:
124: /* Don't hash removed directories. */
125: if (ip->i_ffs_effnlink == 0)
126: return (-1);
127:
128: vp = ip->i_vnode;
129: /* Allocate 50% more entries than this dir size could ever need. */
130: KASSERT(ip->i_size >= dirblksiz);
131: nslots = ip->i_size / DIRECTSIZ(1);
132: nslots = (nslots * 3 + 1) / 2;
133: narrays = howmany(nslots, DH_NBLKOFF);
134: nslots = narrays * DH_NBLKOFF;
135: dirblocks = howmany(ip->i_size, dirblksiz);
136: nblocks = (dirblocks * 3 + 1) / 2;
137:
138: memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) +
139: narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
140: nblocks * sizeof(*dh->dh_blkfree);
141: DIRHASHLIST_LOCK();
142: if (memreqd + ufs_dirhashmem > ufs_dirhashmaxmem) {
143: DIRHASHLIST_UNLOCK();
144: if (memreqd > ufs_dirhashmaxmem / 2)
145: return (-1);
146:
147: /* Try to free some space. */
148: if (ufsdirhash_recycle(memreqd) != 0)
149: return (-1);
150: /* Enough was freed, and list has been locked. */
151: }
152: ufs_dirhashmem += memreqd;
153: DIRHASHLIST_UNLOCK();
154:
155: /*
156: * Use non-blocking mallocs so that we will revert to a linear
157: * lookup on failure rather than potentially blocking forever.
158: */
159: MALLOC(dh, struct dirhash *, sizeof *dh, M_DIRHASH, M_NOWAIT | M_ZERO);
160: if (dh == NULL) {
161: DIRHASHLIST_LOCK();
162: ufs_dirhashmem -= memreqd;
163: DIRHASHLIST_UNLOCK();
164: return (-1);
165: }
1.10.2.1! rpaulo 166: dh->dh_hash = (doff_t **)malloc(narrays * sizeof(dh->dh_hash[0]),
1.1 rumble 167: M_DIRHASH, M_NOWAIT | M_ZERO);
1.10.2.1! rpaulo 168: dh->dh_blkfree = (u_int8_t *)malloc(nblocks * sizeof(dh->dh_blkfree[0]),
1.1 rumble 169: M_DIRHASH, M_NOWAIT);
170: if (dh->dh_hash == NULL || dh->dh_blkfree == NULL)
171: goto fail;
172: for (i = 0; i < narrays; i++) {
173: if ((dh->dh_hash[i] = DIRHASH_BLKALLOC_WAITOK()) == NULL)
174: goto fail;
175: for (j = 0; j < DH_NBLKOFF; j++)
176: dh->dh_hash[i][j] = DIRHASH_EMPTY;
177: }
178:
179: /* Initialise the hash table and block statistics. */
180: dh->dh_narrays = narrays;
181: dh->dh_hlen = nslots;
182: dh->dh_nblk = nblocks;
183: dh->dh_dirblks = dirblocks;
184: for (i = 0; i < dirblocks; i++)
185: dh->dh_blkfree[i] = dirblksiz / DIRALIGN;
186: for (i = 0; i < DH_NFSTATS; i++)
187: dh->dh_firstfree[i] = -1;
188: dh->dh_firstfree[DH_NFSTATS] = 0;
189: dh->dh_seqopt = 0;
190: dh->dh_seqoff = 0;
191: dh->dh_score = DH_SCOREINIT;
192: ip->i_dirhash = dh;
193:
194: bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
195: pos = 0;
196: while (pos < ip->i_size) {
1.8 yamt 197: if ((curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
198: != 0) {
199: preempt(1);
200: }
1.1 rumble 201: /* If necessary, get the next directory block. */
202: if ((pos & bmask) == 0) {
203: if (bp != NULL)
204: brelse(bp);
1.10 yamt 205: if (ufs_blkatoff(vp, (off_t)pos, NULL, &bp) != 0)
1.1 rumble 206: goto fail;
207: }
208:
209: /* Add this entry to the hash. */
210: ep = (struct direct *)((char *)bp->b_data + (pos & bmask));
211: if (ep->d_reclen == 0 || ep->d_reclen >
212: dirblksiz - (pos & (dirblksiz - 1))) {
213: /* Corrupted directory. */
214: brelse(bp);
215: goto fail;
216: }
217: if (ep->d_ino != 0) {
218: /* Add the entry (simplified ufsdirhash_add). */
219: slot = ufsdirhash_hash(dh, ep->d_name, ep->d_namlen);
220: while (DH_ENTRY(dh, slot) != DIRHASH_EMPTY)
221: slot = WRAPINCR(slot, dh->dh_hlen);
222: dh->dh_hused++;
223: DH_ENTRY(dh, slot) = pos;
224: ufsdirhash_adjfree(dh, pos, -DIRSIZ(0, ep, needswap),
225: dirblksiz);
226: }
227: pos += ep->d_reclen;
228: }
229:
230: if (bp != NULL)
231: brelse(bp);
232: DIRHASHLIST_LOCK();
233: TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list);
234: dh->dh_onlist = 1;
235: DIRHASHLIST_UNLOCK();
236: return (0);
237:
238: fail:
239: if (dh->dh_hash != NULL) {
240: for (i = 0; i < narrays; i++)
241: if (dh->dh_hash[i] != NULL)
242: DIRHASH_BLKFREE(dh->dh_hash[i]);
243: FREE(dh->dh_hash, M_DIRHASH);
244: }
245: if (dh->dh_blkfree != NULL)
246: FREE(dh->dh_blkfree, M_DIRHASH);
247: FREE(dh, M_DIRHASH);
248: ip->i_dirhash = NULL;
249: DIRHASHLIST_LOCK();
250: ufs_dirhashmem -= memreqd;
251: DIRHASHLIST_UNLOCK();
252: return (-1);
253: }
254:
255: /*
256: * Free any hash table associated with inode 'ip'.
257: */
258: void
259: ufsdirhash_free(struct inode *ip)
260: {
261: struct dirhash *dh;
262: int i, mem;
263:
264: if ((dh = ip->i_dirhash) == NULL)
265: return;
266: DIRHASHLIST_LOCK();
267: DIRHASH_LOCK(dh);
268: if (dh->dh_onlist)
269: TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
270: DIRHASH_UNLOCK(dh);
271: DIRHASHLIST_UNLOCK();
272:
273: /* The dirhash pointed to by 'dh' is exclusively ours now. */
274:
275: mem = sizeof(*dh);
276: if (dh->dh_hash != NULL) {
277: for (i = 0; i < dh->dh_narrays; i++)
278: DIRHASH_BLKFREE(dh->dh_hash[i]);
279: FREE(dh->dh_hash, M_DIRHASH);
280: FREE(dh->dh_blkfree, M_DIRHASH);
281: mem += dh->dh_narrays * sizeof(*dh->dh_hash) +
282: dh->dh_narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
283: dh->dh_nblk * sizeof(*dh->dh_blkfree);
284: }
285: FREE(dh, M_DIRHASH);
286: ip->i_dirhash = NULL;
287:
288: DIRHASHLIST_LOCK();
289: ufs_dirhashmem -= mem;
290: DIRHASHLIST_UNLOCK();
291: }
292:
293: /*
294: * Find the offset of the specified name within the given inode.
295: * Returns 0 on success, ENOENT if the entry does not exist, or
296: * EJUSTRETURN if the caller should revert to a linear search.
297: *
298: * If successful, the directory offset is stored in *offp, and a
299: * pointer to a struct buf containing the entry is stored in *bpp. If
300: * prevoffp is non-NULL, the offset of the previous entry within
301: * the DIRBLKSIZ-sized block is stored in *prevoffp (if the entry
302: * is the first in a block, the start of the block is used).
303: */
304: int
305: ufsdirhash_lookup(struct inode *ip, const char *name, int namelen, doff_t *offp,
306: struct buf **bpp, doff_t *prevoffp)
307: {
308: struct dirhash *dh, *dh_next;
309: struct direct *dp;
310: struct vnode *vp;
311: struct buf *bp;
312: doff_t blkoff, bmask, offset, prevoff;
313: int i, slot;
1.2 perry 314: const int needswap = UFS_MPNEEDSWAP(ip->i_ump);
1.1 rumble 315: int dirblksiz = ip->i_ump->um_dirblksiz;
316:
317: if ((dh = ip->i_dirhash) == NULL)
318: return (EJUSTRETURN);
319: /*
320: * Move this dirhash towards the end of the list if it has a
321: * score higher than the next entry, and acquire the dh_mtx.
322: * Optimise the case where it's already the last by performing
323: * an unlocked read of the TAILQ_NEXT pointer.
324: *
325: * In both cases, end up holding just dh_mtx.
326: */
327: if (TAILQ_NEXT(dh, dh_list) != NULL) {
328: DIRHASHLIST_LOCK();
329: DIRHASH_LOCK(dh);
330: /*
331: * If the new score will be greater than that of the next
332: * entry, then move this entry past it. With both mutexes
333: * held, dh_next won't go away, but its dh_score could
334: * change; that's not important since it is just a hint.
335: */
336: if (dh->dh_hash != NULL &&
337: (dh_next = TAILQ_NEXT(dh, dh_list)) != NULL &&
338: dh->dh_score >= dh_next->dh_score) {
339: KASSERT(dh->dh_onlist);
340: TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
341: TAILQ_INSERT_AFTER(&ufsdirhash_list, dh_next, dh,
342: dh_list);
343: }
344: DIRHASHLIST_UNLOCK();
345: } else {
346: /* Already the last, though that could change as we wait. */
347: DIRHASH_LOCK(dh);
348: }
349: if (dh->dh_hash == NULL) {
350: DIRHASH_UNLOCK(dh);
351: ufsdirhash_free(ip);
352: return (EJUSTRETURN);
353: }
354:
355: /* Update the score. */
356: if (dh->dh_score < DH_SCOREMAX)
357: dh->dh_score++;
358:
359: vp = ip->i_vnode;
360: bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
361: blkoff = -1;
362: bp = NULL;
363: restart:
364: slot = ufsdirhash_hash(dh, name, namelen);
365:
366: if (dh->dh_seqopt) {
367: /*
368: * Sequential access optimisation. dh_seqoff contains the
369: * offset of the directory entry immediately following
370: * the last entry that was looked up. Check if this offset
371: * appears in the hash chain for the name we are looking for.
372: */
373: for (i = slot; (offset = DH_ENTRY(dh, i)) != DIRHASH_EMPTY;
374: i = WRAPINCR(i, dh->dh_hlen))
375: if (offset == dh->dh_seqoff)
376: break;
377: if (offset == dh->dh_seqoff) {
378: /*
379: * We found an entry with the expected offset. This
380: * is probably the entry we want, but if not, the
381: * code below will turn off seqoff and retry.
1.2 perry 382: */
1.1 rumble 383: slot = i;
1.2 perry 384: } else
1.1 rumble 385: dh->dh_seqopt = 0;
386: }
387:
388: for (; (offset = DH_ENTRY(dh, slot)) != DIRHASH_EMPTY;
389: slot = WRAPINCR(slot, dh->dh_hlen)) {
390: if (offset == DIRHASH_DEL)
391: continue;
392: DIRHASH_UNLOCK(dh);
393:
394: if (offset < 0 || offset >= ip->i_size)
395: panic("ufsdirhash_lookup: bad offset in hash array");
396: if ((offset & ~bmask) != blkoff) {
397: if (bp != NULL)
398: brelse(bp);
399: blkoff = offset & ~bmask;
1.10 yamt 400: if (ufs_blkatoff(vp, (off_t)blkoff, NULL, &bp) != 0)
1.1 rumble 401: return (EJUSTRETURN);
402: }
403: dp = (struct direct *)(bp->b_data + (offset & bmask));
404: if (dp->d_reclen == 0 || dp->d_reclen >
405: dirblksiz - (offset & (dirblksiz - 1))) {
406: /* Corrupted directory. */
407: brelse(bp);
408: return (EJUSTRETURN);
409: }
410: if (dp->d_namlen == namelen &&
411: bcmp(dp->d_name, name, namelen) == 0) {
412: /* Found. Get the prev offset if needed. */
413: if (prevoffp != NULL) {
414: if (offset & (dirblksiz - 1)) {
415: prevoff = ufsdirhash_getprev(dp,
416: offset, dirblksiz);
417: if (prevoff == -1) {
418: brelse(bp);
419: return (EJUSTRETURN);
420: }
421: } else
422: prevoff = offset;
423: *prevoffp = prevoff;
424: }
425:
426: /* Check for sequential access, and update offset. */
427: if (dh->dh_seqopt == 0 && dh->dh_seqoff == offset)
428: dh->dh_seqopt = 1;
429: dh->dh_seqoff = offset + DIRSIZ(0, dp, needswap);
430:
431: *bpp = bp;
432: *offp = offset;
433: return (0);
434: }
435:
436: DIRHASH_LOCK(dh);
437: if (dh->dh_hash == NULL) {
438: DIRHASH_UNLOCK(dh);
439: if (bp != NULL)
440: brelse(bp);
441: ufsdirhash_free(ip);
442: return (EJUSTRETURN);
443: }
444: /*
445: * When the name doesn't match in the seqopt case, go back
446: * and search normally.
447: */
448: if (dh->dh_seqopt) {
449: dh->dh_seqopt = 0;
450: goto restart;
451: }
452: }
453: DIRHASH_UNLOCK(dh);
454: if (bp != NULL)
455: brelse(bp);
456: return (ENOENT);
457: }
458:
459: /*
460: * Find a directory block with room for 'slotneeded' bytes. Returns
461: * the offset of the directory entry that begins the free space.
462: * This will either be the offset of an existing entry that has free
463: * space at the end, or the offset of an entry with d_ino == 0 at
464: * the start of a DIRBLKSIZ block.
465: *
466: * To use the space, the caller may need to compact existing entries in
467: * the directory. The total number of bytes in all of the entries involved
468: * in the compaction is stored in *slotsize. In other words, all of
469: * the entries that must be compacted are exactly contained in the
470: * region beginning at the returned offset and spanning *slotsize bytes.
471: *
472: * Returns -1 if no space was found, indicating that the directory
473: * must be extended.
474: */
475: doff_t
476: ufsdirhash_findfree(struct inode *ip, int slotneeded, int *slotsize)
477: {
478: struct direct *dp;
479: struct dirhash *dh;
480: struct buf *bp;
481: doff_t pos, slotstart;
482: int dirblock, error, freebytes, i;
1.2 perry 483: const int needswap = UFS_MPNEEDSWAP(ip->i_ump);
1.1 rumble 484: int dirblksiz = ip->i_ump->um_dirblksiz;
485:
486: if ((dh = ip->i_dirhash) == NULL)
487: return (-1);
488: DIRHASH_LOCK(dh);
489: if (dh->dh_hash == NULL) {
490: DIRHASH_UNLOCK(dh);
491: ufsdirhash_free(ip);
492: return (-1);
493: }
494:
495: /* Find a directory block with the desired free space. */
496: dirblock = -1;
497: for (i = howmany(slotneeded, DIRALIGN); i <= DH_NFSTATS; i++)
498: if ((dirblock = dh->dh_firstfree[i]) != -1)
499: break;
500: if (dirblock == -1) {
501: DIRHASH_UNLOCK(dh);
502: return (-1);
503: }
504:
505: KASSERT(dirblock < dh->dh_nblk &&
506: dh->dh_blkfree[dirblock] >= howmany(slotneeded, DIRALIGN));
507: DIRHASH_UNLOCK(dh);
508: pos = dirblock * dirblksiz;
1.10 yamt 509: error = ufs_blkatoff(ip->i_vnode, (off_t)pos, (void *)&dp, &bp);
1.1 rumble 510: if (error)
511: return (-1);
512: /* Find the first entry with free space. */
513: for (i = 0; i < dirblksiz; ) {
514: if (dp->d_reclen == 0) {
515: brelse(bp);
516: return (-1);
517: }
518: if (dp->d_ino == 0 || dp->d_reclen > DIRSIZ(0, dp, needswap))
519: break;
520: i += dp->d_reclen;
521: dp = (struct direct *)((char *)dp + dp->d_reclen);
522: }
523: if (i > dirblksiz) {
524: brelse(bp);
525: return (-1);
526: }
527: slotstart = pos + i;
528:
529: /* Find the range of entries needed to get enough space */
530: freebytes = 0;
531: while (i < dirblksiz && freebytes < slotneeded) {
532: freebytes += dp->d_reclen;
533: if (dp->d_ino != 0)
534: freebytes -= DIRSIZ(0, dp, needswap);
535: if (dp->d_reclen == 0) {
536: brelse(bp);
537: return (-1);
538: }
539: i += dp->d_reclen;
540: dp = (struct direct *)((char *)dp + dp->d_reclen);
541: }
542: if (i > dirblksiz) {
543: brelse(bp);
544: return (-1);
545: }
546: if (freebytes < slotneeded)
547: panic("ufsdirhash_findfree: free mismatch");
548: brelse(bp);
549: *slotsize = pos + i - slotstart;
550: return (slotstart);
551: }
552:
553: /*
554: * Return the start of the unused space at the end of a directory, or
555: * -1 if there are no trailing unused blocks.
556: */
557: doff_t
558: ufsdirhash_enduseful(struct inode *ip)
559: {
560: struct dirhash *dh;
561: int i;
562: int dirblksiz = ip->i_ump->um_dirblksiz;
563:
564: if ((dh = ip->i_dirhash) == NULL)
565: return (-1);
566: DIRHASH_LOCK(dh);
567: if (dh->dh_hash == NULL) {
568: DIRHASH_UNLOCK(dh);
569: ufsdirhash_free(ip);
570: return (-1);
571: }
572:
573: if (dh->dh_blkfree[dh->dh_dirblks - 1] != dirblksiz / DIRALIGN) {
574: DIRHASH_UNLOCK(dh);
575: return (-1);
576: }
577:
578: for (i = dh->dh_dirblks - 1; i >= 0; i--)
579: if (dh->dh_blkfree[i] != dirblksiz / DIRALIGN)
580: break;
581: DIRHASH_UNLOCK(dh);
582: return ((doff_t)(i + 1) * dirblksiz);
583: }
584:
585: /*
586: * Insert information into the hash about a new directory entry. dirp
587: * points to a struct direct containing the entry, and offset specifies
588: * the offset of this entry.
589: */
590: void
591: ufsdirhash_add(struct inode *ip, struct direct *dirp, doff_t offset)
592: {
593: struct dirhash *dh;
594: int slot;
595: const int needswap = UFS_MPNEEDSWAP(ip->i_ump);
596: int dirblksiz = ip->i_ump->um_dirblksiz;
597:
598: if ((dh = ip->i_dirhash) == NULL)
599: return;
600: DIRHASH_LOCK(dh);
601: if (dh->dh_hash == NULL) {
602: DIRHASH_UNLOCK(dh);
603: ufsdirhash_free(ip);
604: return;
605: }
606:
607: KASSERT(offset < dh->dh_dirblks * dirblksiz);
608: /*
609: * Normal hash usage is < 66%. If the usage gets too high then
610: * remove the hash entirely and let it be rebuilt later.
611: */
612: if (dh->dh_hused >= (dh->dh_hlen * 3) / 4) {
613: DIRHASH_UNLOCK(dh);
614: ufsdirhash_free(ip);
615: return;
616: }
617:
618: /* Find a free hash slot (empty or deleted), and add the entry. */
619: slot = ufsdirhash_hash(dh, dirp->d_name, dirp->d_namlen);
620: while (DH_ENTRY(dh, slot) >= 0)
621: slot = WRAPINCR(slot, dh->dh_hlen);
622: if (DH_ENTRY(dh, slot) == DIRHASH_EMPTY)
623: dh->dh_hused++;
624: DH_ENTRY(dh, slot) = offset;
625:
626: /* Update the per-block summary info. */
627: ufsdirhash_adjfree(dh, offset, -DIRSIZ(0, dirp, needswap), dirblksiz);
628: DIRHASH_UNLOCK(dh);
629: }
630:
631: /*
632: * Remove the specified directory entry from the hash. The entry to remove
633: * is defined by the name in `dirp', which must exist at the specified
634: * `offset' within the directory.
635: */
636: void
637: ufsdirhash_remove(struct inode *ip, struct direct *dirp, doff_t offset)
638: {
639: struct dirhash *dh;
640: int slot;
641: const int needswap = UFS_MPNEEDSWAP(ip->i_ump);
642: int dirblksiz = ip->i_ump->um_dirblksiz;
643:
644: if ((dh = ip->i_dirhash) == NULL)
645: return;
646: DIRHASH_LOCK(dh);
647: if (dh->dh_hash == NULL) {
648: DIRHASH_UNLOCK(dh);
649: ufsdirhash_free(ip);
650: return;
651: }
652:
653: KASSERT(offset < dh->dh_dirblks * dirblksiz);
654: /* Find the entry */
655: slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, offset);
656:
657: /* Remove the hash entry. */
658: ufsdirhash_delslot(dh, slot);
659:
660: /* Update the per-block summary info. */
661: ufsdirhash_adjfree(dh, offset, DIRSIZ(0, dirp, needswap), dirblksiz);
662: DIRHASH_UNLOCK(dh);
663: }
664:
665: /*
666: * Change the offset associated with a directory entry in the hash. Used
667: * when compacting directory blocks.
668: */
669: void
670: ufsdirhash_move(struct inode *ip, struct direct *dirp, doff_t oldoff,
671: doff_t newoff)
672: {
673: struct dirhash *dh;
674: int slot;
675:
676: if ((dh = ip->i_dirhash) == NULL)
677: return;
678: DIRHASH_LOCK(dh);
679: if (dh->dh_hash == NULL) {
680: DIRHASH_UNLOCK(dh);
681: ufsdirhash_free(ip);
682: return;
683: }
684:
685: KASSERT(oldoff < dh->dh_dirblks * ip->i_ump->um_dirblksiz &&
686: newoff < dh->dh_dirblks * ip->i_ump->um_dirblksiz);
687: /* Find the entry, and update the offset. */
688: slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, oldoff);
689: DH_ENTRY(dh, slot) = newoff;
690: DIRHASH_UNLOCK(dh);
691: }
692:
693: /*
694: * Inform dirhash that the directory has grown by one block that
695: * begins at offset (i.e. the new length is offset + DIRBLKSIZ).
696: */
697: void
698: ufsdirhash_newblk(struct inode *ip, doff_t offset)
699: {
700: struct dirhash *dh;
701: int block;
702: int dirblksiz = ip->i_ump->um_dirblksiz;
703:
704: if ((dh = ip->i_dirhash) == NULL)
705: return;
706: DIRHASH_LOCK(dh);
707: if (dh->dh_hash == NULL) {
708: DIRHASH_UNLOCK(dh);
709: ufsdirhash_free(ip);
710: return;
711: }
712:
713: KASSERT(offset == dh->dh_dirblks * dirblksiz);
714: block = offset / dirblksiz;
715: if (block >= dh->dh_nblk) {
716: /* Out of space; must rebuild. */
717: DIRHASH_UNLOCK(dh);
718: ufsdirhash_free(ip);
719: return;
720: }
721: dh->dh_dirblks = block + 1;
722:
723: /* Account for the new free block. */
724: dh->dh_blkfree[block] = dirblksiz / DIRALIGN;
725: if (dh->dh_firstfree[DH_NFSTATS] == -1)
726: dh->dh_firstfree[DH_NFSTATS] = block;
727: DIRHASH_UNLOCK(dh);
728: }
729:
730: /*
731: * Inform dirhash that the directory is being truncated.
732: */
733: void
734: ufsdirhash_dirtrunc(struct inode *ip, doff_t offset)
735: {
736: struct dirhash *dh;
737: int block, i;
738: int dirblksiz = ip->i_ump->um_dirblksiz;
739:
740: if ((dh = ip->i_dirhash) == NULL)
741: return;
742: DIRHASH_LOCK(dh);
743: if (dh->dh_hash == NULL) {
744: DIRHASH_UNLOCK(dh);
745: ufsdirhash_free(ip);
746: return;
747: }
748:
749: KASSERT(offset <= dh->dh_dirblks * dirblksiz);
750: block = howmany(offset, dirblksiz);
751: /*
752: * If the directory shrinks to less than 1/8 of dh_nblk blocks
753: * (about 20% of its original size due to the 50% extra added in
754: * ufsdirhash_build) then free it, and let the caller rebuild
755: * if necessary.
756: */
757: if (block < dh->dh_nblk / 8 && dh->dh_narrays > 1) {
758: DIRHASH_UNLOCK(dh);
759: ufsdirhash_free(ip);
760: return;
761: }
762:
763: /*
764: * Remove any `first free' information pertaining to the
765: * truncated blocks. All blocks we're removing should be
766: * completely unused.
767: */
768: if (dh->dh_firstfree[DH_NFSTATS] >= block)
769: dh->dh_firstfree[DH_NFSTATS] = -1;
770: for (i = block; i < dh->dh_dirblks; i++)
771: if (dh->dh_blkfree[i] != dirblksiz / DIRALIGN)
772: panic("ufsdirhash_dirtrunc: blocks in use");
773: for (i = 0; i < DH_NFSTATS; i++)
774: if (dh->dh_firstfree[i] >= block)
775: panic("ufsdirhash_dirtrunc: first free corrupt");
776: dh->dh_dirblks = block;
777: DIRHASH_UNLOCK(dh);
778: }
779:
780: /*
781: * Debugging function to check that the dirhash information about
782: * a directory block matches its actual contents. Panics if a mismatch
783: * is detected.
784: *
1.3 christos 785: * On entry, `sbuf' should point to the start of an in-core
1.1 rumble 786: * DIRBLKSIZ-sized directory block, and `offset' should contain the
787: * offset from the start of the directory of that block.
788: */
789: void
1.3 christos 790: ufsdirhash_checkblock(struct inode *ip, char *sbuf, doff_t offset)
1.1 rumble 791: {
792: struct dirhash *dh;
793: struct direct *dp;
794: int block, ffslot, i, nfree;
795: const int needswap = UFS_MPNEEDSWAP(ip->i_ump);
796: int dirblksiz = ip->i_ump->um_dirblksiz;
797:
798: if (!ufs_dirhashcheck)
799: return;
800: if ((dh = ip->i_dirhash) == NULL)
801: return;
802: DIRHASH_LOCK(dh);
803: if (dh->dh_hash == NULL) {
804: DIRHASH_UNLOCK(dh);
805: ufsdirhash_free(ip);
806: return;
807: }
808:
809: block = offset / dirblksiz;
810: if ((offset & (dirblksiz - 1)) != 0 || block >= dh->dh_dirblks)
811: panic("ufsdirhash_checkblock: bad offset");
812:
813: nfree = 0;
814: for (i = 0; i < dirblksiz; i += dp->d_reclen) {
1.3 christos 815: dp = (struct direct *)(sbuf + i);
1.1 rumble 816: if (dp->d_reclen == 0 || i + dp->d_reclen > dirblksiz)
817: panic("ufsdirhash_checkblock: bad dir");
818:
819: if (dp->d_ino == 0) {
820: #if 0
821: /*
822: * XXX entries with d_ino == 0 should only occur
823: * at the start of a DIRBLKSIZ block. However the
824: * ufs code is tolerant of such entries at other
825: * offsets, and fsck does not fix them.
826: */
827: if (i != 0)
828: panic("ufsdirhash_checkblock: bad dir inode");
829: #endif
830: nfree += dp->d_reclen;
831: continue;
832: }
833:
834: /* Check that the entry exists (will panic if it doesn't). */
835: ufsdirhash_findslot(dh, dp->d_name, dp->d_namlen, offset + i);
836:
837: nfree += dp->d_reclen - DIRSIZ(0, dp, needswap);
838: }
839: if (i != dirblksiz)
840: panic("ufsdirhash_checkblock: bad dir end");
841:
842: if (dh->dh_blkfree[block] * DIRALIGN != nfree)
843: panic("ufsdirhash_checkblock: bad free count");
844:
845: ffslot = BLKFREE2IDX(nfree / DIRALIGN);
846: for (i = 0; i <= DH_NFSTATS; i++)
847: if (dh->dh_firstfree[i] == block && i != ffslot)
848: panic("ufsdirhash_checkblock: bad first-free");
849: if (dh->dh_firstfree[ffslot] == -1)
850: panic("ufsdirhash_checkblock: missing first-free entry");
851: DIRHASH_UNLOCK(dh);
852: }
853:
854: /*
855: * Hash the specified filename into a dirhash slot.
856: */
857: static int
858: ufsdirhash_hash(struct dirhash *dh, const char *name, int namelen)
859: {
860: u_int32_t hash;
861:
862: /*
863: * We hash the name and then some other bit of data that is
864: * invariant over the dirhash's lifetime. Otherwise names
865: * differing only in the last byte are placed close to one
866: * another in the table, which is bad for linear probing.
867: */
868: hash = hash32_buf(name, namelen, HASH32_BUF_INIT);
869: hash = hash32_buf(&dh, sizeof(dh), hash);
870: return (hash % dh->dh_hlen);
871: }
872:
873: /*
874: * Adjust the number of free bytes in the block containing `offset'
875: * by the value specified by `diff'.
876: *
877: * The caller must ensure we have exclusive access to `dh'; normally
878: * that means that dh_mtx should be held, but this is also called
879: * from ufsdirhash_build() where exclusive access can be assumed.
880: */
881: static void
882: ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff, int dirblksiz)
883: {
884: int block, i, nfidx, ofidx;
885:
886: /* Update the per-block summary info. */
887: block = offset / dirblksiz;
888: KASSERT(block < dh->dh_nblk && block < dh->dh_dirblks);
889: ofidx = BLKFREE2IDX(dh->dh_blkfree[block]);
890: dh->dh_blkfree[block] = (int)dh->dh_blkfree[block] + (diff / DIRALIGN);
891: nfidx = BLKFREE2IDX(dh->dh_blkfree[block]);
892:
893: /* Update the `first free' list if necessary. */
894: if (ofidx != nfidx) {
895: /* If removing, scan forward for the next block. */
896: if (dh->dh_firstfree[ofidx] == block) {
897: for (i = block + 1; i < dh->dh_dirblks; i++)
898: if (BLKFREE2IDX(dh->dh_blkfree[i]) == ofidx)
899: break;
900: dh->dh_firstfree[ofidx] = (i < dh->dh_dirblks) ? i : -1;
901: }
902:
903: /* Make this the new `first free' if necessary */
904: if (dh->dh_firstfree[nfidx] > block ||
905: dh->dh_firstfree[nfidx] == -1)
906: dh->dh_firstfree[nfidx] = block;
907: }
908: }
909:
910: /*
911: * Find the specified name which should have the specified offset.
912: * Returns a slot number, and panics on failure.
913: *
914: * `dh' must be locked on entry and remains so on return.
915: */
916: static int
917: ufsdirhash_findslot(struct dirhash *dh, const char *name, int namelen,
918: doff_t offset)
919: {
920: int slot;
921:
922: /* Find the entry. */
923: KASSERT(dh->dh_hused < dh->dh_hlen);
924: slot = ufsdirhash_hash(dh, name, namelen);
925: while (DH_ENTRY(dh, slot) != offset &&
926: DH_ENTRY(dh, slot) != DIRHASH_EMPTY)
927: slot = WRAPINCR(slot, dh->dh_hlen);
928: if (DH_ENTRY(dh, slot) != offset)
929: panic("ufsdirhash_findslot: '%.*s' not found", namelen, name);
930:
931: return (slot);
932: }
933:
934: /*
935: * Remove the entry corresponding to the specified slot from the hash array.
936: *
937: * `dh' must be locked on entry and remains so on return.
938: */
939: static void
940: ufsdirhash_delslot(struct dirhash *dh, int slot)
941: {
942: int i;
943:
944: /* Mark the entry as deleted. */
945: DH_ENTRY(dh, slot) = DIRHASH_DEL;
946:
947: /* If this is the end of a chain of DIRHASH_DEL slots, remove them. */
948: for (i = slot; DH_ENTRY(dh, i) == DIRHASH_DEL; )
949: i = WRAPINCR(i, dh->dh_hlen);
950: if (DH_ENTRY(dh, i) == DIRHASH_EMPTY) {
951: i = WRAPDECR(i, dh->dh_hlen);
952: while (DH_ENTRY(dh, i) == DIRHASH_DEL) {
953: DH_ENTRY(dh, i) = DIRHASH_EMPTY;
954: dh->dh_hused--;
955: i = WRAPDECR(i, dh->dh_hlen);
956: }
957: KASSERT(dh->dh_hused >= 0);
958: }
959: }
960:
961: /*
962: * Given a directory entry and its offset, find the offset of the
963: * previous entry in the same DIRBLKSIZ-sized block. Returns an
964: * offset, or -1 if there is no previous entry in the block or some
965: * other problem occurred.
966: */
967: static doff_t
968: ufsdirhash_getprev(struct direct *dirp, doff_t offset, int dirblksiz)
969: {
970: struct direct *dp;
971: char *blkbuf;
972: doff_t blkoff, prevoff;
973: int entrypos, i;
974:
975: blkoff = offset & ~(dirblksiz - 1); /* offset of start of block */
976: entrypos = offset & (dirblksiz - 1); /* entry relative to block */
977: blkbuf = (char *)dirp - entrypos;
978: prevoff = blkoff;
979:
980: /* If `offset' is the start of a block, there is no previous entry. */
981: if (entrypos == 0)
982: return (-1);
983:
984: /* Scan from the start of the block until we get to the entry. */
985: for (i = 0; i < entrypos; i += dp->d_reclen) {
986: dp = (struct direct *)(blkbuf + i);
987: if (dp->d_reclen == 0 || i + dp->d_reclen > entrypos)
988: return (-1); /* Corrupted directory. */
989: prevoff = blkoff + i;
990: }
991: return (prevoff);
992: }
993:
994: /*
995: * Try to free up `wanted' bytes by stealing memory from existing
996: * dirhashes. Returns zero with list locked if successful.
997: */
998: static int
999: ufsdirhash_recycle(int wanted)
1000: {
1001: struct dirhash *dh;
1002: doff_t **hash;
1003: u_int8_t *blkfree;
1004: int i, mem, narrays;
1005:
1006: DIRHASHLIST_LOCK();
1007: while (wanted + ufs_dirhashmem > ufs_dirhashmaxmem) {
1008: /* Find a dirhash, and lock it. */
1009: if ((dh = TAILQ_FIRST(&ufsdirhash_list)) == NULL) {
1010: DIRHASHLIST_UNLOCK();
1011: return (-1);
1012: }
1013: DIRHASH_LOCK(dh);
1014: KASSERT(dh->dh_hash != NULL);
1015:
1016: /* Decrement the score; only recycle if it becomes zero. */
1017: if (--dh->dh_score > 0) {
1018: DIRHASH_UNLOCK(dh);
1019: DIRHASHLIST_UNLOCK();
1020: return (-1);
1021: }
1022:
1023: /* Remove it from the list and detach its memory. */
1024: TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
1025: dh->dh_onlist = 0;
1026: hash = dh->dh_hash;
1027: dh->dh_hash = NULL;
1028: blkfree = dh->dh_blkfree;
1029: dh->dh_blkfree = NULL;
1030: narrays = dh->dh_narrays;
1031: mem = narrays * sizeof(*dh->dh_hash) +
1032: narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
1033: dh->dh_nblk * sizeof(*dh->dh_blkfree);
1034:
1035: /* Unlock everything, free the detached memory. */
1036: DIRHASH_UNLOCK(dh);
1037: DIRHASHLIST_UNLOCK();
1038: for (i = 0; i < narrays; i++)
1039: DIRHASH_BLKFREE(hash[i]);
1040: FREE(hash, M_DIRHASH);
1041: FREE(blkfree, M_DIRHASH);
1042:
1043: /* Account for the returned memory, and repeat if necessary. */
1044: DIRHASHLIST_LOCK();
1045: ufs_dirhashmem -= mem;
1046: }
1047: /* Success; return with list locked. */
1048: return (0);
1049: }
1050:
1051: void
1052: ufsdirhash_init()
1053: {
1054: #ifdef _LKM
1055: pool_init(&ufsdirhash_pool, DH_NBLKOFF * sizeof(daddr_t), 0, 0, 0,
1056: "ufsdirhash", &pool_allocator_nointr);
1057: #endif
1058: TAILQ_INIT(&ufsdirhash_list);
1059: }
1060:
1061: void
1.5 thorpej 1062: ufsdirhash_done(void)
1.1 rumble 1063: {
1064: KASSERT(TAILQ_EMPTY(&ufsdirhash_list));
1065: #ifdef _LKM
1066: pool_destroy(&ufsdirhash_pool);
1067: #endif
1068: }
1069:
1070: SYSCTL_SETUP(sysctl_vfs_ufs_setup, "sysctl vfs.ufs.dirhash subtree setup")
1071: {
1.4 atatat 1072: const struct sysctlnode *rnode, *cnode;
1.1 rumble 1073:
1074: sysctl_createv(clog, 0, NULL, &rnode,
1075: CTLFLAG_PERMANENT,
1076: CTLTYPE_NODE, "vfs", NULL,
1077: NULL, 0, NULL, 0,
1078: CTL_VFS, CTL_EOL);
1079:
1080: sysctl_createv(clog, 0, &rnode, &rnode,
1081: CTLFLAG_PERMANENT,
1082: CTLTYPE_NODE, "ufs",
1083: SYSCTL_DESCR("ufs"),
1084: NULL, 0, NULL, 0,
1085: CTL_CREATE, CTL_EOL);
1086:
1087: sysctl_createv(clog, 0, &rnode, &rnode,
1088: CTLFLAG_PERMANENT,
1089: CTLTYPE_NODE, "dirhash",
1090: SYSCTL_DESCR("dirhash"),
1091: NULL, 0, NULL, 0,
1092: CTL_CREATE, CTL_EOL);
1093:
1094: sysctl_createv(clog, 0, &rnode, &cnode,
1095: CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1096: CTLTYPE_INT, "minblocks",
1097: SYSCTL_DESCR("minimum hashed directory size in blocks"),
1098: NULL, 0, &ufs_dirhashminblks, 0,
1099: CTL_CREATE, CTL_EOL);
1100:
1101: sysctl_createv(clog, 0, &rnode, &cnode,
1102: CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1103: CTLTYPE_INT, "maxmem",
1104: SYSCTL_DESCR("maximum dirhash memory usage"),
1105: NULL, 0, &ufs_dirhashmaxmem, 0,
1106: CTL_CREATE, CTL_EOL);
1107:
1108: sysctl_createv(clog, 0, &rnode, &cnode,
1109: CTLFLAG_PERMANENT|CTLFLAG_READONLY,
1110: CTLTYPE_INT, "memused",
1111: SYSCTL_DESCR("current dirhash memory usage"),
1112: NULL, 0, &ufs_dirhashmem, 0,
1113: CTL_CREATE, CTL_EOL);
1114:
1115: sysctl_createv(clog, 0, &rnode, &cnode,
1116: CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1117: CTLTYPE_INT, "docheck",
1118: SYSCTL_DESCR("enable extra sanity checks"),
1119: NULL, 0, &ufs_dirhashcheck, 0,
1120: CTL_CREATE, CTL_EOL);
1121: }
CVSweb <webmaster@jp.NetBSD.org>