Annotation of src/sys/kern/vfs_lockf.c, Revision 1.73
1.73 ! dholland 1: /* $NetBSD: vfs_lockf.c,v 1.72 2009/08/05 19:39:50 dsl Exp $ */
1.5 cgd 2:
1.1 ws 3: /*
1.4 mycroft 4: * Copyright (c) 1982, 1986, 1989, 1993
5: * The Regents of the University of California. All rights reserved.
1.1 ws 6: *
7: * This code is derived from software contributed to Berkeley by
8: * Scooter Morris at Genentech Inc.
9: *
10: * Redistribution and use in source and binary forms, with or without
11: * modification, are permitted provided that the following conditions
12: * are met:
13: * 1. Redistributions of source code must retain the above copyright
14: * notice, this list of conditions and the following disclaimer.
15: * 2. Redistributions in binary form must reproduce the above copyright
16: * notice, this list of conditions and the following disclaimer in the
17: * documentation and/or other materials provided with the distribution.
1.33 agc 18: * 3. Neither the name of the University nor the names of its contributors
1.1 ws 19: * may be used to endorse or promote products derived from this software
20: * without specific prior written permission.
21: *
22: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32: * SUCH DAMAGE.
33: *
1.12 fvdl 34: * @(#)ufs_lockf.c 8.4 (Berkeley) 10/26/94
1.1 ws 35: */
1.18 lukem 36:
37: #include <sys/cdefs.h>
1.73 ! dholland 38: __KERNEL_RCSID(0, "$NetBSD: vfs_lockf.c,v 1.72 2009/08/05 19:39:50 dsl Exp $");
1.1 ws 39:
40: #include <sys/param.h>
41: #include <sys/systm.h>
42: #include <sys/kernel.h>
43: #include <sys/file.h>
44: #include <sys/proc.h>
45: #include <sys/vnode.h>
1.35 simonb 46: #include <sys/pool.h>
1.1 ws 47: #include <sys/fcntl.h>
48: #include <sys/lockf.h>
1.63 mrg 49: #include <sys/atomic.h>
1.49 elad 50: #include <sys/kauth.h>
1.69 pooka 51: #include <sys/uidinfo.h>
1.22 thorpej 52:
1.50 yamt 53: /*
54: * The lockf structure is a kernel structure which contains the information
55: * associated with a byte range lock. The lockf structures are linked into
1.60 ad 56: * the vnode structure. Locks are sorted by the starting byte of the lock for
1.50 yamt 57: * efficiency.
58: *
59: * lf_next is used for two purposes, depending on whether the lock is
60: * being held, or is in conflict with an existing lock. If this lock
61: * is held, it indicates the next lock on the same vnode.
62: * For pending locks, if lock->lf_next is non-NULL, then lock->lf_block
63: * must be queued on the lf_blkhd TAILQ of lock->lf_next.
64: */
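/*
 * An illustrative sketch (not from the original source) of the
 * resulting topology on one vnode:
 *
 *	*lf_head --> [held lock A] --lf_next--> [held lock B] --> NULL
 *	                  |
 *	              lf_blkhd: [waiter] -lf_block-> [waiter] -> ...
 *	              (each waiter's lf_next points back at A)
 */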
65:
66: TAILQ_HEAD(locklist, lockf);
67:
68: struct lockf {
1.65 ad 69: kcondvar_t lf_cv; /* Signalling */
1.50 yamt 70: short lf_flags; /* Lock semantics: F_POSIX, F_FLOCK, F_WAIT */
71: short lf_type; /* Lock type: F_RDLCK, F_WRLCK */
72: off_t lf_start; /* The byte # of the start of the lock */
73: off_t lf_end; /* The byte # of the end of the lock (-1=EOF)*/
74: void *lf_id; /* process or file description holding lock */
75: struct lockf **lf_head; /* Back pointer to the head of lockf list */
76: struct lockf *lf_next; /* Next lock on this vnode, or blocking lock */
77: struct locklist lf_blkhd; /* List of requests blocked on this lock */
78: TAILQ_ENTRY(lockf) lf_block;/* A request waiting for a lock */
79: uid_t lf_uid; /* User ID responsible */
80: };
81:
82: /* Maximum length of sleep chains to traverse when trying to detect deadlock. */
83: #define MAXDEPTH 50
84:
1.65 ad 85: static pool_cache_t lockf_cache;
86: static kmutex_t *lockf_lock;
87: static char lockstr[] = "lockf";
1.1 ws 88:
89: /*
1.6 mycroft 90: * This variable controls the maximum number of processes that will
91: * be checked in doing deadlock detection.
92: */
93: int maxlockdepth = MAXDEPTH;
94:
95: #ifdef LOCKF_DEBUG
96: int lockf_debug = 0;
97: #endif
98:
99: #define SELF 0x1
100: #define OTHERS 0x2
101:
102: /*
1.16 sommerfe 103: * XXX TODO
1.58 christos 104: * Misc cleanups: "void *id" should be visible in the API as a
1.16 sommerfe 105: * "struct proc *".
106: * (This requires rototilling all VFS's which support advisory locking).
107: */
108:
109: /*
110: * If there's a lot of lock contention on a single vnode, locking
111: * schemes which allow for more parallelism would be needed. Given how
112: * infrequently byte-range locks are actually used in typical BSD
113: * code, a more complex approach probably isn't worth it.
114: */
115:
116: /*
1.38 christos 117: * We enforce a limit on locks by uid, so that a single user cannot
118: * run the kernel out of memory. For now, the limit is pretty coarse.
119: * There is no limit on root.
120: *
121: * Splitting a lock will always succeed, regardless of current allocations.
122: * If you're slightly above the limit, we still have to permit an allocation
123: * so that the unlock can succeed. If the unlocking causes too many splits,
124: * however, you're totally cut off.
125: */
126: int maxlocksperuid = 1024;
127:
1.45 thorpej 128: #ifdef LOCKF_DEBUG
129: /*
130: * Print out a lock.
131: */
132: static void
1.56 christos 133: lf_print(const char *tag, struct lockf *lock)
1.45 thorpej 134: {
135:
136: printf("%s: lock %p for ", tag, lock);
137: if (lock->lf_flags & F_POSIX)
138: printf("proc %d", ((struct proc *)lock->lf_id)->p_pid);
139: else
140: printf("file %p", (struct file *)lock->lf_id);
1.73 ! dholland 141: printf(" %s, start %jd, end %jd",
1.45 thorpej 142: lock->lf_type == F_RDLCK ? "shared" :
143: lock->lf_type == F_WRLCK ? "exclusive" :
144: lock->lf_type == F_UNLCK ? "unlock" :
1.73 ! dholland 145: "unknown", (intmax_t)lock->lf_start, (intmax_t)lock->lf_end);
1.45 thorpej 146: if (TAILQ_FIRST(&lock->lf_blkhd))
147: printf(" block %p\n", TAILQ_FIRST(&lock->lf_blkhd));
148: else
149: printf("\n");
150: }
151:
152: static void
1.56 christos 153: lf_printlist(const char *tag, struct lockf *lock)
1.45 thorpej 154: {
155: struct lockf *lf, *blk;
156:
157: printf("%s: Lock list:\n", tag);
158: for (lf = *lock->lf_head; lf; lf = lf->lf_next) {
159: printf("\tlock %p for ", lf);
160: if (lf->lf_flags & F_POSIX)
161: printf("proc %d", ((struct proc *)lf->lf_id)->p_pid);
162: else
163: printf("file %p", (struct file *)lf->lf_id);
1.73 ! dholland 164: printf(", %s, start %jd, end %jd",
1.45 thorpej 165: lf->lf_type == F_RDLCK ? "shared" :
166: lf->lf_type == F_WRLCK ? "exclusive" :
167: lf->lf_type == F_UNLCK ? "unlock" :
1.73 ! dholland 168: "unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end);
1.45 thorpej 169: TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
170: if (blk->lf_flags & F_POSIX)
1.66 skrll 171: printf("; proc %d",
1.45 thorpej 172: ((struct proc *)blk->lf_id)->p_pid);
173: else
1.66 skrll 174: printf("; file %p", (struct file *)blk->lf_id);
1.73 ! dholland 175: printf(", %s, start %jd, end %jd",
1.45 thorpej 176: blk->lf_type == F_RDLCK ? "shared" :
177: blk->lf_type == F_WRLCK ? "exclusive" :
178: blk->lf_type == F_UNLCK ? "unlock" :
1.73 ! dholland 179: "unknown", (intmax_t)blk->lf_start, (intmax_t)blk->lf_end);
1.45 thorpej 180: if (TAILQ_FIRST(&blk->lf_blkhd))
181: panic("lf_printlist: bad list");
182: }
183: printf("\n");
184: }
185: }
186: #endif /* LOCKF_DEBUG */
187:
1.38 christos 188: /*
189: * 3 options for allowfail.
190: * 0 - always allocate. 1 - cut off at the limit. 2 - cut off at double the limit.
191: */
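/*
 * For example (illustrative, with the default maxlocksperuid = 1024):
 * a non-root uid already holding 1500 locks can still lf_alloc(0) and
 * lf_alloc(2), but lf_alloc(1) returns NULL; past 2048 locks only
 * lf_alloc(0) succeeds.
 */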
1.45 thorpej 192: static struct lockf *
1.71 yamt 193: lf_alloc(int allowfail)
1.38 christos 194: {
195: struct uidinfo *uip;
196: struct lockf *lock;
1.62 rmind 197: u_long lcnt;
1.71 yamt 198: const uid_t uid = kauth_cred_geteuid(kauth_cred_get());
1.38 christos 199:
200: uip = uid_find(uid);
1.62 rmind 201: lcnt = atomic_inc_ulong_nv(&uip->ui_lockcnt);
202: if (uid && allowfail && lcnt >
1.40 christos 203: (allowfail == 1 ? maxlocksperuid : (maxlocksperuid * 2))) {
1.62 rmind 204: atomic_dec_ulong(&uip->ui_lockcnt);
1.40 christos 205: return NULL;
206: }
1.62 rmind 207:
1.65 ad 208: lock = pool_cache_get(lockf_cache, PR_WAITOK);
1.38 christos 209: lock->lf_uid = uid;
1.40 christos 210: return lock;
1.38 christos 211: }
212:
1.45 thorpej 213: static void
1.38 christos 214: lf_free(struct lockf *lock)
215: {
216: struct uidinfo *uip;
217:
218: uip = uid_find(lock->lf_uid);
1.62 rmind 219: atomic_dec_ulong(&uip->ui_lockcnt);
1.65 ad 220: pool_cache_put(lockf_cache, lock);
221: }
222:
223: static int
224: lf_ctor(void *arg, void *obj, int flag)
225: {
226: struct lockf *lock;
1.62 rmind 227:
1.65 ad 228: lock = obj;
229: cv_init(&lock->lf_cv, lockstr);
230:
231: return 0;
232: }
233:
234: static void
235: lf_dtor(void *arg, void *obj)
236: {
237: struct lockf *lock;
238:
239: lock = obj;
1.61 ad 240: cv_destroy(&lock->lf_cv);
1.38 christos 241: }
242:
243: /*
1.45 thorpej 244: * Walk the list of locks for a vnode to
245: * find an overlapping lock (if any).
246: *
247: * NOTE: this returns only the FIRST overlapping lock. There
248: * may be more than one.
1.1 ws 249: */
1.45 thorpej 250: static int
251: lf_findoverlap(struct lockf *lf, struct lockf *lock, int type,
252: struct lockf ***prev, struct lockf **overlap)
1.1 ws 253: {
254: off_t start, end;
255:
1.45 thorpej 256: *overlap = lf;
1.54 yamt 257: if (lf == NULL)
1.45 thorpej 258: return 0;
259: #ifdef LOCKF_DEBUG
260: if (lockf_debug & 2)
261: lf_print("lf_findoverlap: looking for overlap in", lock);
262: #endif /* LOCKF_DEBUG */
263: start = lock->lf_start;
264: end = lock->lf_end;
1.54 yamt 265: while (lf != NULL) {
1.45 thorpej 266: if (((type == SELF) && lf->lf_id != lock->lf_id) ||
267: ((type == OTHERS) && lf->lf_id == lock->lf_id)) {
268: *prev = &lf->lf_next;
269: *overlap = lf = lf->lf_next;
270: continue;
271: }
272: #ifdef LOCKF_DEBUG
273: if (lockf_debug & 2)
274: lf_print("\tchecking", lf);
275: #endif /* LOCKF_DEBUG */
1.1 ws 276: /*
1.45 thorpej 277: * OK, check for overlap
278: *
279: * Six cases:
280: * 0) no overlap
281: * 1) overlap == lock
282: * 2) overlap contains lock
283: * 3) lock contains overlap
284: * 4) overlap starts before lock
285: * 5) overlap ends after lock
1.1 ws 286: */
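		/*
		 * Pictorially (an illustrative sketch, not from the
		 * original source; '-' is the requested lock, '=' is
		 * the existing lock lf):
		 *
		 *	0:	----------
		 *	                    ======   (disjoint, either side)
		 *	1:	  ------
		 *	          ======
		 *	2:	    --
		 *	          ======
		 *	3:	  ----------
		 *	            ======
		 *	4:	    --------
		 *	          ======
		 *	5:	  --------
		 *	            ========
		 */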
1.45 thorpej 287: if ((lf->lf_end != -1 && start > lf->lf_end) ||
288: (end != -1 && lf->lf_start > end)) {
289: /* Case 0 */
290: #ifdef LOCKF_DEBUG
291: if (lockf_debug & 2)
292: printf("no overlap\n");
293: #endif /* LOCKF_DEBUG */
294: if ((type & SELF) && end != -1 && lf->lf_start > end)
295: return 0;
296: *prev = &lf->lf_next;
297: *overlap = lf = lf->lf_next;
298: continue;
299: }
300: if ((lf->lf_start == start) && (lf->lf_end == end)) {
301: /* Case 1 */
302: #ifdef LOCKF_DEBUG
303: if (lockf_debug & 2)
304: printf("overlap == lock\n");
305: #endif /* LOCKF_DEBUG */
306: return 1;
307: }
308: if ((lf->lf_start <= start) &&
309: (end != -1) &&
310: ((lf->lf_end >= end) || (lf->lf_end == -1))) {
311: /* Case 2 */
312: #ifdef LOCKF_DEBUG
313: if (lockf_debug & 2)
314: printf("overlap contains lock\n");
315: #endif /* LOCKF_DEBUG */
316: return 2;
317: }
318: if (start <= lf->lf_start &&
319: (end == -1 ||
320: (lf->lf_end != -1 && end >= lf->lf_end))) {
321: /* Case 3 */
322: #ifdef LOCKF_DEBUG
323: if (lockf_debug & 2)
324: printf("lock contains overlap\n");
325: #endif /* LOCKF_DEBUG */
326: return 3;
327: }
328: if ((lf->lf_start < start) &&
329: ((lf->lf_end >= start) || (lf->lf_end == -1))) {
330: /* Case 4 */
331: #ifdef LOCKF_DEBUG
332: if (lockf_debug & 2)
333: printf("overlap starts before lock\n");
334: #endif /* LOCKF_DEBUG */
335: return 4;
336: }
337: if ((lf->lf_start > start) &&
338: (end != -1) &&
339: ((lf->lf_end > end) || (lf->lf_end == -1))) {
340: /* Case 5 */
341: #ifdef LOCKF_DEBUG
342: if (lockf_debug & 2)
343: printf("overlap ends after lock\n");
344: #endif /* LOCKF_DEBUG */
345: return 5;
346: }
347: panic("lf_findoverlap: default");
348: }
349: return 0;
350: }
1.1 ws 351:
1.45 thorpej 352: /*
353: * Split a lock and a contained region into
354: * two or three locks as necessary.
355: */
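/*
 * For example (illustrative): splitting held lock1 = [0,9] around
 * lock2 = [3,5] shrinks lock1 to [0,2] and turns *sparelock into a
 * copy covering [6,9]; when lock2 shares a start or end with lock1,
 * only two pieces result and the spare is left untouched.
 */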
356: static void
357: lf_split(struct lockf *lock1, struct lockf *lock2, struct lockf **sparelock)
358: {
359: struct lockf *splitlock;
1.1 ws 360:
1.45 thorpej 361: #ifdef LOCKF_DEBUG
362: if (lockf_debug & 2) {
363: lf_print("lf_split", lock1);
364: lf_print("splitting from", lock2);
1.1 ws 365: }
1.45 thorpej 366: #endif /* LOCKF_DEBUG */
1.10 kleink 367: /*
1.45 thorpej 368: * Check to see if splitting into only two pieces.
1.27 yamt 369: */
1.45 thorpej 370: if (lock1->lf_start == lock2->lf_start) {
371: lock1->lf_start = lock2->lf_end + 1;
372: lock2->lf_next = lock1;
373: return;
1.27 yamt 374: }
1.45 thorpej 375: if (lock1->lf_end == lock2->lf_end) {
376: lock1->lf_end = lock2->lf_start - 1;
377: lock2->lf_next = lock1->lf_next;
378: lock1->lf_next = lock2;
379: return;
1.27 yamt 380: }
381: /*
1.45 thorpej 382: * Make a new lock consisting of the last part of
383: * the encompassing lock
1.10 kleink 384: */
1.45 thorpej 385: splitlock = *sparelock;
386: *sparelock = NULL;
1.70 yamt 387: cv_destroy(&splitlock->lf_cv);
1.45 thorpej 388: memcpy(splitlock, lock1, sizeof(*splitlock));
1.67 skrll 389: cv_init(&splitlock->lf_cv, lockstr);
390:
1.45 thorpej 391: splitlock->lf_start = lock2->lf_end + 1;
392: TAILQ_INIT(&splitlock->lf_blkhd);
393: lock1->lf_end = lock2->lf_start - 1;
1.1 ws 394: /*
1.45 thorpej 395: * OK, now link it in
1.21 thorpej 396: */
1.45 thorpej 397: splitlock->lf_next = lock1->lf_next;
398: lock2->lf_next = splitlock;
399: lock1->lf_next = lock2;
400: }
401:
402: /*
403: * Wake up a blocklist
404: */
405: static void
406: lf_wakelock(struct lockf *listhead)
407: {
408: struct lockf *wakelock;
1.21 thorpej 409:
1.45 thorpej 410: while ((wakelock = TAILQ_FIRST(&listhead->lf_blkhd))) {
411: KASSERT(wakelock->lf_next == listhead);
412: TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
1.54 yamt 413: wakelock->lf_next = NULL;
1.45 thorpej 414: #ifdef LOCKF_DEBUG
415: if (lockf_debug & 2)
416: lf_print("lf_wakelock: awakening", wakelock);
417: #endif
1.61 ad 418: cv_broadcast(&wakelock->lf_cv);
1.21 thorpej 419: }
1.45 thorpej 420: }
421:
422: /*
423: * Remove a byte-range lock on a vnode.
424: *
425: * Generally, find the lock (or an overlap to that lock)
426: * and remove it (or shrink it), then wake up anyone we can.
427: */
428: static int
429: lf_clearlock(struct lockf *unlock, struct lockf **sparelock)
430: {
431: struct lockf **head = unlock->lf_head;
432: struct lockf *lf = *head;
433: struct lockf *overlap, **prev;
434: int ovcase;
435:
1.54 yamt 436: if (lf == NULL)
1.45 thorpej 437: return 0;
438: #ifdef LOCKF_DEBUG
439: if (unlock->lf_type != F_UNLCK)
440: panic("lf_clearlock: bad type");
441: if (lockf_debug & 1)
442: lf_print("lf_clearlock", unlock);
443: #endif /* LOCKF_DEBUG */
444: prev = head;
445: while ((ovcase = lf_findoverlap(lf, unlock, SELF,
1.61 ad 446: &prev, &overlap)) != 0) {
1.45 thorpej 447: /*
448: * Wake up the list of locks to be retried.
449: */
450: lf_wakelock(overlap);
451:
452: switch (ovcase) {
1.37 perry 453:
1.45 thorpej 454: case 1: /* overlap == lock */
455: *prev = overlap->lf_next;
456: lf_free(overlap);
457: break;
1.4 mycroft 458:
1.45 thorpej 459: case 2: /* overlap contains lock: split it */
460: if (overlap->lf_start == unlock->lf_start) {
461: overlap->lf_start = unlock->lf_end + 1;
462: break;
463: }
464: lf_split(overlap, unlock, sparelock);
465: overlap->lf_next = unlock->lf_next;
466: break;
1.1 ws 467:
1.45 thorpej 468: case 3: /* lock contains overlap */
469: *prev = overlap->lf_next;
470: lf = overlap->lf_next;
471: lf_free(overlap);
472: continue;
1.1 ws 473:
1.45 thorpej 474: case 4: /* overlap starts before lock */
475: overlap->lf_end = unlock->lf_start - 1;
476: prev = &overlap->lf_next;
477: lf = overlap->lf_next;
478: continue;
1.4 mycroft 479:
1.45 thorpej 480: case 5: /* overlap ends after lock */
481: overlap->lf_start = unlock->lf_end + 1;
482: break;
483: }
1.31 fvdl 484: break;
1.27 yamt 485: }
1.45 thorpej 486: #ifdef LOCKF_DEBUG
487: if (lockf_debug & 1)
488: lf_printlist("lf_clearlock", unlock);
489: #endif /* LOCKF_DEBUG */
490: return 0;
491: }
1.27 yamt 492:
1.45 thorpej 493: /*
494: * Walk the list of locks for a vnode and
495: * return the first blocking lock.
496: */
497: static struct lockf *
498: lf_getblock(struct lockf *lock)
499: {
500: struct lockf **prev, *overlap, *lf = *(lock->lf_head);
1.27 yamt 501:
1.45 thorpej 502: prev = lock->lf_head;
503: while (lf_findoverlap(lf, lock, OTHERS, &prev, &overlap) != 0) {
504: /*
505: * We've found an overlap, see if it blocks us
506: */
507: if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
508: return overlap;
509: /*
510: * Nope, point to the next one on the list and
511: * see if it blocks us
512: */
513: lf = overlap->lf_next;
514: }
1.54 yamt 515: return NULL;
1.1 ws 516: }
517:
518: /*
519: * Set a byte-range lock.
520: */
1.24 yamt 521: static int
1.27 yamt 522: lf_setlock(struct lockf *lock, struct lockf **sparelock,
1.61 ad 523: kmutex_t *interlock)
1.1 ws 524: {
1.15 augustss 525: struct lockf *block;
1.1 ws 526: struct lockf **head = lock->lf_head;
527: struct lockf **prev, *overlap, *ltmp;
1.61 ad 528: int ovcase, needtolink, error;
1.1 ws 529:
530: #ifdef LOCKF_DEBUG
531: if (lockf_debug & 1)
532: lf_print("lf_setlock", lock);
533: #endif /* LOCKF_DEBUG */
534:
535: /*
536: * Scan lock list for this file looking for locks that would block us.
537: */
1.7 christos 538: while ((block = lf_getblock(lock)) != NULL) {
1.1 ws 539: /*
540: * Free the structure and return if nonblocking.
541: */
542: if ((lock->lf_flags & F_WAIT) == 0) {
1.38 christos 543: lf_free(lock);
1.29 yamt 544: return EAGAIN;
1.1 ws 545: }
546: /*
547: * We are blocked. Since flock style locks cover
548: * the whole file, there is no chance for deadlock.
549: * For byte-range locks we must check for deadlock.
550: *
551: * Deadlock detection is done by looking through the
552: * wait channels to see if there are any cycles that
553: * involve us. MAXDEPTH is set just to make sure we
1.16 sommerfe 554: * do not go off into neverneverland.
1.1 ws 555: */
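		/*
		 * An illustrative walk (a sketch, not from the original
		 * source): we block on a lock owned by P2; if P2's only
		 * LWP is itself asleep in lockf, its wait channel is the
		 * pending lockf it sleeps on, whose lf_next is the lock
		 * blocking P2, and that lock's lf_id names the next
		 * owner to visit.  Arriving back at curproc proves a
		 * cycle, so granting would deadlock (EDEADLK).
		 */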
556: if ((lock->lf_flags & F_POSIX) &&
557: (block->lf_flags & F_POSIX)) {
1.21 thorpej 558: struct lwp *wlwp;
1.48 perry 559: volatile const struct lockf *waitblock;
1.1 ws 560: int i = 0;
1.52 yamt 561: struct proc *p;
1.1 ws 562:
1.52 yamt 563: p = (struct proc *)block->lf_id;
564: KASSERT(p != NULL);
565: while (i++ < maxlockdepth) {
1.64 ad 566: mutex_enter(p->p_lock);
1.52 yamt 567: if (p->p_nlwps > 1) {
1.64 ad 568: mutex_exit(p->p_lock);
1.52 yamt 569: break;
570: }
571: wlwp = LIST_FIRST(&p->p_lwps);
1.57 ad 572: lwp_lock(wlwp);
1.65 ad 573: if (wlwp->l_wchan == NULL ||
574: wlwp->l_wmesg != lockstr) {
1.57 ad 575: lwp_unlock(wlwp);
1.64 ad 576: mutex_exit(p->p_lock);
1.52 yamt 577: break;
578: }
1.44 christos 579: waitblock = wlwp->l_wchan;
1.57 ad 580: lwp_unlock(wlwp);
1.64 ad 581: mutex_exit(p->p_lock);
1.1 ws 582: /* Get the owner of the blocking lock */
583: waitblock = waitblock->lf_next;
584: if ((waitblock->lf_flags & F_POSIX) == 0)
585: break;
1.52 yamt 586: p = (struct proc *)waitblock->lf_id;
587: if (p == curproc) {
1.38 christos 588: lf_free(lock);
1.29 yamt 589: return EDEADLK;
1.1 ws 590: }
591: }
1.16 sommerfe 592: /*
1.36 peter 593: * If we're still following a dependency chain
1.16 sommerfe 594: * after maxlockdepth iterations, assume we're in
595: * a cycle to be safe.
596: */
597: if (i >= maxlockdepth) {
1.38 christos 598: lf_free(lock);
1.29 yamt 599: return EDEADLK;
1.16 sommerfe 600: }
1.1 ws 601: }
602: /*
603: * For flock type locks, we must first remove
604: * any shared locks that we hold before we sleep
605: * waiting for an exclusive lock.
606: */
607: if ((lock->lf_flags & F_FLOCK) &&
608: lock->lf_type == F_WRLCK) {
609: lock->lf_type = F_UNLCK;
1.27 yamt 610: (void) lf_clearlock(lock, NULL);
1.1 ws 611: lock->lf_type = F_WRLCK;
612: }
613: /*
614: * Add our lock to the blocked list and sleep until we're free.
615: * Remember who blocked us (for deadlock detection).
616: */
617: lock->lf_next = block;
1.12 fvdl 618: TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
1.1 ws 619: #ifdef LOCKF_DEBUG
620: if (lockf_debug & 1) {
621: lf_print("lf_setlock: blocking on", block);
622: lf_printlist("lf_setlock", block);
623: }
624: #endif /* LOCKF_DEBUG */
1.61 ad 625: error = cv_wait_sig(&lock->lf_cv, interlock);
1.16 sommerfe 626:
627: /*
1.65 ad 628: * We may have been awoken by a signal (in
1.16 sommerfe 629: * which case we must remove ourselves from the
630: * blocked list) and/or by another process
631: * releasing a lock (in which case we have already
632: * been removed from the blocked list and our
1.54 yamt 633: * lf_next field set to NULL).
1.16 sommerfe 634: */
1.54 yamt 635: if (lock->lf_next != NULL) {
1.16 sommerfe 636: TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
1.54 yamt 637: lock->lf_next = NULL;
1.16 sommerfe 638: }
1.7 christos 639: if (error) {
1.38 christos 640: lf_free(lock);
1.29 yamt 641: return error;
1.1 ws 642: }
643: }
644: /*
645: * No blocks!! Add the lock. Note that we will
646: * downgrade or upgrade any overlapping locks this
647: * process already owns.
648: *
649: * Skip over locks owned by other processes.
650: * Handle any locks that overlap and are owned by ourselves.
651: */
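	/*
	 * For example (illustrative): if this process holds a shared
	 * lock on [0,9] and now requests an exclusive lock on [3,5],
	 * case 2 below splits the holder into [0,2] and [6,9] with the
	 * new exclusive [3,5] linked between them.
	 */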
652: prev = head;
653: block = *head;
654: needtolink = 1;
655: for (;;) {
1.7 christos 656: ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap);
657: if (ovcase)
1.1 ws 658: block = overlap->lf_next;
659: /*
660: * Six cases:
661: * 0) no overlap
662: * 1) overlap == lock
663: * 2) overlap contains lock
664: * 3) lock contains overlap
665: * 4) overlap starts before lock
666: * 5) overlap ends after lock
667: */
668: switch (ovcase) {
669: case 0: /* no overlap */
670: if (needtolink) {
671: *prev = lock;
672: lock->lf_next = overlap;
673: }
674: break;
675:
676: case 1: /* overlap == lock */
677: /*
678: * If downgrading lock, others may be
679: * able to acquire it.
680: */
681: if (lock->lf_type == F_RDLCK &&
682: overlap->lf_type == F_WRLCK)
683: lf_wakelock(overlap);
684: overlap->lf_type = lock->lf_type;
1.38 christos 685: lf_free(lock);
1.1 ws 686: lock = overlap; /* for debug output below */
687: break;
688:
689: case 2: /* overlap contains lock */
690: /*
691: * Check for common starting point and different types.
692: */
693: if (overlap->lf_type == lock->lf_type) {
1.38 christos 694: lf_free(lock);
1.1 ws 695: lock = overlap; /* for debug output below */
696: break;
697: }
698: if (overlap->lf_start == lock->lf_start) {
699: *prev = lock;
700: lock->lf_next = overlap;
701: overlap->lf_start = lock->lf_end + 1;
702: } else
1.27 yamt 703: lf_split(overlap, lock, sparelock);
1.1 ws 704: lf_wakelock(overlap);
705: break;
706:
707: case 3: /* lock contains overlap */
708: /*
709: * If downgrading lock, others may be able to
710: * acquire it, otherwise take the list.
711: */
712: if (lock->lf_type == F_RDLCK &&
713: overlap->lf_type == F_WRLCK) {
714: lf_wakelock(overlap);
715: } else {
1.19 matt 716: while ((ltmp = TAILQ_FIRST(&overlap->lf_blkhd))) {
1.16 sommerfe 717: KASSERT(ltmp->lf_next == overlap);
1.12 fvdl 718: TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
719: lf_block);
1.16 sommerfe 720: ltmp->lf_next = lock;
1.12 fvdl 721: TAILQ_INSERT_TAIL(&lock->lf_blkhd,
722: ltmp, lf_block);
723: }
1.1 ws 724: }
725: /*
726: * Add the new lock if necessary and delete the overlap.
727: */
728: if (needtolink) {
729: *prev = lock;
730: lock->lf_next = overlap->lf_next;
731: prev = &lock->lf_next;
732: needtolink = 0;
733: } else
734: *prev = overlap->lf_next;
1.39 christos 735: lf_free(overlap);
1.1 ws 736: continue;
737:
738: case 4: /* overlap starts before lock */
739: /*
740: * Add lock after overlap on the list.
741: */
742: lock->lf_next = overlap->lf_next;
743: overlap->lf_next = lock;
744: overlap->lf_end = lock->lf_start - 1;
745: prev = &lock->lf_next;
746: lf_wakelock(overlap);
747: needtolink = 0;
748: continue;
749:
750: case 5: /* overlap ends after lock */
751: /*
752: * Add the new lock before overlap.
753: */
754: if (needtolink) {
755: *prev = lock;
756: lock->lf_next = overlap;
757: }
758: overlap->lf_start = lock->lf_end + 1;
759: lf_wakelock(overlap);
760: break;
761: }
762: break;
763: }
764: #ifdef LOCKF_DEBUG
765: if (lockf_debug & 1) {
766: lf_print("lf_setlock: got the lock", lock);
767: lf_printlist("lf_setlock", lock);
768: }
769: #endif /* LOCKF_DEBUG */
1.29 yamt 770: return 0;
1.1 ws 771: }
772:
773: /*
774: * Check whether there is a blocking lock,
775: * and if so return its process identifier.
776: */
1.24 yamt 777: static int
1.25 yamt 778: lf_getlock(struct lockf *lock, struct flock *fl)
1.1 ws 779: {
1.15 augustss 780: struct lockf *block;
1.1 ws 781:
782: #ifdef LOCKF_DEBUG
783: if (lockf_debug & 1)
784: lf_print("lf_getlock", lock);
785: #endif /* LOCKF_DEBUG */
786:
1.7 christos 787: if ((block = lf_getblock(lock)) != NULL) {
1.1 ws 788: fl->l_type = block->lf_type;
789: fl->l_whence = SEEK_SET;
790: fl->l_start = block->lf_start;
791: if (block->lf_end == -1)
792: fl->l_len = 0;
793: else
794: fl->l_len = block->lf_end - block->lf_start + 1;
795: if (block->lf_flags & F_POSIX)
1.23 mycroft 796: fl->l_pid = ((struct proc *)block->lf_id)->p_pid;
1.1 ws 797: else
798: fl->l_pid = -1;
799: } else {
800: fl->l_type = F_UNLCK;
801: }
1.29 yamt 802: return 0;
1.1 ws 803: }
804:
805: /*
1.45 thorpej 806: * Do an advisory lock operation.
1.1 ws 807: */
1.45 thorpej 808: int
809: lf_advlock(struct vop_advlock_args *ap, struct lockf **head, off_t size)
1.1 ws 810: {
1.45 thorpej 811: struct flock *fl = ap->a_fl;
812: struct lockf *lock = NULL;
813: struct lockf *sparelock;
1.65 ad 814: kmutex_t *interlock = lockf_lock;
1.45 thorpej 815: off_t start, end;
816: int error = 0;
1.1 ws 817:
1.45 thorpej 818: /*
819: * Convert the flock structure into a start and end.
820: */
821: switch (fl->l_whence) {
822: case SEEK_SET:
823: case SEEK_CUR:
1.1 ws 824: /*
1.45 thorpej 825: * Caller is responsible for adding any necessary offset
826: * when SEEK_CUR is used.
1.1 ws 827: */
1.45 thorpej 828: start = fl->l_start;
829: break;
830:
831: case SEEK_END:
832: start = size + fl->l_start;
833: break;
834:
835: default:
836: return EINVAL;
1.1 ws 837: }
1.72 dsl 838:
839: if (fl->l_len == 0)
840: end = -1;
841: else {
842: if (fl->l_len > 0)
843: end = start + fl->l_len - 1;
844: else {
845: /* lockf() allows negative lengths */
846: end = start - 1;
847: start += fl->l_len;
848: }
849: }
1.45 thorpej 850: if (start < 0)
851: return EINVAL;
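	/*
	 * Illustrative conversions (not from the original source), all
	 * with l_whence = SEEK_SET:
	 *
	 *	l_start = 0,   l_len = 0	-> start = 0,   end = -1 (to EOF)
	 *	l_start = 100, l_len = 10	-> start = 100, end = 109
	 *	l_start = 100, l_len = -10	-> start = 90,  end = 99
	 */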
1.1 ws 852:
1.45 thorpej 853: /*
1.61 ad 854: * Allocate locks before acquiring the interlock. We need two
1.55 ad 855: * locks in the worst case.
1.45 thorpej 856: */
857: switch (ap->a_op) {
858: case F_SETLK:
859: case F_UNLCK:
1.1 ws 860: /*
1.55 ad 861: * XXX For F_UNLCK case, we can re-use the lock.
1.1 ws 862: */
1.46 christos 863: if ((ap->a_flags & F_FLOCK) == 0) {
1.45 thorpej 864: /*
1.55 ad 865: * Byte-range lock might need one more lock.
1.45 thorpej 866: */
1.71 yamt 867: sparelock = lf_alloc(0);
1.45 thorpej 868: if (sparelock == NULL) {
869: error = ENOMEM;
870: goto quit;
871: }
872: break;
1.1 ws 873: }
1.45 thorpej 874: /* FALLTHROUGH */
875:
876: case F_GETLK:
877: sparelock = NULL;
878: break;
879:
880: default:
881: return EINVAL;
882: }
883:
1.71 yamt 884: switch (ap->a_op) {
885: case F_SETLK:
886: lock = lf_alloc(1);
887: break;
888: case F_UNLCK:
889: if (start == 0 || end == -1) {
890: /* never split */
891: lock = lf_alloc(0);
892: } else {
893: /* might split */
894: lock = lf_alloc(2);
895: }
896: break;
897: case F_GETLK:
898: lock = lf_alloc(0);
899: break;
900: }
1.45 thorpej 901: if (lock == NULL) {
902: error = ENOMEM;
903: goto quit;
1.1 ws 904: }
905:
1.61 ad 906: mutex_enter(interlock);
1.1 ws 907:
908: /*
1.45 thorpej 909: * Avoid the common case of unlocking when the vnode has no locks.
1.1 ws 910: */
1.45 thorpej 911: if (*head == NULL) {
912: if (ap->a_op != F_SETLK) {
913: fl->l_type = F_UNLCK;
914: error = 0;
915: goto quit_unlock;
916: }
1.1 ws 917: }
1.45 thorpej 918:
1.1 ws 919: /*
1.45 thorpej 920: * Create the lockf structure.
921: */
922: lock->lf_start = start;
923: lock->lf_end = end;
924: lock->lf_head = head;
925: lock->lf_type = fl->l_type;
926: lock->lf_next = NULL;
927: TAILQ_INIT(&lock->lf_blkhd);
928: lock->lf_flags = ap->a_flags;
929: if (lock->lf_flags & F_POSIX) {
930: KASSERT(curproc == (struct proc *)ap->a_id);
931: }
1.72 dsl 932: lock->lf_id = ap->a_id;
1.45 thorpej 933:
1.1 ws 934: /*
1.45 thorpej 935: * Do the requested operation.
1.1 ws 936: */
1.45 thorpej 937: switch (ap->a_op) {
1.1 ws 938:
1.45 thorpej 939: case F_SETLK:
940: error = lf_setlock(lock, &sparelock, interlock);
941: lock = NULL; /* lf_setlock freed it */
942: break;
1.1 ws 943:
1.45 thorpej 944: case F_UNLCK:
945: error = lf_clearlock(lock, &sparelock);
946: break;
1.1 ws 947:
1.45 thorpej 948: case F_GETLK:
949: error = lf_getlock(lock, fl);
950: break;
1.37 perry 951:
1.45 thorpej 952: default:
953: break;
954: /* NOTREACHED */
955: }
1.1 ws 956:
1.45 thorpej 957: quit_unlock:
1.61 ad 958: mutex_exit(interlock);
1.45 thorpej 959: quit:
960: if (lock)
961: lf_free(lock);
962: if (sparelock)
963: lf_free(sparelock);
1.1 ws 964:
1.45 thorpej 965: return error;
1.1 ws 966: }
1.65 ad 967:
968: /*
969: * Initialize subsystem. XXX We use a global lock. This could be the
970: * vnode interlock, but the deadlock detection code may need to inspect
971: * locks belonging to other files.
972: */
973: void
974: lf_init(void)
975: {
976:
977: lockf_cache = pool_cache_init(sizeof(struct lockf), 0, 0, 0, "lockf",
978: NULL, IPL_NONE, lf_ctor, lf_dtor, NULL);
979: lockf_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
980: }
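/*
 * A sketch of a typical caller (illustrative only; the names below
 * stand in for whatever per-file lock list head and size a
 * filesystem's VOP_ADVLOCK implementation passes):
 *
 *	int
 *	example_advlock(void *v)
 *	{
 *		struct vop_advlock_args *ap = v;
 *		struct inode *ip = VTOI(ap->a_vp);
 *
 *		return lf_advlock(ap, &ip->i_lockf, ip->i_size);
 *	}
 */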