[BACK]Return to vfs_lockf.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / sys / kern

Annotation of src/sys/kern/vfs_lockf.c, Revision 1.54

1.49      elad        1: /*     $NetBSD$        */
1.5       cgd         2:
1.1       ws          3: /*
1.4       mycroft     4:  * Copyright (c) 1982, 1986, 1989, 1993
                      5:  *     The Regents of the University of California.  All rights reserved.
1.1       ws          6:  *
                      7:  * This code is derived from software contributed to Berkeley by
                      8:  * Scooter Morris at Genentech Inc.
                      9:  *
                     10:  * Redistribution and use in source and binary forms, with or without
                     11:  * modification, are permitted provided that the following conditions
                     12:  * are met:
                     13:  * 1. Redistributions of source code must retain the above copyright
                     14:  *    notice, this list of conditions and the following disclaimer.
                     15:  * 2. Redistributions in binary form must reproduce the above copyright
                     16:  *    notice, this list of conditions and the following disclaimer in the
                     17:  *    documentation and/or other materials provided with the distribution.
1.33      agc        18:  * 3. Neither the name of the University nor the names of its contributors
1.1       ws         19:  *    may be used to endorse or promote products derived from this software
                     20:  *    without specific prior written permission.
                     21:  *
                     22:  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
                     23:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
                     24:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
                     25:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
                     26:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
                     27:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
                     28:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
                     29:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
                     30:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
                     31:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
                     32:  * SUCH DAMAGE.
                     33:  *
1.12      fvdl       34:  *     @(#)ufs_lockf.c 8.4 (Berkeley) 10/26/94
1.1       ws         35:  */
1.18      lukem      36:
                     37: #include <sys/cdefs.h>
1.49      elad       38: __KERNEL_RCSID(0, "$NetBSD$");
1.1       ws         39:
                     40: #include <sys/param.h>
                     41: #include <sys/systm.h>
                     42: #include <sys/kernel.h>
                     43: #include <sys/file.h>
                     44: #include <sys/proc.h>
                     45: #include <sys/vnode.h>
1.35      simonb     46: #include <sys/pool.h>
1.1       ws         47: #include <sys/fcntl.h>
                     48: #include <sys/lockf.h>
1.49      elad       49: #include <sys/kauth.h>
1.22      thorpej    50:
/*
 * The lockf structure is a kernel structure which contains the information
 * associated with a byte range lock.  The lockf structures are linked into
 * the inode structure. Locks are sorted by the starting byte of the lock for
 * efficiency.
 *
 * lf_next is used for two purposes, depending on whether the lock is
 * being held, or is in conflict with an existing lock.  If this lock
 * is held, it indicates the next lock on the same vnode.
 * For pending locks, if lock->lf_next is non-NULL, then lock->lf_block
 * must be queued on the lf_blkhd TAILQ of lock->lf_next.
 */

TAILQ_HEAD(locklist, lockf);

struct lockf {
	short	lf_flags;	 /* Lock semantics: F_POSIX, F_FLOCK, F_WAIT */
	short	lf_type;	 /* Lock type: F_RDLCK, F_WRLCK */
	off_t	lf_start;	 /* The byte # of the start of the lock */
	off_t	lf_end;		 /* The byte # of the end of the lock (-1=EOF)*/
	void	*lf_id;		 /* process or file description holding lock */
	struct	lockf **lf_head; /* Back pointer to the head of lockf list */
	struct	lockf *lf_next;	 /* Next lock on this vnode, or blocking lock */
	struct	locklist lf_blkhd; /* List of requests blocked on this lock */
	TAILQ_ENTRY(lockf) lf_block;/* A request waiting for a lock */
	uid_t	lf_uid;		 /* User ID responsible (for per-uid limits) */
};

/* Maximum length of sleep chains to traverse to try and detect deadlock. */
#define MAXDEPTH 50
                     81:
/* Pool from which all struct lockf are allocated; nointr: may sleep. */
static POOL_INIT(lockfpool, sizeof(struct lockf), 0, 0, 0, "lockfpl",
    &pool_allocator_nointr);

/*
 * This variable controls the maximum number of processes that will
 * be checked in doing deadlock detection.
 */
int maxlockdepth = MAXDEPTH;

#ifdef LOCKF_DEBUG
int	lockf_debug = 0;	/* debug output selector; bits tested below */
#endif

/* Ownership selectors for lf_findoverlap(): the caller's own locks,
 * or locks belonging to other owners. */
#define SELF	0x1
#define OTHERS	0x2
                     97:
                     98: /*
1.16      sommerfe   99:  * XXX TODO
                    100:  * Misc cleanups: "caddr_t id" should be visible in the API as a
                    101:  * "struct proc *".
                    102:  * (This requires rototilling all VFS's which support advisory locking).
                    103:  */
                    104:
                    105: /*
                    106:  * If there's a lot of lock contention on a single vnode, locking
                    107:  * schemes which allow for more parallelism would be needed.  Given how
                    108:  * infrequently byte-range locks are actually used in typical BSD
                    109:  * code, a more complex approach probably isn't worth it.
                    110:  */
                    111:
                    112: /*
1.38      christos  113:  * We enforce a limit on locks by uid, so that a single user cannot
                    114:  * run the kernel out of memory.  For now, the limit is pretty coarse.
                    115:  * There is no limit on root.
                    116:  *
                    117:  * Splitting a lock will always succeed, regardless of current allocations.
                    118:  * If you're slightly above the limit, we still have to permit an allocation
                    119:  * so that the unlock can succeed.  If the unlocking causes too many splits,
                    120:  * however, you're totally cut off.
                    121:  */
int maxlocksperuid = 1024;	/* per-uid lock limit; root (uid 0) exempt */
                    123:
#ifdef LOCKF_DEBUG
/*
 * Map a lock type to a human-readable name.
 */
static const char *
lf_typename(short type)
{

	switch (type) {
	case F_RDLCK:
		return "shared";
	case F_WRLCK:
		return "exclusive";
	case F_UNLCK:
		return "unlock";
	default:
		return "unknown";
	}
}

/*
 * Print the owner of a lock: a process (POSIX-style lock) or an
 * open file (flock-style lock).
 */
static void
lf_printowner(struct lockf *lf)
{

	if (lf->lf_flags & F_POSIX)
		printf("proc %d", ((struct proc *)lf->lf_id)->p_pid);
	else
		printf("file %p", (struct file *)lf->lf_id);
}

/*
 * Print out a lock.
 */
static void
lf_print(char *tag, struct lockf *lock)
{

	printf("%s: lock %p for ", tag, lock);
	lf_printowner(lock);
	printf(" %s, start %qx, end %qx",
	    lf_typename(lock->lf_type), lock->lf_start, lock->lf_end);
	if (TAILQ_FIRST(&lock->lf_blkhd) != NULL)
		printf(" block %p\n", TAILQ_FIRST(&lock->lf_blkhd));
	else
		printf("\n");
}

/*
 * Print the whole lock list headed by *lock->lf_head, including, for
 * each held lock, the requests sleeping on it.
 */
static void
lf_printlist(char *tag, struct lockf *lock)
{
	struct lockf *lf, *blk;

	printf("%s: Lock list:\n", tag);
	for (lf = *lock->lf_head; lf; lf = lf->lf_next) {
		printf("\tlock %p for ", lf);
		lf_printowner(lf);
		printf(", %s, start %qx, end %qx",
		    lf_typename(lf->lf_type), lf->lf_start, lf->lf_end);
		TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
			lf_printowner(blk);
			printf(", %s, start %qx, end %qx",
			    lf_typename(blk->lf_type),
			    blk->lf_start, blk->lf_end);
			/* A pending request never has sleepers of its own. */
			if (TAILQ_FIRST(&blk->lf_blkhd) != NULL)
				panic("lf_printlist: bad list");
		}
		printf("\n");
	}
}
#endif /* LOCKF_DEBUG */
                    183:
1.38      christos  184: /*
                    185:  * 3 options for allowfail.
                    186:  * 0 - always allocate.  1 - cutoff at limit.  2 - cutoff at double limit.
                    187:  */
1.45      thorpej   188: static struct lockf *
1.38      christos  189: lf_alloc(uid_t uid, int allowfail)
                    190: {
                    191:        struct uidinfo *uip;
                    192:        struct lockf *lock;
1.41      christos  193:        int s;
1.38      christos  194:
                    195:        uip = uid_find(uid);
1.41      christos  196:        UILOCK(uip, s);
1.38      christos  197:        if (uid && allowfail && uip->ui_lockcnt >
1.40      christos  198:            (allowfail == 1 ? maxlocksperuid : (maxlocksperuid * 2))) {
1.41      christos  199:                UIUNLOCK(uip, s);
1.40      christos  200:                return NULL;
                    201:        }
1.38      christos  202:        uip->ui_lockcnt++;
1.41      christos  203:        UIUNLOCK(uip, s);
1.38      christos  204:        lock = pool_get(&lockfpool, PR_WAITOK);
                    205:        lock->lf_uid = uid;
1.40      christos  206:        return lock;
1.38      christos  207: }
                    208:
/*
 * Release a lock obtained from lf_alloc() and credit it back against
 * the owning uid's allocation count.
 */
static void
lf_free(struct lockf *lock)
{
	struct uidinfo *uip;
	int s;	/* saved state for UILOCK/UIUNLOCK */

	uip = uid_find(lock->lf_uid);
	UILOCK(uip, s);
	uip->ui_lockcnt--;
	UIUNLOCK(uip, s);
	pool_put(&lockfpool, lock);
}
                    221:
/*
 * Walk the list of locks for an inode to
 * find an overlapping lock (if any).
 *
 * NOTE: this returns only the FIRST overlapping lock.  There
 *	 may be more than one.
 *
 * lf:      lock to start the scan at; the walk follows lf_next.
 * lock:    the request being matched against the list.
 * type:    SELF to consider only locks with the same owner (lf_id) as
 *          the request, OTHERS to consider only other owners' locks.
 * prev:    out: address of the lf_next slot that points at *overlap,
 *          so the caller can unlink or insert there.
 * overlap: out: the overlapping lock found (valid when return != 0).
 *
 * Returns 0 if there is no overlap, otherwise the case number 1..5
 * enumerated below.
 */
static int
lf_findoverlap(struct lockf *lf, struct lockf *lock, int type,
    struct lockf ***prev, struct lockf **overlap)
{
	off_t start, end;

	*overlap = lf;
	if (lf == NULL)
		return 0;
#ifdef LOCKF_DEBUG
	if (lockf_debug & 2)
		lf_print("lf_findoverlap: looking for overlap in", lock);
#endif /* LOCKF_DEBUG */
	start = lock->lf_start;
	end = lock->lf_end;		/* -1 means "through EOF" */
	while (lf != NULL) {
		/* Skip locks whose ownership does not match 'type'. */
		if (((type == SELF) && lf->lf_id != lock->lf_id) ||
		    ((type == OTHERS) && lf->lf_id == lock->lf_id)) {
			*prev = &lf->lf_next;
			*overlap = lf = lf->lf_next;
			continue;
		}
#ifdef LOCKF_DEBUG
		if (lockf_debug & 2)
			lf_print("\tchecking", lf);
#endif /* LOCKF_DEBUG */
		/*
		 * OK, check for overlap
		 *
		 * Six cases:
		 *	0) no overlap
		 *	1) overlap == lock
		 *	2) overlap contains lock
		 *	3) lock contains overlap
		 *	4) overlap starts before lock
		 *	5) overlap ends after lock
		 */
		if ((lf->lf_end != -1 && start > lf->lf_end) ||
		    (end != -1 && lf->lf_start > end)) {
			/* Case 0 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("no overlap\n");
#endif /* LOCKF_DEBUG */
			/*
			 * The list is sorted by lf_start, so once a SELF
			 * scan passes the end of the request no later lock
			 * can overlap and we can stop early.
			 */
			if ((type & SELF) && end != -1 && lf->lf_start > end)
				return 0;
			*prev = &lf->lf_next;
			*overlap = lf = lf->lf_next;
			continue;
		}
		if ((lf->lf_start == start) && (lf->lf_end == end)) {
			/* Case 1 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap == lock\n");
#endif /* LOCKF_DEBUG */
			return 1;
		}
		if ((lf->lf_start <= start) &&
		    (end != -1) &&
		    ((lf->lf_end >= end) || (lf->lf_end == -1))) {
			/* Case 2 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap contains lock\n");
#endif /* LOCKF_DEBUG */
			return 2;
		}
		if (start <= lf->lf_start &&
			   (end == -1 ||
			   (lf->lf_end != -1 && end >= lf->lf_end))) {
			/* Case 3 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("lock contains overlap\n");
#endif /* LOCKF_DEBUG */
			return 3;
		}
		if ((lf->lf_start < start) &&
			((lf->lf_end >= start) || (lf->lf_end == -1))) {
			/* Case 4 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap starts before lock\n");
#endif /* LOCKF_DEBUG */
			return 4;
		}
		if ((lf->lf_start > start) &&
			(end != -1) &&
			((lf->lf_end > end) || (lf->lf_end == -1))) {
			/* Case 5 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap ends after lock\n");
#endif /* LOCKF_DEBUG */
			return 5;
		}
		/* The six cases above are exhaustive. */
		panic("lf_findoverlap: default");
	}
	return 0;
}
1.1       ws        330:
/*
 * Split a lock and a contained region into
 * two or three locks as necessary.
 *
 * lock1 is the existing lock; lock2 describes a region strictly
 * contained in it.  On return lock2 is linked into the list adjacent
 * to (the shrunk) lock1.  If a third piece is required, *sparelock
 * (preallocated by the caller) is consumed and set to NULL.
 */
static void
lf_split(struct lockf *lock1, struct lockf *lock2, struct lockf **sparelock)
{
	struct lockf *splitlock;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 2) {
		lf_print("lf_split", lock1);
		lf_print("splitting from", lock2);
	}
#endif /* LOCKF_DEBUG */
	/*
	 * Check to see if splitting into only two pieces.
	 */
	if (lock1->lf_start == lock2->lf_start) {
		/* Same start: lock1 keeps only the tail after lock2. */
		lock1->lf_start = lock2->lf_end + 1;
		lock2->lf_next = lock1;
		return;
	}
	if (lock1->lf_end == lock2->lf_end) {
		/* Same end: lock1 keeps only the head before lock2. */
		lock1->lf_end = lock2->lf_start - 1;
		lock2->lf_next = lock1->lf_next;
		lock1->lf_next = lock2;
		return;
	}
	/*
	 * Make a new lock consisting of the last part of
	 * the encompassing lock
	 */
	splitlock = *sparelock;
	*sparelock = NULL;	/* spare is consumed; caller must not reuse */
	memcpy(splitlock, lock1, sizeof(*splitlock));
	splitlock->lf_start = lock2->lf_end + 1;
	TAILQ_INIT(&splitlock->lf_blkhd);	/* copy must not share sleepers */
	lock1->lf_end = lock2->lf_start - 1;
	/*
	 * OK, now link it in
	 */
	splitlock->lf_next = lock1->lf_next;
	lock2->lf_next = splitlock;
	lock1->lf_next = lock2;
}
                    377:
                    378: /*
                    379:  * Wakeup a blocklist
                    380:  */
                    381: static void
                    382: lf_wakelock(struct lockf *listhead)
                    383: {
                    384:        struct lockf *wakelock;
1.21      thorpej   385:
1.45      thorpej   386:        while ((wakelock = TAILQ_FIRST(&listhead->lf_blkhd))) {
                    387:                KASSERT(wakelock->lf_next == listhead);
                    388:                TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
1.54    ! yamt      389:                wakelock->lf_next = NULL;
1.45      thorpej   390: #ifdef LOCKF_DEBUG
                    391:                if (lockf_debug & 2)
                    392:                        lf_print("lf_wakelock: awakening", wakelock);
                    393: #endif
                    394:                wakeup(wakelock);
1.21      thorpej   395:        }
1.45      thorpej   396: }
                    397:
/*
 * Remove a byte-range lock on an inode.
 *
 * Generally, find the lock (or an overlap to that lock)
 * and remove it (or shrink it), then wakeup anyone we can.
 *
 * unlock:    an F_UNLCK request describing the range to clear.
 * sparelock: caller-preallocated lock, consumed if an overlap must be
 *            split into three pieces (see lf_split()).
 *
 * Always returns 0.
 */
static int
lf_clearlock(struct lockf *unlock, struct lockf **sparelock)
{
	struct lockf **head = unlock->lf_head;
	struct lockf *lf = *head;
	struct lockf *overlap, **prev;
	int ovcase;

	if (lf == NULL)
		return 0;
#ifdef LOCKF_DEBUG
	if (unlock->lf_type != F_UNLCK)
		panic("lf_clearlock: bad type");
	if (lockf_debug & 1)
		lf_print("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */
	prev = head;
	/* Only the caller's own locks (SELF) are cleared. */
	while ((ovcase = lf_findoverlap(lf, unlock, SELF,
					&prev, &overlap)) != 0) {
		/*
		 * Wakeup the list of locks to be retried.
		 */
		lf_wakelock(overlap);

		switch (ovcase) {

		case 1: /* overlap == lock */
			*prev = overlap->lf_next;
			lf_free(overlap);
			break;

		case 2: /* overlap contains lock: split it */
			if (overlap->lf_start == unlock->lf_start) {
				/* No leading piece: just trim the front. */
				overlap->lf_start = unlock->lf_end + 1;
				break;
			}
			lf_split(overlap, unlock, sparelock);
			/* unlink 'unlock', which lf_split chained in */
			overlap->lf_next = unlock->lf_next;
			break;

		case 3: /* lock contains overlap */
			*prev = overlap->lf_next;
			lf = overlap->lf_next;
			lf_free(overlap);
			/* later locks may also be contained; keep scanning */
			continue;

		case 4: /* overlap starts before lock */
			overlap->lf_end = unlock->lf_start - 1;
			prev = &overlap->lf_next;
			lf = overlap->lf_next;
			continue;

		case 5: /* overlap ends after lock */
			overlap->lf_start = unlock->lf_end + 1;
			break;
		}
		/* Cases 1, 2 and 5 cannot be followed by further overlaps. */
		break;
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_printlist("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */
	return 0;
}
1.27      yamt      468:
1.45      thorpej   469: /*
                    470:  * Walk the list of locks for an inode and
                    471:  * return the first blocking lock.
                    472:  */
                    473: static struct lockf *
                    474: lf_getblock(struct lockf *lock)
                    475: {
                    476:        struct lockf **prev, *overlap, *lf = *(lock->lf_head);
1.27      yamt      477:
1.45      thorpej   478:        prev = lock->lf_head;
                    479:        while (lf_findoverlap(lf, lock, OTHERS, &prev, &overlap) != 0) {
                    480:                /*
                    481:                 * We've found an overlap, see if it blocks us
                    482:                 */
                    483:                if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
                    484:                        return overlap;
                    485:                /*
                    486:                 * Nope, point to the next one on the list and
                    487:                 * see if it blocks us
                    488:                 */
                    489:                lf = overlap->lf_next;
                    490:        }
1.54    ! yamt      491:        return NULL;
1.1       ws        492: }
                    493:
                    494: /*
                    495:  * Set a byte-range lock.
                    496:  */
1.24      yamt      497: static int
1.27      yamt      498: lf_setlock(struct lockf *lock, struct lockf **sparelock,
                    499:     struct simplelock *interlock)
1.1       ws        500: {
1.15      augustss  501:        struct lockf *block;
1.1       ws        502:        struct lockf **head = lock->lf_head;
                    503:        struct lockf **prev, *overlap, *ltmp;
                    504:        static char lockstr[] = "lockf";
                    505:        int ovcase, priority, needtolink, error;
                    506:
                    507: #ifdef LOCKF_DEBUG
                    508:        if (lockf_debug & 1)
                    509:                lf_print("lf_setlock", lock);
                    510: #endif /* LOCKF_DEBUG */
                    511:
                    512:        /*
                    513:         * Set the priority
                    514:         */
                    515:        priority = PLOCK;
                    516:        if (lock->lf_type == F_WRLCK)
                    517:                priority += 4;
                    518:        priority |= PCATCH;
                    519:        /*
                    520:         * Scan lock list for this file looking for locks that would block us.
                    521:         */
1.7       christos  522:        while ((block = lf_getblock(lock)) != NULL) {
1.1       ws        523:                /*
                    524:                 * Free the structure and return if nonblocking.
                    525:                 */
                    526:                if ((lock->lf_flags & F_WAIT) == 0) {
1.38      christos  527:                        lf_free(lock);
1.29      yamt      528:                        return EAGAIN;
1.1       ws        529:                }
                    530:                /*
                    531:                 * We are blocked. Since flock style locks cover
                    532:                 * the whole file, there is no chance for deadlock.
                    533:                 * For byte-range locks we must check for deadlock.
                    534:                 *
                    535:                 * Deadlock detection is done by looking through the
                    536:                 * wait channels to see if there are any cycles that
                    537:                 * involve us. MAXDEPTH is set just to make sure we
1.16      sommerfe  538:                 * do not go off into neverneverland.
1.1       ws        539:                 */
                    540:                if ((lock->lf_flags & F_POSIX) &&
                    541:                    (block->lf_flags & F_POSIX)) {
1.21      thorpej   542:                        struct lwp *wlwp;
1.48      perry     543:                        volatile const struct lockf *waitblock;
1.1       ws        544:                        int i = 0;
1.52      yamt      545:                        struct proc *p;
1.1       ws        546:
1.52      yamt      547:                        p = (struct proc *)block->lf_id;
                    548:                        KASSERT(p != NULL);
                    549:                        while (i++ < maxlockdepth) {
                    550:                                simple_lock(&p->p_lock);
                    551:                                if (p->p_nlwps > 1) {
                    552:                                        simple_unlock(&p->p_lock);
                    553:                                        break;
                    554:                                }
                    555:                                wlwp = LIST_FIRST(&p->p_lwps);
                    556:                                if (wlwp->l_wmesg != lockstr) {
                    557:                                        simple_unlock(&p->p_lock);
                    558:                                        break;
                    559:                                }
                    560:                                simple_unlock(&p->p_lock);
1.44      christos  561:                                waitblock = wlwp->l_wchan;
1.52      yamt      562:                                if (waitblock == NULL) {
                    563:                                        /*
                    564:                                         * this lwp just got up but
                    565:                                         * not returned from ltsleep yet.
                    566:                                         */
                    567:                                        break;
                    568:                                }
1.1       ws        569:                                /* Get the owner of the blocking lock */
                    570:                                waitblock = waitblock->lf_next;
                    571:                                if ((waitblock->lf_flags & F_POSIX) == 0)
                    572:                                        break;
1.52      yamt      573:                                p = (struct proc *)waitblock->lf_id;
                    574:                                if (p == curproc) {
1.38      christos  575:                                        lf_free(lock);
1.29      yamt      576:                                        return EDEADLK;
1.1       ws        577:                                }
                    578:                        }
1.16      sommerfe  579:                        /*
1.36      peter     580:                         * If we're still following a dependency chain
1.16      sommerfe  581:                         * after maxlockdepth iterations, assume we're in
                    582:                         * a cycle to be safe.
                    583:                         */
                    584:                        if (i >= maxlockdepth) {
1.38      christos  585:                                lf_free(lock);
1.29      yamt      586:                                return EDEADLK;
1.16      sommerfe  587:                        }
1.1       ws        588:                }
                    589:                /*
                    590:                 * For flock type locks, we must first remove
                    591:                 * any shared locks that we hold before we sleep
                    592:                 * waiting for an exclusive lock.
                    593:                 */
                    594:                if ((lock->lf_flags & F_FLOCK) &&
                    595:                    lock->lf_type == F_WRLCK) {
                    596:                        lock->lf_type = F_UNLCK;
1.27      yamt      597:                        (void) lf_clearlock(lock, NULL);
1.1       ws        598:                        lock->lf_type = F_WRLCK;
                    599:                }
                    600:                /*
                    601:                 * Add our lock to the blocked list and sleep until we're free.
                    602:                 * Remember who blocked us (for deadlock detection).
                    603:                 */
                    604:                lock->lf_next = block;
1.12      fvdl      605:                TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
1.1       ws        606: #ifdef LOCKF_DEBUG
                    607:                if (lockf_debug & 1) {
                    608:                        lf_print("lf_setlock: blocking on", block);
                    609:                        lf_printlist("lf_setlock", block);
                    610:                }
                    611: #endif /* LOCKF_DEBUG */
1.27      yamt      612:                error = ltsleep(lock, priority, lockstr, 0, interlock);
1.16      sommerfe  613:
                    614:                /*
                    615:                 * We may have been awakened by a signal (in
                    616:                 * which case we must remove ourselves from the
                    617:                 * blocked list) and/or by another process
                    618:                 * releasing a lock (in which case we have already
                    619:                 * been removed from the blocked list and our
1.54    ! yamt      620:                 * lf_next field set to NULL).
1.16      sommerfe  621:                 */
1.54    ! yamt      622:                if (lock->lf_next != NULL) {
1.16      sommerfe  623:                        TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
1.54    ! yamt      624:                        lock->lf_next = NULL;
1.16      sommerfe  625:                }
1.7       christos  626:                if (error) {
1.38      christos  627:                        lf_free(lock);
1.29      yamt      628:                        return error;
1.1       ws        629:                }
                    630:        }
                    631:        /*
                    632:         * No blocks!!  Add the lock.  Note that we will
                    633:         * downgrade or upgrade any overlapping locks this
                    634:         * process already owns.
                    635:         *
                    636:         * Skip over locks owned by other processes.
                    637:         * Handle any locks that overlap and are owned by ourselves.
                    638:         */
                    639:        prev = head;
                    640:        block = *head;
                    641:        needtolink = 1;
                    642:        for (;;) {
1.7       christos  643:                ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap);
                    644:                if (ovcase)
1.1       ws        645:                        block = overlap->lf_next;
                    646:                /*
                    647:                 * Six cases:
                    648:                 *      0) no overlap
                    649:                 *      1) overlap == lock
                    650:                 *      2) overlap contains lock
                    651:                 *      3) lock contains overlap
                    652:                 *      4) overlap starts before lock
                    653:                 *      5) overlap ends after lock
                    654:                 */
                    655:                switch (ovcase) {
                    656:                case 0: /* no overlap */
                    657:                        if (needtolink) {
                    658:                                *prev = lock;
                    659:                                lock->lf_next = overlap;
                    660:                        }
                    661:                        break;
                    662:
                    663:                case 1: /* overlap == lock */
                    664:                        /*
                    665:                         * If downgrading lock, others may be
                    666:                         * able to acquire it.
                    667:                         */
                    668:                        if (lock->lf_type == F_RDLCK &&
                    669:                            overlap->lf_type == F_WRLCK)
                    670:                                lf_wakelock(overlap);
                    671:                        overlap->lf_type = lock->lf_type;
1.38      christos  672:                        lf_free(lock);
1.1       ws        673:                        lock = overlap; /* for debug output below */
                    674:                        break;
                    675:
                    676:                case 2: /* overlap contains lock */
                    677:                        /*
                    678:                         * Check for common starting point and different types.
                    679:                         */
                    680:                        if (overlap->lf_type == lock->lf_type) {
1.38      christos  681:                                lf_free(lock);
1.1       ws        682:                                lock = overlap; /* for debug output below */
                    683:                                break;
                    684:                        }
                    685:                        if (overlap->lf_start == lock->lf_start) {
                    686:                                *prev = lock;
                    687:                                lock->lf_next = overlap;
                    688:                                overlap->lf_start = lock->lf_end + 1;
                    689:                        } else
1.27      yamt      690:                                lf_split(overlap, lock, sparelock);
1.1       ws        691:                        lf_wakelock(overlap);
                    692:                        break;
                    693:
                    694:                case 3: /* lock contains overlap */
                    695:                        /*
                    696:                         * If downgrading lock, others may be able to
                    697:                         * acquire it, otherwise take the list.
                    698:                         */
                    699:                        if (lock->lf_type == F_RDLCK &&
                    700:                            overlap->lf_type == F_WRLCK) {
                    701:                                lf_wakelock(overlap);
                    702:                        } else {
1.19      matt      703:                                while ((ltmp = TAILQ_FIRST(&overlap->lf_blkhd))) {
1.16      sommerfe  704:                                        KASSERT(ltmp->lf_next == overlap);
1.12      fvdl      705:                                        TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
                    706:                                            lf_block);
1.16      sommerfe  707:                                        ltmp->lf_next = lock;
1.12      fvdl      708:                                        TAILQ_INSERT_TAIL(&lock->lf_blkhd,
                    709:                                            ltmp, lf_block);
                    710:                                }
1.1       ws        711:                        }
                    712:                        /*
                    713:                         * Add the new lock if necessary and delete the overlap.
                    714:                         */
                    715:                        if (needtolink) {
                    716:                                *prev = lock;
                    717:                                lock->lf_next = overlap->lf_next;
                    718:                                prev = &lock->lf_next;
                    719:                                needtolink = 0;
                    720:                        } else
                    721:                                *prev = overlap->lf_next;
1.39      christos  722:                        lf_free(overlap);
1.1       ws        723:                        continue;
                    724:
                    725:                case 4: /* overlap starts before lock */
                    726:                        /*
                    727:                         * Add lock after overlap on the list.
                    728:                         */
                    729:                        lock->lf_next = overlap->lf_next;
                    730:                        overlap->lf_next = lock;
                    731:                        overlap->lf_end = lock->lf_start - 1;
                    732:                        prev = &lock->lf_next;
                    733:                        lf_wakelock(overlap);
                    734:                        needtolink = 0;
                    735:                        continue;
                    736:
                    737:                case 5: /* overlap ends after lock */
                    738:                        /*
                    739:                         * Add the new lock before overlap.
                    740:                         */
                    741:                        if (needtolink) {
                    742:                                *prev = lock;
                    743:                                lock->lf_next = overlap;
                    744:                        }
                    745:                        overlap->lf_start = lock->lf_end + 1;
                    746:                        lf_wakelock(overlap);
                    747:                        break;
                    748:                }
                    749:                break;
                    750:        }
                    751: #ifdef LOCKF_DEBUG
                    752:        if (lockf_debug & 1) {
                    753:                lf_print("lf_setlock: got the lock", lock);
                    754:                lf_printlist("lf_setlock", lock);
                    755:        }
                    756: #endif /* LOCKF_DEBUG */
1.29      yamt      757:        return 0;
1.1       ws        758: }
                    759:
                    760: /*
                    761:  * Check whether there is a blocking lock,
                    762:  * and if so return its process identifier.
                    763:  */
1.24      yamt      764: static int
1.25      yamt      765: lf_getlock(struct lockf *lock, struct flock *fl)
1.1       ws        766: {
1.15      augustss  767:        struct lockf *block;
1.1       ws        768:
                    769: #ifdef LOCKF_DEBUG
                    770:        if (lockf_debug & 1)
                    771:                lf_print("lf_getlock", lock);
                    772: #endif /* LOCKF_DEBUG */
                    773:
1.7       christos  774:        if ((block = lf_getblock(lock)) != NULL) {
1.1       ws        775:                fl->l_type = block->lf_type;
                    776:                fl->l_whence = SEEK_SET;
                    777:                fl->l_start = block->lf_start;
                    778:                if (block->lf_end == -1)
                    779:                        fl->l_len = 0;
                    780:                else
                    781:                        fl->l_len = block->lf_end - block->lf_start + 1;
                    782:                if (block->lf_flags & F_POSIX)
1.23      mycroft   783:                        fl->l_pid = ((struct proc *)block->lf_id)->p_pid;
1.1       ws        784:                else
                    785:                        fl->l_pid = -1;
                    786:        } else {
                    787:                fl->l_type = F_UNLCK;
                    788:        }
1.29      yamt      789:        return 0;
1.1       ws        790: }
                    791:
                    792: /*
1.45      thorpej   793:  * Do an advisory lock operation.
1.1       ws        794:  */
1.45      thorpej   795: int
                    796: lf_advlock(struct vop_advlock_args *ap, struct lockf **head, off_t size)
1.1       ws        797: {
1.45      thorpej   798:        struct proc *p = curproc;
                    799:        struct flock *fl = ap->a_fl;
                    800:        struct lockf *lock = NULL;
                    801:        struct lockf *sparelock;
                    802:        struct simplelock *interlock = &ap->a_vp->v_interlock;
                    803:        off_t start, end;
                    804:        int error = 0;
1.1       ws        805:
1.45      thorpej   806:        /*
                    807:         * Convert the flock structure into a start and end.
                    808:         */
                    809:        switch (fl->l_whence) {
                    810:        case SEEK_SET:
                    811:        case SEEK_CUR:
1.1       ws        812:                /*
1.45      thorpej   813:                 * Caller is responsible for adding any necessary offset
                    814:                 * when SEEK_CUR is used.
1.1       ws        815:                 */
1.45      thorpej   816:                start = fl->l_start;
                    817:                break;
                    818:
                    819:        case SEEK_END:
                    820:                start = size + fl->l_start;
                    821:                break;
                    822:
                    823:        default:
                    824:                return EINVAL;
1.1       ws        825:        }
1.45      thorpej   826:        if (start < 0)
                    827:                return EINVAL;
1.1       ws        828:
1.45      thorpej   829:        /*
                    830:         * allocate locks before acquire simple lock.
                    831:         * we need two locks in the worst case.
                    832:         */
                    833:        switch (ap->a_op) {
                    834:        case F_SETLK:
                    835:        case F_UNLCK:
1.1       ws        836:                /*
1.45      thorpej   837:                 * XXX for F_UNLCK case, we can re-use lock.
1.1       ws        838:                 */
1.46      christos  839:                if ((ap->a_flags & F_FLOCK) == 0) {
1.45      thorpej   840:                        /*
                    841:                         * byte-range lock might need one more lock.
                    842:                         */
1.49      elad      843:                        sparelock = lf_alloc(kauth_cred_geteuid(p->p_cred), 0);
1.45      thorpej   844:                        if (sparelock == NULL) {
                    845:                                error = ENOMEM;
                    846:                                goto quit;
                    847:                        }
                    848:                        break;
1.1       ws        849:                }
1.45      thorpej   850:                /* FALLTHROUGH */
                    851:
                    852:        case F_GETLK:
                    853:                sparelock = NULL;
                    854:                break;
                    855:
                    856:        default:
                    857:                return EINVAL;
                    858:        }
                    859:
1.49      elad      860:        lock = lf_alloc(kauth_cred_geteuid(p->p_cred), ap->a_op != F_UNLCK ? 1 : 2);
1.45      thorpej   861:        if (lock == NULL) {
                    862:                error = ENOMEM;
                    863:                goto quit;
1.1       ws        864:        }
                    865:
1.45      thorpej   866:        simple_lock(interlock);
1.1       ws        867:
                    868:        /*
1.45      thorpej   869:         * Avoid the common case of unlocking when inode has no locks.
1.1       ws        870:         */
1.45      thorpej   871:        if (*head == (struct lockf *)0) {
                    872:                if (ap->a_op != F_SETLK) {
                    873:                        fl->l_type = F_UNLCK;
                    874:                        error = 0;
                    875:                        goto quit_unlock;
                    876:                }
1.1       ws        877:        }
1.45      thorpej   878:
                    879:        if (fl->l_len == 0)
                    880:                end = -1;
                    881:        else
                    882:                end = start + fl->l_len - 1;
1.1       ws        883:        /*
1.45      thorpej   884:         * Create the lockf structure.
                    885:         */
                    886:        lock->lf_start = start;
                    887:        lock->lf_end = end;
                    888:        /* XXX NJWLWP
                    889:         * I don't want to make the entire VFS universe use LWPs, because
                    890:         * they don't need them, for the most part. This is an exception,
                    891:         * and a kluge.
1.1       ws        892:         */
1.45      thorpej   893:
                    894:        lock->lf_head = head;
                    895:        lock->lf_type = fl->l_type;
                    896:        lock->lf_next = (struct lockf *)0;
                    897:        TAILQ_INIT(&lock->lf_blkhd);
                    898:        lock->lf_flags = ap->a_flags;
                    899:        if (lock->lf_flags & F_POSIX) {
                    900:                KASSERT(curproc == (struct proc *)ap->a_id);
                    901:        }
                    902:        lock->lf_id = (struct proc *)ap->a_id;
                    903:
1.1       ws        904:        /*
1.45      thorpej   905:         * Do the requested operation.
1.1       ws        906:         */
1.45      thorpej   907:        switch (ap->a_op) {
1.1       ws        908:
1.45      thorpej   909:        case F_SETLK:
                    910:                error = lf_setlock(lock, &sparelock, interlock);
                    911:                lock = NULL; /* lf_setlock freed it */
                    912:                break;
1.1       ws        913:
1.45      thorpej   914:        case F_UNLCK:
                    915:                error = lf_clearlock(lock, &sparelock);
                    916:                break;
1.1       ws        917:
1.45      thorpej   918:        case F_GETLK:
                    919:                error = lf_getlock(lock, fl);
                    920:                break;
1.37      perry     921:
1.45      thorpej   922:        default:
                    923:                break;
                    924:                /* NOTREACHED */
                    925:        }
1.1       ws        926:
1.45      thorpej   927: quit_unlock:
                    928:        simple_unlock(interlock);
                    929: quit:
                    930:        if (lock)
                    931:                lf_free(lock);
                    932:        if (sparelock)
                    933:                lf_free(sparelock);
1.1       ws        934:
1.45      thorpej   935:        return error;
1.1       ws        936: }

CVSweb <webmaster@jp.NetBSD.org>