Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.

===================================================================
RCS file: /ftp/cvs/cvsroot/src/sys/kern/vfs_lockf.c,v
rcsdiff: /ftp/cvs/cvsroot/src/sys/kern/vfs_lockf.c,v: warning: Unknown phrases like `commitid ...;' are present.
retrieving revision 1.45.2.6
retrieving revision 1.46
diff -u -p -r1.45.2.6 -r1.46
--- src/sys/kern/vfs_lockf.c    2008/03/24 09:39:03    1.45.2.6
+++ src/sys/kern/vfs_lockf.c    2005/10/28 15:37:23    1.46
@@ -1,4 +1,4 @@
-/*      $NetBSD: vfs_lockf.c,v 1.45.2.6 2008/03/24 09:39:03 yamt Exp $  */
+/*      $NetBSD: vfs_lockf.c,v 1.46 2005/10/28 15:37:23 christos Exp $  */
 
 /*
  * Copyright (c) 1982, 1986, 1989, 1993
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_lockf.c,v 1.45.2.6 2008/03/24 09:39:03 yamt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_lockf.c,v 1.46 2005/10/28 15:37:23 christos Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -46,43 +46,9 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_lockf.c,
 #include <sys/pool.h>
 #include <sys/fcntl.h>
 #include <sys/lockf.h>
-#include <sys/atomic.h>
-#include <sys/kauth.h>
 
-/*
- * The lockf structure is a kernel structure which contains the information
- * associated with a byte range lock.  The lockf structures are linked into
- * the vnode structure.  Locks are sorted by the starting byte of the lock for
- * efficiency.
- *
- * lf_next is used for two purposes, depending on whether the lock is
- * being held, or is in conflict with an existing lock.  If this lock
- * is held, it indicates the next lock on the same vnode.
- * For pending locks, if lock->lf_next is non-NULL, then lock->lf_block
- * must be queued on the lf_blkhd TAILQ of lock->lf_next.
- */
-
-TAILQ_HEAD(locklist, lockf);
-
-struct lockf {
-        short   lf_flags;        /* Lock semantics: F_POSIX, F_FLOCK, F_WAIT */
-        short   lf_type;         /* Lock type: F_RDLCK, F_WRLCK */
-        off_t   lf_start;        /* The byte # of the start of the lock */
-        off_t   lf_end;          /* The byte # of the end of the lock (-1=EOF)*/
-        void    *lf_id;          /* process or file description holding lock */
-        struct  lockf **lf_head; /* Back pointer to the head of lockf list */
-        struct  lockf *lf_next;  /* Next lock on this vnode, or blocking lock */
-        struct  locklist lf_blkhd; /* List of requests blocked on this lock */
-        TAILQ_ENTRY(lockf) lf_block;/* A request waiting for a lock */
-        uid_t   lf_uid;          /* User ID responsible */
-        kcondvar_t lf_cv;        /* Signalling */
-};
-
-/* Maximum length of sleep chains to traverse to try and detect deadlock. */
-#define MAXDEPTH 50
-
-static POOL_INIT(lockfpool, sizeof(struct lockf), 0, 0, 0, "lockfpl",
-    &pool_allocator_nointr, IPL_NONE);
+POOL_INIT(lockfpool, sizeof(struct lockf), 0, 0, 0, "lockfpl",
+    &pool_allocator_nointr);
 
 /*
  * This variable controls the maximum number of processes that will
@@ -94,12 +60,13 @@ int maxlockdepth = MAXDEPTH;
 int     lockf_debug = 0;
 #endif
 
+#define NOLOCKF (struct lockf *)0
 #define SELF    0x1
 #define OTHERS  0x2
 
 /*
  * XXX TODO
- * Misc cleanups: "void *id" should be visible in the API as a
+ * Misc cleanups: "caddr_t id" should be visible in the API as a
  * "struct proc *".
  * (This requires rototilling all VFS's which support advisory locking).
  */
@@ -128,7 +95,7 @@ int maxlocksperuid = 1024;
  * Print out a lock.
  */
 static void
-lf_print(const char *tag, struct lockf *lock)
+lf_print(char *tag, struct lockf *lock)
 {
 
         printf("%s: lock %p for ", tag, lock);
@@ -148,7 +115,7 @@ lf_print(const char *tag, struct lockf *
 }
 
 static void
-lf_printlist(const char *tag, struct lockf *lock)
+lf_printlist(char *tag, struct lockf *lock)
 {
         struct lockf *lf, *blk;
 
@@ -192,19 +159,19 @@ lf_alloc(uid_t uid, int allowfail)
 {
         struct uidinfo *uip;
         struct lockf *lock;
-        u_long lcnt;
+        int s;
 
         uip = uid_find(uid);
-        lcnt = atomic_inc_ulong_nv(&uip->ui_lockcnt);
-        if (uid && allowfail && lcnt >
+        UILOCK(uip, s);
+        if (uid && allowfail && uip->ui_lockcnt >
             (allowfail == 1 ? maxlocksperuid : (maxlocksperuid * 2))) {
-                atomic_dec_ulong(&uip->ui_lockcnt);
+                UIUNLOCK(uip, s);
                 return NULL;
         }
-
+        uip->ui_lockcnt++;
+        UIUNLOCK(uip, s);
         lock = pool_get(&lockfpool, PR_WAITOK);
         lock->lf_uid = uid;
-        cv_init(&lock->lf_cv, "lockf");
         return lock;
 }
 
@@ -212,11 +179,12 @@
 static void
 lf_free(struct lockf *lock)
 {
         struct uidinfo *uip;
+        int s;
 
         uip = uid_find(lock->lf_uid);
-        atomic_dec_ulong(&uip->ui_lockcnt);
-
-        cv_destroy(&lock->lf_cv);
+        UILOCK(uip, s);
+        uip->ui_lockcnt--;
+        UIUNLOCK(uip, s);
         pool_put(&lockfpool, lock);
 }
 
@@ -234,7 +202,7 @@ lf_findoverlap(struct lockf *lf, struct 
         off_t start, end;
 
         *overlap = lf;
-        if (lf == NULL)
+        if (lf == NOLOCKF)
                 return 0;
 #ifdef LOCKF_DEBUG
         if (lockf_debug & 2)
@@ -242,7 +210,7 @@ lf_findoverlap(struct lockf *lf, struct 
 #endif /* LOCKF_DEBUG */
         start = lock->lf_start;
         end = lock->lf_end;
-        while (lf != NULL) {
+        while (lf != NOLOCKF) {
                 if (((type == SELF) && lf->lf_id != lock->lf_id) ||
                     ((type == OTHERS) && lf->lf_id == lock->lf_id)) {
                         *prev = &lf->lf_next;
@@ -387,12 +355,12 @@ lf_wakelock(struct lockf *listhead)
         while ((wakelock = TAILQ_FIRST(&listhead->lf_blkhd))) {
                 KASSERT(wakelock->lf_next == listhead);
                 TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
-                wakelock->lf_next = NULL;
+                wakelock->lf_next = NOLOCKF;
 #ifdef LOCKF_DEBUG
                 if (lockf_debug & 2)
                         lf_print("lf_wakelock: awakening", wakelock);
 #endif
-                cv_broadcast(&wakelock->lf_cv);
+                wakeup(wakelock);
         }
 }
 
@@ -410,7 +378,7 @@ lf_clearlock(struct lockf *unlock, struc
         struct lockf *overlap, **prev;
         int ovcase;
 
-        if (lf == NULL)
+        if (lf == NOLOCKF)
                 return 0;
 #ifdef LOCKF_DEBUG
         if (unlock->lf_type != F_UNLCK)
@@ -420,7 +388,7 @@ lf_clearlock(struct lockf *unlock, struc
 #endif /* LOCKF_DEBUG */
         prev = head;
         while ((ovcase = lf_findoverlap(lf, unlock, SELF,
-            &prev, &overlap)) != 0) {
+            &prev, &overlap)) != 0) {
                 /*
                  * Wakeup the list of locks to be retried.
                  */
@@ -489,7 +457,7 @@ lf_getblock(struct lockf *lock)
                  */
                 lf = overlap->lf_next;
         }
-        return NULL;
+        return NOLOCKF;
 }
 
 /*
@@ -497,13 +465,13 @@ lf_getblock(struct lockf *lock)
  */
 static int
 lf_setlock(struct lockf *lock, struct lockf **sparelock,
-    kmutex_t *interlock)
+    struct simplelock *interlock)
 {
         struct lockf *block;
         struct lockf **head = lock->lf_head;
         struct lockf **prev, *overlap, *ltmp;
         static char lockstr[] = "lockf";
-        int ovcase, needtolink, error;
+        int ovcase, priority, needtolink, error;
 
 #ifdef LOCKF_DEBUG
         if (lockf_debug & 1)
@@ -511,12 +479,12 @@ lf_setlock(struct lockf *lock, struct lo
 #endif /* LOCKF_DEBUG */
 
         /*
-         * XXX Here we used to set the sleep priority so that writers
-         * took priority.  That's of dubious use, and is not possible
-         * with condition variables.  Need to find a better way to ensure
-         * fairness.
+         * Set the priority
          */
-
+        priority = PLOCK;
+        if (lock->lf_type == F_WRLCK)
+                priority += 4;
+        priority |= PCATCH;
         /*
          * Scan lock list for this file looking for locks that would block us.
          */
@@ -541,41 +509,23 @@ lf_setlock(struct lockf *lock, struct lo
                 if ((lock->lf_flags & F_POSIX) &&
                     (block->lf_flags & F_POSIX)) {
                         struct lwp *wlwp;
-                        volatile const struct lockf *waitblock;
+                        __volatile const struct lockf *waitblock;
                         int i = 0;
-                        struct proc *p;
 
-                        p = (struct proc *)block->lf_id;
-                        KASSERT(p != NULL);
-                        while (i++ < maxlockdepth) {
-                                mutex_enter(&p->p_smutex);
-                                if (p->p_nlwps > 1) {
-                                        mutex_exit(&p->p_smutex);
-                                        break;
-                                }
-                                wlwp = LIST_FIRST(&p->p_lwps);
-                                lwp_lock(wlwp);
-                                if (wlwp->l_wmesg != lockstr) {
-                                        lwp_unlock(wlwp);
-                                        mutex_exit(&p->p_smutex);
-                                        break;
-                                }
+                        /*
+                         * The block is waiting on something.  if_lwp will be
+                         * 0 once the lock is granted, so we terminate the
+                         * loop if we find this.
+                         */
+                        wlwp = block->lf_lwp;
+                        while (wlwp && (i++ < maxlockdepth)) {
                                 waitblock = wlwp->l_wchan;
-                                lwp_unlock(wlwp);
-                                mutex_exit(&p->p_smutex);
-                                if (waitblock == NULL) {
-                                        /*
-                                         * this lwp just got up but
-                                         * not returned from ltsleep yet.
-                                         */
-                                        break;
-                                }
                                 /* Get the owner of the blocking lock */
                                 waitblock = waitblock->lf_next;
                                 if ((waitblock->lf_flags & F_POSIX) == 0)
                                         break;
-                                p = (struct proc *)waitblock->lf_id;
-                                if (p == curproc) {
+                                wlwp = waitblock->lf_lwp;
+                                if (wlwp == lock->lf_lwp) {
                                         lf_free(lock);
                                         return EDEADLK;
                                 }
@@ -613,7 +563,7 @@ lf_setlock(struct lockf *lock, struct lo
                         lf_printlist("lf_setlock", block);
                 }
 #endif /* LOCKF_DEBUG */
-                error = cv_wait_sig(&lock->lf_cv, interlock);
+                error = ltsleep(lock, priority, lockstr, 0, interlock);
 
                 /*
                  * We may have been awakened by a signal (in
@@ -621,11 +571,11 @@ lf_setlock(struct lockf *lock, struct lo
                  * blocked list) and/or by another process
                  * releasing a lock (in which case we have already
                  * been removed from the blocked list and our
-                 * lf_next field set to NULL).
+                 * lf_next field set to NOLOCKF).
                  */
-                if (lock->lf_next != NULL) {
+                if (lock->lf_next != NOLOCKF) {
                         TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
-                        lock->lf_next = NULL;
+                        lock->lf_next = NOLOCKF;
                 }
                 if (error) {
                         lf_free(lock);
@@ -640,6 +590,7 @@ lf_setlock(struct lockf *lock, struct lo
          * Skip over locks owned by other processes.
          * Handle any locks that overlap and are owned by ourselves.
          */
+        lock->lf_lwp = 0;
         prev = head;
         block = *head;
         needtolink = 1;
@@ -799,11 +750,11 @@ lf_getlock(struct lockf *lock, struct fl
 int
 lf_advlock(struct vop_advlock_args *ap, struct lockf **head, off_t size)
 {
-        struct lwp *l = curlwp;
+        struct proc *p = curproc;
         struct flock *fl = ap->a_fl;
         struct lockf *lock = NULL;
         struct lockf *sparelock;
-        kmutex_t *interlock = &ap->a_vp->v_interlock;
+        struct simplelock *interlock = &ap->a_vp->v_interlock;
         off_t start, end;
         int error = 0;
 
@@ -831,20 +782,20 @@ lf_advlock(struct vop_advlock_args *ap, 
                 return EINVAL;
 
         /*
-         * Allocate locks before acquiring the interlock.  We need two
-         * locks in the worst case.
+         * allocate locks before acquire simple lock.
+         * we need two locks in the worst case.
          */
         switch (ap->a_op) {
         case F_SETLK:
         case F_UNLCK:
                 /*
-                 * XXX For F_UNLCK case, we can re-use the lock.
+                 * XXX for F_UNLCK case, we can re-use lock.
                  */
                 if ((ap->a_flags & F_FLOCK) == 0) {
                         /*
-                         * Byte-range lock might need one more lock.
+                         * byte-range lock might need one more lock.
                         */
-                        sparelock = lf_alloc(kauth_cred_geteuid(l->l_cred), 0);
+                        sparelock = lf_alloc(p->p_ucred->cr_uid, 0);
                         if (sparelock == NULL) {
                                 error = ENOMEM;
                                 goto quit;
@@ -861,14 +812,13 @@ lf_advlock(struct vop_advlock_args *ap, 
                 return EINVAL;
         }
 
-        lock = lf_alloc(kauth_cred_geteuid(l->l_cred),
-            ap->a_op != F_UNLCK ? 1 : 2);
+        lock = lf_alloc(p->p_ucred->cr_uid, ap->a_op != F_UNLCK ? 1 : 2);
         if (lock == NULL) {
                 error = ENOMEM;
                 goto quit;
         }
 
-        mutex_enter(interlock);
+        simple_lock(interlock);
 
         /*
          * Avoid the common case of unlocking when inode has no locks.
@@ -905,6 +855,7 @@ lf_advlock(struct vop_advlock_args *ap, 
                 KASSERT(curproc == (struct proc *)ap->a_id);
         }
         lock->lf_id = (struct proc *)ap->a_id;
+        lock->lf_lwp = curlwp;
 
         /*
          * Do the requested operation.
@@ -930,7 +881,7 @@ lf_advlock(struct vop_advlock_args *ap, 
         }
 
 quit_unlock:
-        mutex_exit(interlock);
+        simple_unlock(interlock);
 quit:
         if (lock)
                 lf_free(lock);
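
The largest functional change above is the deadlock check in lf_setlock(): the 1.46 side chases lf_lwp pointers through the chain of blocked POSIX locks, while the 1.45.2.6 side starts from the owning process and inspects its single LWP's wait channel, and both give up after maxlockdepth steps. The user-space sketch below only illustrates that chain walk under simplified assumptions; struct simple_lockf, would_deadlock() and the process stand-ins are invented for the example and are not the kernel's data structures.

/*
 * Simplified sketch of the lf_setlock() deadlock check: starting from the
 * lock that blocks a new request, follow the wait chain for at most
 * maxlockdepth steps and report EDEADLK if it leads back to the requesting
 * process.  The struct below is a stand-in, not the kernel's struct lockf.
 */
#include <errno.h>
#include <stddef.h>
#include <stdio.h>

#define F_POSIX         0x040   /* process-owned lock; value assumed here */
#define MAXDEPTH        50

struct simple_lockf {
        int flags;                      /* F_POSIX for process-owned locks */
        void *id;                       /* owning process, as an opaque pointer */
        struct simple_lockf *next;      /* lock this lock's owner waits on, or NULL */
};

static int maxlockdepth = MAXDEPTH;

/* Return EDEADLK if 'block' is (transitively) waiting on 'self', else 0. */
static int
would_deadlock(const struct simple_lockf *block, const void *self)
{
        int i = 0;

        while (block != NULL && (block->flags & F_POSIX) &&
            i++ < maxlockdepth) {
                if (block->id == self)
                        return EDEADLK;
                block = block->next;
        }
        return 0;
}

int
main(void)
{
        char proc_a, proc_b;    /* stand-ins for two process identities */

        /* proc_a holds lock a and is itself waiting on lock b, held by proc_b. */
        struct simple_lockf b = { F_POSIX, &proc_b, NULL };
        struct simple_lockf a = { F_POSIX, &proc_a, &b };

        /* proc_b now requests a lock that is blocked by a: a cycle, so EDEADLK. */
        printf("EDEADLK expected: %d\n", would_deadlock(&a, &proc_b) == EDEADLK);
        return 0;
}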
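
The lf_alloc()/lf_free() hunks show the per-uid lock accounting: every allocation bumps ui_lockcnt, and a non-root request is refused once the count exceeds maxlocksperuid (allowfail == 1) or twice that (allowfail == 2); the 1.45.2.6 side does this with atomic_inc_ulong_nv()/atomic_dec_ulong(), the 1.46 side under UILOCK()/UIUNLOCK(). Below is a minimal sketch of that limit policy only, using C11 atomics in place of the kernel primitives and one static counter in place of the uidinfo lookup and pool allocator; lockcnt_reserve() is an invented name.

/*
 * Sketch of the per-uid limit policy in lf_alloc(): reserve a slot, and if
 * the new count is over the limit for this kind of request, undo the
 * reservation and refuse (the kernel code then returns NULL to the caller).
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static int maxlocksperuid = 1024;
static atomic_ulong ui_lockcnt;         /* stand-in for uip->ui_lockcnt */

static bool
lockcnt_reserve(unsigned uid, int allowfail)
{
        unsigned long lcnt = atomic_fetch_add(&ui_lockcnt, 1) + 1;

        /* uid 0 and allowfail == 0 callers are never refused. */
        if (uid && allowfail &&
            lcnt > (allowfail == 1 ? maxlocksperuid : maxlocksperuid * 2)) {
                atomic_fetch_sub(&ui_lockcnt, 1);       /* undo the reservation */
                return false;
        }
        return true;
}

int
main(void)
{
        atomic_store(&ui_lockcnt, 1024);        /* pretend this uid is at the limit */
        printf("soft request granted: %d\n", lockcnt_reserve(1000, 1));
        printf("hard request granted: %d\n", lockcnt_reserve(1000, 2));
        return 0;
}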