Merge tag 'afs-fixes-20171124' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs

Pull AFS fixes from David Howells:

 - Make AFS file locking work again.

 - Don't write to a page that's being written out, but wait for it to
   complete.

 - Do d_drop() and d_add() in the right places.

 - Put keys on error paths.

 - Remove some redundant code.

* tag 'afs-fixes-20171124' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs:
  afs: remove redundant assignment of dvnode to itself
  afs: cell: Remove unnecessary code in afs_lookup_cell
  afs: Fix signal handling in some file ops
  afs: Fix some dentry handling in dir ops and missing key_puts
  afs: Make afs_write_begin() avoid writing to a page that's being stored
  afs: Fix file locking
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 1858c91..9bb921d 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -207,13 +207,8 @@
 		rcu_read_lock();
 		cell = afs_lookup_cell_rcu(net, name, namesz);
 		rcu_read_unlock();
-		if (!IS_ERR(cell)) {
-			if (excl) {
-				afs_put_cell(net, cell);
-				return ERR_PTR(-EEXIST);
-			}
+		if (!IS_ERR(cell))
 			goto wait_for_cell;
-		}
 	}
 
 	/* Assume we're probably going to create a cell and preallocate and
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index ab618d3..ff8d5bf 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -765,6 +765,8 @@
 	if (fc->ac.error < 0)
 		return;
 
+	d_drop(new_dentry);
+
 	inode = afs_iget(fc->vnode->vfs_inode.i_sb, fc->key,
 			 newfid, newstatus, newcb, fc->cbi);
 	if (IS_ERR(inode)) {
@@ -775,9 +777,7 @@
 		return;
 	}
 
-	d_instantiate(new_dentry, inode);
-	if (d_unhashed(new_dentry))
-		d_rehash(new_dentry);
+	d_add(new_dentry, inode);
 }
 
 /*
@@ -818,6 +818,8 @@
 		ret = afs_end_vnode_operation(&fc);
 		if (ret < 0)
 			goto error_key;
+	} else {
+		goto error_key;
 	}
 
 	key_put(key);
@@ -972,7 +974,7 @@
 	struct afs_fs_cursor fc;
 	struct afs_file_status newstatus;
 	struct afs_callback newcb;
-	struct afs_vnode *dvnode = dvnode = AFS_FS_I(dir);
+	struct afs_vnode *dvnode = AFS_FS_I(dir);
 	struct afs_fid newfid;
 	struct key *key;
 	int ret;
@@ -1006,6 +1008,8 @@
 		ret = afs_end_vnode_operation(&fc);
 		if (ret < 0)
 			goto error_key;
+	} else {
+		goto error_key;
 	}
 
 	key_put(key);
@@ -1053,7 +1057,7 @@
 	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
 		if (mutex_lock_interruptible_nested(&vnode->io_lock, 1) < 0) {
 			afs_end_vnode_operation(&fc);
-			return -ERESTARTSYS;
+			goto error_key;
 		}
 
 		while (afs_select_fileserver(&fc)) {
@@ -1071,6 +1075,8 @@
 		ret = afs_end_vnode_operation(&fc);
 		if (ret < 0)
 			goto error_key;
+	} else {
+		goto error_key;
 	}
 
 	key_put(key);
@@ -1130,6 +1136,8 @@
 		ret = afs_end_vnode_operation(&fc);
 		if (ret < 0)
 			goto error_key;
+	} else {
+		goto error_key;
 	}
 
 	key_put(key);
@@ -1180,7 +1188,7 @@
 		if (orig_dvnode != new_dvnode) {
 			if (mutex_lock_interruptible_nested(&new_dvnode->io_lock, 1) < 0) {
 				afs_end_vnode_operation(&fc);
-				return -ERESTARTSYS;
+				goto error_key;
 			}
 		}
 		while (afs_select_fileserver(&fc)) {
@@ -1199,14 +1207,9 @@
 			goto error_key;
 	}
 
-	key_put(key);
-	_leave(" = 0");
-	return 0;
-
 error_key:
 	key_put(key);
 error:
-	d_drop(new_dentry);
 	_leave(" = %d", ret);
 	return ret;
 }
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 7571a5d..c40ba2f 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -170,7 +170,7 @@
 {
 	struct afs_vnode *vnode =
 		container_of(work, struct afs_vnode, lock_work.work);
-	struct file_lock *fl;
+	struct file_lock *fl, *next;
 	afs_lock_type_t type;
 	struct key *key;
 	int ret;
@@ -179,117 +179,136 @@
 
 	spin_lock(&vnode->lock);
 
-	if (test_bit(AFS_VNODE_UNLOCKING, &vnode->flags)) {
+again:
+	_debug("wstate %u for %p", vnode->lock_state, vnode);
+	switch (vnode->lock_state) {
+	case AFS_VNODE_LOCK_NEED_UNLOCK:
 		_debug("unlock");
+		vnode->lock_state = AFS_VNODE_LOCK_UNLOCKING;
 		spin_unlock(&vnode->lock);
 
 		/* attempt to release the server lock; if it fails, we just
-		 * wait 5 minutes and it'll time out anyway */
-		ret = afs_release_lock(vnode, vnode->unlock_key);
+		 * wait 5 minutes and it'll expire anyway */
+		ret = afs_release_lock(vnode, vnode->lock_key);
 		if (ret < 0)
 			printk(KERN_WARNING "AFS:"
 			       " Failed to release lock on {%x:%x} error %d\n",
 			       vnode->fid.vid, vnode->fid.vnode, ret);
 
 		spin_lock(&vnode->lock);
-		key_put(vnode->unlock_key);
-		vnode->unlock_key = NULL;
-		clear_bit(AFS_VNODE_UNLOCKING, &vnode->flags);
-	}
+		key_put(vnode->lock_key);
+		vnode->lock_key = NULL;
+		vnode->lock_state = AFS_VNODE_LOCK_NONE;
 
-	/* if we've got a lock, then it must be time to extend that lock as AFS
-	 * locks time out after 5 minutes */
-	if (!list_empty(&vnode->granted_locks)) {
+		if (list_empty(&vnode->pending_locks)) {
+			spin_unlock(&vnode->lock);
+			return;
+		}
+
+		/* The new front of the queue now owns the state variables. */
+		next = list_entry(vnode->pending_locks.next,
+				  struct file_lock, fl_u.afs.link);
+		vnode->lock_key = afs_file_key(next->fl_file);
+		vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
+		vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
+		goto again;
+
+	/* If we've already got a lock, then it must be time to extend that
+	 * lock as AFS locks time out after 5 minutes.
+	 */
+	case AFS_VNODE_LOCK_GRANTED:
 		_debug("extend");
 
-		if (test_and_set_bit(AFS_VNODE_LOCKING, &vnode->flags))
-			BUG();
-		fl = list_entry(vnode->granted_locks.next,
-				struct file_lock, fl_u.afs.link);
-		key = key_get(afs_file_key(fl->fl_file));
+		ASSERT(!list_empty(&vnode->granted_locks));
+
+		key = key_get(vnode->lock_key);
+		vnode->lock_state = AFS_VNODE_LOCK_EXTENDING;
 		spin_unlock(&vnode->lock);
 
-		ret = afs_extend_lock(vnode, key);
-		clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
+		ret = afs_extend_lock(vnode, key); /* RPC */
 		key_put(key);
-		switch (ret) {
-		case 0:
+
+		if (ret < 0)
+			pr_warning("AFS: Failed to extend lock on {%x:%x} error %d\n",
+				   vnode->fid.vid, vnode->fid.vnode, ret);
+
+		spin_lock(&vnode->lock);
+
+		if (vnode->lock_state != AFS_VNODE_LOCK_EXTENDING)
+			goto again;
+		vnode->lock_state = AFS_VNODE_LOCK_GRANTED;
+
+		if (ret == 0)
 			afs_schedule_lock_extension(vnode);
-			break;
-		default:
-			/* ummm... we failed to extend the lock - retry
-			 * extension shortly */
-			printk(KERN_WARNING "AFS:"
-			       " Failed to extend lock on {%x:%x} error %d\n",
-			       vnode->fid.vid, vnode->fid.vnode, ret);
+		else
 			queue_delayed_work(afs_lock_manager, &vnode->lock_work,
 					   HZ * 10);
-			break;
-		}
-		_leave(" [extend]");
+		spin_unlock(&vnode->lock);
+		_leave(" [ext]");
 		return;
-	}
 
-	/* if we don't have a granted lock, then we must've been called back by
-	 * the server, and so if might be possible to get a lock we're
-	 * currently waiting for */
-	if (!list_empty(&vnode->pending_locks)) {
+		/* If we don't have a granted lock, then we must've been called
+		 * back by the server, and so if might be possible to get a
+		 * lock we're currently waiting for.
+		 */
+	case AFS_VNODE_LOCK_WAITING_FOR_CB:
 		_debug("get");
 
-		if (test_and_set_bit(AFS_VNODE_LOCKING, &vnode->flags))
-			BUG();
-		fl = list_entry(vnode->pending_locks.next,
-				struct file_lock, fl_u.afs.link);
-		key = key_get(afs_file_key(fl->fl_file));
-		type = (fl->fl_type == F_RDLCK) ?
-			AFS_LOCK_READ : AFS_LOCK_WRITE;
+		key = key_get(vnode->lock_key);
+		type = vnode->lock_type;
+		vnode->lock_state = AFS_VNODE_LOCK_SETTING;
 		spin_unlock(&vnode->lock);
 
-		ret = afs_set_lock(vnode, key, type);
-		clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
+		ret = afs_set_lock(vnode, key, type); /* RPC */
+		key_put(key);
+
+		spin_lock(&vnode->lock);
 		switch (ret) {
 		case -EWOULDBLOCK:
 			_debug("blocked");
 			break;
 		case 0:
 			_debug("acquired");
-			if (type == AFS_LOCK_READ)
-				set_bit(AFS_VNODE_READLOCKED, &vnode->flags);
-			else
-				set_bit(AFS_VNODE_WRITELOCKED, &vnode->flags);
-			ret = AFS_LOCK_GRANTED;
+			vnode->lock_state = AFS_VNODE_LOCK_GRANTED;
+			/* Fall through */
 		default:
-			spin_lock(&vnode->lock);
-			/* the pending lock may have been withdrawn due to a
-			 * signal */
-			if (list_entry(vnode->pending_locks.next,
-				       struct file_lock, fl_u.afs.link) == fl) {
-				fl->fl_u.afs.state = ret;
-				if (ret == AFS_LOCK_GRANTED)
-					afs_grant_locks(vnode, fl);
-				else
-					list_del_init(&fl->fl_u.afs.link);
-				wake_up(&fl->fl_wait);
-				spin_unlock(&vnode->lock);
-			} else {
+			/* Pass the lock or the error onto the first locker in
+			 * the list - if they're looking for this type of lock.
+			 * If they're not, we assume that whoever asked for it
+			 * took a signal.
+			 */
+			if (list_empty(&vnode->pending_locks)) {
 				_debug("withdrawn");
-				clear_bit(AFS_VNODE_READLOCKED, &vnode->flags);
-				clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags);
-				spin_unlock(&vnode->lock);
-				afs_release_lock(vnode, key);
-				if (!list_empty(&vnode->pending_locks))
-					afs_lock_may_be_available(vnode);
+				vnode->lock_state = AFS_VNODE_LOCK_NEED_UNLOCK;
+				goto again;
 			}
-			break;
+
+			fl = list_entry(vnode->pending_locks.next,
+					struct file_lock, fl_u.afs.link);
+			type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
+			if (vnode->lock_type != type) {
+				_debug("changed");
+				vnode->lock_state = AFS_VNODE_LOCK_NEED_UNLOCK;
+				goto again;
+			}
+
+			fl->fl_u.afs.state = ret;
+			if (ret == 0)
+				afs_grant_locks(vnode, fl);
+			else
+				list_del_init(&fl->fl_u.afs.link);
+			wake_up(&fl->fl_wait);
+			spin_unlock(&vnode->lock);
+			_leave(" [granted]");
+			return;
 		}
-		key_put(key);
-		_leave(" [pend]");
+
+	default:
+		/* Looks like a lock request was withdrawn. */
+		spin_unlock(&vnode->lock);
+		_leave(" [no]");
 		return;
 	}
-
-	/* looks like the lock request was withdrawn on a signal */
-	spin_unlock(&vnode->lock);
-	_leave(" [no locks]");
 }
 
 /*
@@ -298,15 +317,105 @@
  * AF_RXRPC
  * - the caller must hold the vnode lock
  */
-static void afs_defer_unlock(struct afs_vnode *vnode, struct key *key)
+static void afs_defer_unlock(struct afs_vnode *vnode)
 {
-	cancel_delayed_work(&vnode->lock_work);
-	if (!test_and_clear_bit(AFS_VNODE_READLOCKED, &vnode->flags) &&
-	    !test_and_clear_bit(AFS_VNODE_WRITELOCKED, &vnode->flags))
-		BUG();
-	if (test_and_set_bit(AFS_VNODE_UNLOCKING, &vnode->flags))
-		BUG();
-	vnode->unlock_key = key_get(key);
+	_enter("");
+
+	if (vnode->lock_state == AFS_VNODE_LOCK_GRANTED ||
+	    vnode->lock_state == AFS_VNODE_LOCK_EXTENDING) {
+		cancel_delayed_work(&vnode->lock_work);
+
+		vnode->lock_state = AFS_VNODE_LOCK_NEED_UNLOCK;
+		afs_lock_may_be_available(vnode);
+	}
+}
+
+/*
+ * Check that our view of the file metadata is up to date and check to see
+ * whether we think that we have a locking permit.
+ */
+static int afs_do_setlk_check(struct afs_vnode *vnode, struct key *key,
+			      afs_lock_type_t type, bool can_sleep)
+{
+	afs_access_t access;
+	int ret;
+
+	/* Make sure we've got a callback on this file and that our view of the
+	 * data version is up to date.
+	 */
+	ret = afs_validate(vnode, key);
+	if (ret < 0)
+		return ret;
+
+	/* Check the permission set to see if we're actually going to be
+	 * allowed to get a lock on this file.
+	 */
+	ret = afs_check_permit(vnode, key, &access);
+	if (ret < 0)
+		return ret;
+
+	/* At a rough estimation, you need LOCK, WRITE or INSERT perm to
+	 * read-lock a file and WRITE or INSERT perm to write-lock a file.
+	 *
+	 * We can't rely on the server to do this for us since if we want to
+	 * share a read lock that we already have, we won't go the server.
+	 */
+	if (type == AFS_LOCK_READ) {
+		if (!(access & (AFS_ACE_INSERT | AFS_ACE_WRITE | AFS_ACE_LOCK)))
+			return -EACCES;
+		if (vnode->status.lock_count == -1 && !can_sleep)
+			return -EAGAIN; /* Write locked */
+	} else {
+		if (!(access & (AFS_ACE_INSERT | AFS_ACE_WRITE)))
+			return -EACCES;
+		if (vnode->status.lock_count != 0 && !can_sleep)
+			return -EAGAIN; /* Locked */
+	}
+
+	return 0;
+}
+
+/*
+ * Remove the front runner from the pending queue.
+ * - The caller must hold vnode->lock.
+ */
+static void afs_dequeue_lock(struct afs_vnode *vnode, struct file_lock *fl)
+{
+	struct file_lock *next;
+
+	_enter("");
+
+	/* ->lock_type, ->lock_key and ->lock_state only belong to this
+	 * file_lock if we're at the front of the pending queue or if we have
+	 * the lock granted or if the lock_state is NEED_UNLOCK or UNLOCKING.
+	 */
+	if (vnode->granted_locks.next == &fl->fl_u.afs.link &&
+	    vnode->granted_locks.prev == &fl->fl_u.afs.link) {
+		list_del_init(&fl->fl_u.afs.link);
+		afs_defer_unlock(vnode);
+		return;
+	}
+
+	if (!list_empty(&vnode->granted_locks) ||
+	    vnode->pending_locks.next != &fl->fl_u.afs.link) {
+		list_del_init(&fl->fl_u.afs.link);
+		return;
+	}
+
+	list_del_init(&fl->fl_u.afs.link);
+	key_put(vnode->lock_key);
+	vnode->lock_key = NULL;
+	vnode->lock_state = AFS_VNODE_LOCK_NONE;
+
+	if (list_empty(&vnode->pending_locks))
+		return;
+
+	/* The new front of the queue now owns the state variables. */
+	next = list_entry(vnode->pending_locks.next,
+			  struct file_lock, fl_u.afs.link);
+	vnode->lock_key = afs_file_key(next->fl_file);
+	vnode->lock_type = (next->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
+	vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
 	afs_lock_may_be_available(vnode);
 }
 
@@ -315,7 +424,7 @@
  */
 static int afs_do_setlk(struct file *file, struct file_lock *fl)
 {
-	struct inode *inode = file_inode(file);
+	struct inode *inode = locks_inode(file);
 	struct afs_vnode *vnode = AFS_FS_I(inode);
 	afs_lock_type_t type;
 	struct key *key = afs_file_key(file);
@@ -333,165 +442,136 @@
 
 	type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
 
-	spin_lock(&inode->i_lock);
-
-	/* make sure we've got a callback on this file and that our view of the
-	 * data version is up to date */
-	ret = afs_validate(vnode, key);
+	ret = afs_do_setlk_check(vnode, key, type, fl->fl_flags & FL_SLEEP);
 	if (ret < 0)
-		goto error;
-
-	if (vnode->status.lock_count != 0 && !(fl->fl_flags & FL_SLEEP)) {
-		ret = -EAGAIN;
-		goto error;
-	}
+		return ret;
 
 	spin_lock(&vnode->lock);
 
-	/* if we've already got a readlock on the server then we can instantly
+	/* If we've already got a readlock on the server then we instantly
 	 * grant another readlock, irrespective of whether there are any
-	 * pending writelocks */
+	 * pending writelocks.
+	 */
 	if (type == AFS_LOCK_READ &&
-	    vnode->flags & (1 << AFS_VNODE_READLOCKED)) {
+	    vnode->lock_state == AFS_VNODE_LOCK_GRANTED &&
+	    vnode->lock_type == AFS_LOCK_READ) {
 		_debug("instant readlock");
-		ASSERTCMP(vnode->flags &
-			  ((1 << AFS_VNODE_LOCKING) |
-			   (1 << AFS_VNODE_WRITELOCKED)), ==, 0);
 		ASSERT(!list_empty(&vnode->granted_locks));
-		goto sharing_existing_lock;
+		goto share_existing_lock;
 	}
 
-	/* if there's no-one else with a lock on this vnode, then we need to
-	 * ask the server for a lock */
-	if (list_empty(&vnode->pending_locks) &&
-	    list_empty(&vnode->granted_locks)) {
-		_debug("not locked");
-		ASSERTCMP(vnode->flags &
-			  ((1 << AFS_VNODE_LOCKING) |
-			   (1 << AFS_VNODE_READLOCKED) |
-			   (1 << AFS_VNODE_WRITELOCKED)), ==, 0);
-		list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
-		set_bit(AFS_VNODE_LOCKING, &vnode->flags);
-		spin_unlock(&vnode->lock);
-
-		ret = afs_set_lock(vnode, key, type);
-		clear_bit(AFS_VNODE_LOCKING, &vnode->flags);
-		switch (ret) {
-		case 0:
-			_debug("acquired");
-			goto acquired_server_lock;
-		case -EWOULDBLOCK:
-			_debug("would block");
-			spin_lock(&vnode->lock);
-			ASSERT(list_empty(&vnode->granted_locks));
-			ASSERTCMP(vnode->pending_locks.next, ==,
-				  &fl->fl_u.afs.link);
-			goto wait;
-		default:
-			spin_lock(&vnode->lock);
-			list_del_init(&fl->fl_u.afs.link);
-			spin_unlock(&vnode->lock);
-			goto error;
-		}
-	}
-
-	/* otherwise, we need to wait for a local lock to become available */
-	_debug("wait local");
 	list_add_tail(&fl->fl_u.afs.link, &vnode->pending_locks);
-wait:
-	if (!(fl->fl_flags & FL_SLEEP)) {
-		_debug("noblock");
-		ret = -EAGAIN;
-		goto abort_attempt;
-	}
+
+	if (vnode->lock_state != AFS_VNODE_LOCK_NONE)
+		goto need_to_wait;
+
+	/* We don't have a lock on this vnode and we aren't currently waiting
+	 * for one either, so ask the server for a lock.
+	 *
+	 * Note that we need to be careful if we get interrupted by a signal
+	 * after dispatching the request as we may still get the lock, even
+	 * though we don't wait for the reply (it's not too bad a problem - the
+	 * lock will expire in 10 mins anyway).
+	 */
+	_debug("not locked");
+	vnode->lock_key = key_get(key);
+	vnode->lock_type = type;
+	vnode->lock_state = AFS_VNODE_LOCK_SETTING;
 	spin_unlock(&vnode->lock);
 
-	/* now we need to sleep and wait for the lock manager thread to get the
-	 * lock from the server */
-	_debug("sleep");
-	ret = wait_event_interruptible(fl->fl_wait,
-				       fl->fl_u.afs.state <= AFS_LOCK_GRANTED);
-	if (fl->fl_u.afs.state <= AFS_LOCK_GRANTED) {
-		ret = fl->fl_u.afs.state;
-		if (ret < 0)
-			goto error;
-		spin_lock(&vnode->lock);
-		goto given_lock;
-	}
-
-	/* we were interrupted, but someone may still be in the throes of
-	 * giving us the lock */
-	_debug("intr");
-	ASSERTCMP(ret, ==, -ERESTARTSYS);
+	ret = afs_set_lock(vnode, key, type); /* RPC */
 
 	spin_lock(&vnode->lock);
-	if (fl->fl_u.afs.state <= AFS_LOCK_GRANTED) {
-		ret = fl->fl_u.afs.state;
-		if (ret < 0) {
-			spin_unlock(&vnode->lock);
-			goto error;
-		}
-		goto given_lock;
+	switch (ret) {
+	default:
+		goto abort_attempt;
+
+	case -EWOULDBLOCK:
+		/* The server doesn't have a lock-waiting queue, so the client
+		 * will have to retry.  The server will break the outstanding
+		 * callbacks on a file when a lock is released.
+		 */
+		_debug("would block");
+		ASSERT(list_empty(&vnode->granted_locks));
+		ASSERTCMP(vnode->pending_locks.next, ==, &fl->fl_u.afs.link);
+		vnode->lock_state = AFS_VNODE_LOCK_WAITING_FOR_CB;
+		goto need_to_wait;
+
+	case 0:
+		_debug("acquired");
+		break;
 	}
 
+	/* we've acquired a server lock, but it needs to be renewed after 5
+	 * mins */
+	vnode->lock_state = AFS_VNODE_LOCK_GRANTED;
+	afs_schedule_lock_extension(vnode);
+
+share_existing_lock:
+	/* the lock has been granted as far as we're concerned... */
+	fl->fl_u.afs.state = AFS_LOCK_GRANTED;
+	list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks);
+
+given_lock:
+	/* ... but we do still need to get the VFS's blessing */
+	spin_unlock(&vnode->lock);
+
+	ret = posix_lock_file(file, fl, NULL);
+	if (ret < 0)
+		goto vfs_rejected_lock;
+
+	/* Again, make sure we've got a callback on this file and, again, make
+	 * sure that our view of the data version is up to date (we ignore
+	 * errors incurred here and deal with the consequences elsewhere).
+	 */
+	afs_validate(vnode, key);
+	_leave(" = 0");
+	return 0;
+
+need_to_wait:
+	/* We're going to have to wait.  Either this client doesn't have a lock
+	 * on the server yet and we need to wait for a callback to occur, or
+	 * the client does have a lock on the server, but it belongs to some
+	 * other process(es) and is incompatible with the lock we want.
+	 */
+	ret = -EAGAIN;
+	if (fl->fl_flags & FL_SLEEP) {
+		spin_unlock(&vnode->lock);
+
+		_debug("sleep");
+		ret = wait_event_interruptible(fl->fl_wait,
+					       fl->fl_u.afs.state != AFS_LOCK_PENDING);
+
+		spin_lock(&vnode->lock);
+	}
+
+	if (fl->fl_u.afs.state == AFS_LOCK_GRANTED)
+		goto given_lock;
+	if (fl->fl_u.afs.state < 0)
+		ret = fl->fl_u.afs.state;
+
 abort_attempt:
 	/* we aren't going to get the lock, either because we're unwilling to
 	 * wait, or because some signal happened */
 	_debug("abort");
-	if (list_empty(&vnode->granted_locks) &&
-	    vnode->pending_locks.next == &fl->fl_u.afs.link) {
-		if (vnode->pending_locks.prev != &fl->fl_u.afs.link) {
-			/* kick the next pending lock into having a go */
-			list_del_init(&fl->fl_u.afs.link);
-			afs_lock_may_be_available(vnode);
-		}
-	} else {
-		list_del_init(&fl->fl_u.afs.link);
-	}
+	afs_dequeue_lock(vnode, fl);
+
+error_unlock:
 	spin_unlock(&vnode->lock);
-	goto error;
-
-acquired_server_lock:
-	/* we've acquired a server lock, but it needs to be renewed after 5
-	 * mins */
-	spin_lock(&vnode->lock);
-	afs_schedule_lock_extension(vnode);
-	if (type == AFS_LOCK_READ)
-		set_bit(AFS_VNODE_READLOCKED, &vnode->flags);
-	else
-		set_bit(AFS_VNODE_WRITELOCKED, &vnode->flags);
-sharing_existing_lock:
-	/* the lock has been granted as far as we're concerned... */
-	fl->fl_u.afs.state = AFS_LOCK_GRANTED;
-	list_move_tail(&fl->fl_u.afs.link, &vnode->granted_locks);
-given_lock:
-	/* ... but we do still need to get the VFS's blessing */
-	ASSERT(!(vnode->flags & (1 << AFS_VNODE_LOCKING)));
-	ASSERT((vnode->flags & ((1 << AFS_VNODE_READLOCKED) |
-				(1 << AFS_VNODE_WRITELOCKED))) != 0);
-	ret = posix_lock_file(file, fl, NULL);
-	if (ret < 0)
-		goto vfs_rejected_lock;
-	spin_unlock(&vnode->lock);
-
-	/* again, make sure we've got a callback on this file and, again, make
-	 * sure that our view of the data version is up to date (we ignore
-	 * errors incurred here and deal with the consequences elsewhere) */
-	afs_validate(vnode, key);
-
-error:
-	spin_unlock(&inode->i_lock);
 	_leave(" = %d", ret);
 	return ret;
 
 vfs_rejected_lock:
-	/* the VFS rejected the lock we just obtained, so we have to discard
-	 * what we just got */
+	/* The VFS rejected the lock we just obtained, so we have to discard
+	 * what we just got.  We defer this to the lock manager work item to
+	 * deal with.
+	 */
 	_debug("vfs refused %d", ret);
+	spin_lock(&vnode->lock);
 	list_del_init(&fl->fl_u.afs.link);
 	if (list_empty(&vnode->granted_locks))
-		afs_defer_unlock(vnode, key);
-	goto abort_attempt;
+		afs_defer_unlock(vnode);
+	goto error_unlock;
 }
 
 /*
@@ -499,34 +579,21 @@
  */
 static int afs_do_unlk(struct file *file, struct file_lock *fl)
 {
-	struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host);
-	struct key *key = afs_file_key(file);
+	struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
 	int ret;
 
 	_enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
 
+	/* Flush all pending writes before doing anything with locks. */
+	vfs_fsync(file, 0);
+
 	/* only whole-file unlocks are supported */
 	if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX)
 		return -EINVAL;
 
-	fl->fl_ops = &afs_lock_ops;
-	INIT_LIST_HEAD(&fl->fl_u.afs.link);
-	fl->fl_u.afs.state = AFS_LOCK_PENDING;
-
-	spin_lock(&vnode->lock);
 	ret = posix_lock_file(file, fl, NULL);
-	if (ret < 0) {
-		spin_unlock(&vnode->lock);
-		_leave(" = %d [vfs]", ret);
-		return ret;
-	}
-
-	/* discard the server lock only if all granted locks are gone */
-	if (list_empty(&vnode->granted_locks))
-		afs_defer_unlock(vnode, key);
-	spin_unlock(&vnode->lock);
-	_leave(" = 0");
-	return 0;
+	_leave(" = %d [%u]", ret, vnode->lock_state);
+	return ret;
 }
 
 /*
@@ -534,7 +601,7 @@
  */
 static int afs_do_getlk(struct file *file, struct file_lock *fl)
 {
-	struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host);
+	struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
 	struct key *key = afs_file_key(file);
 	int ret, lock_count;
 
@@ -542,29 +609,25 @@
 
 	fl->fl_type = F_UNLCK;
 
-	inode_lock(&vnode->vfs_inode);
-
 	/* check local lock records first */
-	ret = 0;
 	posix_test_lock(file, fl);
 	if (fl->fl_type == F_UNLCK) {
 		/* no local locks; consult the server */
 		ret = afs_fetch_status(vnode, key);
 		if (ret < 0)
 			goto error;
-		lock_count = vnode->status.lock_count;
-		if (lock_count) {
-			if (lock_count > 0)
-				fl->fl_type = F_RDLCK;
-			else
-				fl->fl_type = F_WRLCK;
-			fl->fl_start = 0;
-			fl->fl_end = OFFSET_MAX;
-		}
+
+		lock_count = READ_ONCE(vnode->status.lock_count);
+		if (lock_count > 0)
+			fl->fl_type = F_RDLCK;
+		else
+			fl->fl_type = F_WRLCK;
+		fl->fl_start = 0;
+		fl->fl_end = OFFSET_MAX;
 	}
 
+	ret = 0;
 error:
-	inode_unlock(&vnode->vfs_inode);
 	_leave(" = %d [%hd]", ret, fl->fl_type);
 	return ret;
 }
@@ -574,7 +637,7 @@
  */
 int afs_lock(struct file *file, int cmd, struct file_lock *fl)
 {
-	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
+	struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
 
 	_enter("{%x:%u},%d,{t=%x,fl=%x,r=%Ld:%Ld}",
 	       vnode->fid.vid, vnode->fid.vnode, cmd,
@@ -597,7 +660,7 @@
  */
 int afs_flock(struct file *file, int cmd, struct file_lock *fl)
 {
-	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
+	struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
 
 	_enter("{%x:%u},%d,{t=%x,fl=%x}",
 	       vnode->fid.vid, vnode->fid.vnode, cmd,
@@ -627,9 +690,13 @@
  */
 static void afs_fl_copy_lock(struct file_lock *new, struct file_lock *fl)
 {
+	struct afs_vnode *vnode = AFS_FS_I(locks_inode(fl->fl_file));
+
 	_enter("");
 
+	spin_lock(&vnode->lock);
 	list_add(&new->fl_u.afs.link, &fl->fl_u.afs.link);
+	spin_unlock(&vnode->lock);
 }
 
 /*
@@ -638,7 +705,12 @@
  */
 static void afs_fl_release_private(struct file_lock *fl)
 {
+	struct afs_vnode *vnode = AFS_FS_I(locks_inode(fl->fl_file));
+
 	_enter("");
 
-	list_del_init(&fl->fl_u.afs.link);
+	spin_lock(&vnode->lock);
+	afs_dequeue_lock(vnode, fl);
+	_debug("state %u for %p", vnode->lock_state, vnode);
+	spin_unlock(&vnode->lock);
 }
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index bd8dcee..e03910c 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -430,6 +430,16 @@
 	u8			name[AFS_MAXVOLNAME + 1]; /* NUL-padded volume name */
 };
 
+enum afs_lock_state {
+	AFS_VNODE_LOCK_NONE,		/* The vnode has no lock on the server */
+	AFS_VNODE_LOCK_WAITING_FOR_CB,	/* We're waiting for the server to break the callback */
+	AFS_VNODE_LOCK_SETTING,		/* We're asking the server for a lock */
+	AFS_VNODE_LOCK_GRANTED,		/* We have a lock on the server */
+	AFS_VNODE_LOCK_EXTENDING,	/* We're extending a lock on the server */
+	AFS_VNODE_LOCK_NEED_UNLOCK,	/* We need to unlock on the server */
+	AFS_VNODE_LOCK_UNLOCKING,	/* We're telling the server to unlock */
+};
+
 /*
  * AFS inode private data
  */
@@ -454,18 +464,16 @@
 #define AFS_VNODE_ZAP_DATA	3		/* set if vnode's data should be invalidated */
 #define AFS_VNODE_DELETED	4		/* set if vnode deleted on server */
 #define AFS_VNODE_MOUNTPOINT	5		/* set if vnode is a mountpoint symlink */
-#define AFS_VNODE_LOCKING	6		/* set if waiting for lock on vnode */
-#define AFS_VNODE_READLOCKED	7		/* set if vnode is read-locked on the server */
-#define AFS_VNODE_WRITELOCKED	8		/* set if vnode is write-locked on the server */
-#define AFS_VNODE_UNLOCKING	9		/* set if vnode is being unlocked on the server */
-#define AFS_VNODE_AUTOCELL	10		/* set if Vnode is an auto mount point */
-#define AFS_VNODE_PSEUDODIR	11		/* set if Vnode is a pseudo directory */
+#define AFS_VNODE_AUTOCELL	6		/* set if Vnode is an auto mount point */
+#define AFS_VNODE_PSEUDODIR	7 		/* set if Vnode is a pseudo directory */
 
 	struct list_head	wb_keys;	/* List of keys available for writeback */
 	struct list_head	pending_locks;	/* locks waiting to be granted */
 	struct list_head	granted_locks;	/* locks granted on this file */
 	struct delayed_work	lock_work;	/* work to be done in locking */
-	struct key		*unlock_key;	/* key to be used in unlocking */
+	struct key		*lock_key;	/* Key to be used in lock ops */
+	enum afs_lock_state	lock_state : 8;
+	afs_lock_type_t		lock_type : 8;
 
 	/* outstanding callback notification on this file */
 	struct afs_cb_interest	*cb_interest;	/* Server on which this resides */
@@ -843,6 +851,7 @@
 extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int);
 extern void afs_zap_permits(struct rcu_head *);
 extern struct key *afs_request_key(struct afs_cell *);
+extern int afs_check_permit(struct afs_vnode *, struct key *, afs_access_t *);
 extern int afs_permission(struct inode *, int);
 extern void __exit afs_clean_up_permit_cache(void);
 
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index e728ca1..d04511f 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -46,8 +46,7 @@
 		return false;
 	}
 
-	if (test_bit(AFS_VNODE_READLOCKED, &vnode->flags) ||
-	    test_bit(AFS_VNODE_WRITELOCKED, &vnode->flags))
+	if (vnode->lock_state != AFS_VNODE_LOCK_NONE)
 		fc->flags |= AFS_FS_CURSOR_CUR_ONLY;
 	return true;
 }
@@ -117,7 +116,7 @@
 	case VSALVAGING:	m = "being salvaged";	break;
 	default:		m = "busy";		break;
 	}
-	
+
 	pr_notice("kAFS: Volume %u '%s' is %s\n", volume->vid, volume->name, m);
 }
 
@@ -438,24 +437,67 @@
 
 	_enter("");
 
-	if (!cbi) {
-		fc->ac.error = -ESTALE;
+	switch (fc->ac.error) {
+	case SHRT_MAX:
+		if (!cbi) {
+			fc->ac.error = -ESTALE;
+			fc->flags |= AFS_FS_CURSOR_STOP;
+			return false;
+		}
+
+		fc->cbi = afs_get_cb_interest(vnode->cb_interest);
+
+		read_lock(&cbi->server->fs_lock);
+		alist = rcu_dereference_protected(cbi->server->addresses,
+						  lockdep_is_held(&cbi->server->fs_lock));
+		afs_get_addrlist(alist);
+		read_unlock(&cbi->server->fs_lock);
+		if (!alist) {
+			fc->ac.error = -ESTALE;
+			fc->flags |= AFS_FS_CURSOR_STOP;
+			return false;
+		}
+
+		fc->ac.alist = alist;
+		fc->ac.addr  = NULL;
+		fc->ac.start = READ_ONCE(alist->index);
+		fc->ac.index = fc->ac.start;
+		fc->ac.error = 0;
+		fc->ac.begun = false;
+		goto iterate_address;
+
+	case 0:
+	default:
+		/* Success or local failure.  Stop. */
 		fc->flags |= AFS_FS_CURSOR_STOP;
+		_leave(" = f [okay/local %d]", fc->ac.error);
 		return false;
+
+	case -ECONNABORTED:
+		fc->flags |= AFS_FS_CURSOR_STOP;
+		_leave(" = f [abort]");
+		return false;
+
+	case -ENETUNREACH:
+	case -EHOSTUNREACH:
+	case -ECONNREFUSED:
+	case -ETIMEDOUT:
+	case -ETIME:
+		_debug("no conn");
+		goto iterate_address;
 	}
 
-	read_lock(&cbi->server->fs_lock);
-	alist = afs_get_addrlist(cbi->server->addresses);
-	read_unlock(&cbi->server->fs_lock);
-	if (!alist) {
-		fc->ac.error = -ESTALE;
-		fc->flags |= AFS_FS_CURSOR_STOP;
-		return false;
+iterate_address:
+	/* Iterate over the current server's address list to try and find an
+	 * address on which it will respond to us.
+	 */
+	if (afs_iterate_addresses(&fc->ac)) {
+		_leave(" = t");
+		return true;
 	}
 
-	fc->ac.alist = alist;
-	fc->ac.error = 0;
-	return true;
+	afs_end_cursor(&fc->ac);
+	return false;
 }
 
 /*
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 46a881a..2b00097 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -284,8 +284,8 @@
  * permitted to be accessed with this authorisation, and if so, what access it
  * is granted
  */
-static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
-			    afs_access_t *_access)
+int afs_check_permit(struct afs_vnode *vnode, struct key *key,
+		     afs_access_t *_access)
 {
 	struct afs_permits *permits;
 	bool valid = false;
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
index 26bad70..0ab3f84 100644
--- a/fs/afs/server_list.c
+++ b/fs/afs/server_list.c
@@ -17,7 +17,7 @@
 {
 	int i;
 
-	if (refcount_dec_and_test(&slist->usage)) {
+	if (slist && refcount_dec_and_test(&slist->usage)) {
 		for (i = 0; i < slist->nr_servers; i++) {
 			afs_put_cb_interest(net, slist->servers[i].cb_interest);
 			afs_put_server(net, slist->servers[i].server);
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 18e46e3..cb5f8a3 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -119,6 +119,11 @@
 	}
 
 	if (f != t) {
+		if (PageWriteback(page)) {
+			trace_afs_page_dirty(vnode, tracepoint_string("alrdy"),
+					     page->index, priv);
+			goto flush_conflicting_write;
+		}
 		if (to < f || from > t)
 			goto flush_conflicting_write;
 		if (from < f)