ceph: fix cap flush race reentrancy

In e9964c10 we change cap flushing to do a delicate dance because some
inodes on the cap_dirty list could be in a migrating state (got EXPORT but
not IMPORT) in which we couldn't actually flush and move from
dirty->flushing, breaking the while (!empty) { process first } loop
structure.  It worked for a single sync thread, but was not reentrant and
triggered infinite loops when multiple syncers came along.

Instead, move inodes with dirty to a separate cap_dirty_migrating list
when in the limbo export-but-no-import state, allowing us to go back to
the simple loop structure (which was reentrant).  This is cleaner and more
robust.

Audited the cap_dirty users and this looks fine:
list_empty(&ci->i_dirty_item) is still a reliable indicator of whether we
have dirty caps (which list we're on is irrelevant) and list_del_init()
calls still do the right thing.

Signed-off-by: Sage Weil <sage@newdream.net>
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 591202b..1f72b00 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2635,6 +2635,7 @@
 			      struct ceph_mds_session *session,
 			      int *open_target_sessions)
 {
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int mds = session->s_mds;
 	unsigned mseq = le32_to_cpu(ex->migrate_seq);
@@ -2671,6 +2672,19 @@
 			 * export targets, so that we get the matching IMPORT
 			 */
 			*open_target_sessions = 1;
+
+			/*
+			 * we can't flush dirty caps that we've seen the
+			 * EXPORT but no IMPORT for
+			 */
+			spin_lock(&mdsc->cap_dirty_lock);
+			if (!list_empty(&ci->i_dirty_item)) {
+				dout(" moving %p to cap_dirty_migrating\n",
+				     inode);
+				list_move(&ci->i_dirty_item,
+					  &mdsc->cap_dirty_migrating);
+			}
+			spin_unlock(&mdsc->cap_dirty_lock);
 		}
 		__ceph_remove_cap(cap);
 	}
@@ -2708,6 +2722,13 @@
 		ci->i_cap_exporting_issued = 0;
 		ci->i_cap_exporting_mseq = 0;
 		ci->i_cap_exporting_mds = -1;
+
+		spin_lock(&mdsc->cap_dirty_lock);
+		if (!list_empty(&ci->i_dirty_item)) {
+			dout(" moving %p back to cap_dirty\n", inode);
+			list_move(&ci->i_dirty_item, &mdsc->cap_dirty);
+		}
+		spin_unlock(&mdsc->cap_dirty_lock);
 	} else {
 		dout("handle_cap_import inode %p ci %p mds%d mseq %d\n",
 		     inode, ci, mds, mseq);
@@ -2911,38 +2932,16 @@
  */
 void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
 {
-	struct ceph_inode_info *ci, *nci = NULL;
-	struct inode *inode, *ninode = NULL;
-	struct list_head *p, *n;
+	struct ceph_inode_info *ci;
+	struct inode *inode;
 
 	dout("flush_dirty_caps\n");
 	spin_lock(&mdsc->cap_dirty_lock);
-	list_for_each_safe(p, n, &mdsc->cap_dirty) {
-		if (nci) {
-			ci = nci;
-			inode = ninode;
-			ci->i_ceph_flags &= ~CEPH_I_NOFLUSH;
-			dout("flush_dirty_caps inode %p (was next inode)\n",
-			     inode);
-		} else {
-			ci = list_entry(p, struct ceph_inode_info,
-					i_dirty_item);
-			inode = igrab(&ci->vfs_inode);
-			BUG_ON(!inode);
-			dout("flush_dirty_caps inode %p\n", inode);
-		}
-		if (n != &mdsc->cap_dirty) {
-			nci = list_entry(n, struct ceph_inode_info,
-					 i_dirty_item);
-			ninode = igrab(&nci->vfs_inode);
-			BUG_ON(!ninode);
-			nci->i_ceph_flags |= CEPH_I_NOFLUSH;
-			dout("flush_dirty_caps next inode %p, noflush\n",
-			     ninode);
-		} else {
-			nci = NULL;
-			ninode = NULL;
-		}
+	while (!list_empty(&mdsc->cap_dirty)) {
+		ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info,
+				      i_dirty_item);
+		inode = igrab(&ci->vfs_inode);
+		dout("flush_dirty_caps %p\n", inode);
 		spin_unlock(&mdsc->cap_dirty_lock);
 		if (inode) {
 			ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH,
@@ -2952,6 +2951,7 @@
 		spin_lock(&mdsc->cap_dirty_lock);
 	}
 	spin_unlock(&mdsc->cap_dirty_lock);
+	dout("flush_dirty_caps done\n");
 }
 
 /*
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index c12d2e9..79743d1 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3004,6 +3004,7 @@
 	spin_lock_init(&mdsc->snap_flush_lock);
 	mdsc->cap_flush_seq = 0;
 	INIT_LIST_HEAD(&mdsc->cap_dirty);
+	INIT_LIST_HEAD(&mdsc->cap_dirty_migrating);
 	mdsc->num_cap_flushing = 0;
 	spin_lock_init(&mdsc->cap_dirty_lock);
 	init_waitqueue_head(&mdsc->cap_flushing_wq);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 4e3a9cc..7d8a0d6 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -278,6 +278,7 @@
 
 	u64               cap_flush_seq;
 	struct list_head  cap_dirty;        /* inodes with dirty caps */
+	struct list_head  cap_dirty_migrating; /* ...that are migration... */
 	int               num_cap_flushing; /* # caps we are flushing */
 	spinlock_t        cap_dirty_lock;   /* protects above items */
 	wait_queue_head_t cap_flushing_wq;