xfs: prevent unwritten extent conversion from blocking I/O completion
Unwritten extent conversion can recurse back into the filesystem due
to memory allocation. Memory reclaim requires I/O completions to be
processed to allow the callers to make progress. If the I/O
completion workqueue thread is doing the recursion, then we have a
deadlock situation.
Move unwritten extent completion into it's own workqueue so it
doesn't block I/O completions for normal delayed allocation or
overwrite data.
Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index c13f673..7ec89fc 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -153,23 +153,6 @@
}
/*
- * Schedule IO completion handling on a xfsdatad if this was
- * the final hold on this ioend. If we are asked to wait,
- * flush the workqueue.
- */
-STATIC void
-xfs_finish_ioend(
- xfs_ioend_t *ioend,
- int wait)
-{
- if (atomic_dec_and_test(&ioend->io_remaining)) {
- queue_work(xfsdatad_workqueue, &ioend->io_work);
- if (wait)
- flush_workqueue(xfsdatad_workqueue);
- }
-}
-
-/*
* We're now finished for good with this ioend structure.
* Update the page state via the associated buffer_heads,
* release holds on the inode and bio, and finally free
@@ -310,6 +293,27 @@
}
/*
+ * Schedule IO completion handling on a xfsdatad if this was
+ * the final hold on this ioend. If we are asked to wait,
+ * flush the workqueue.
+ */
+STATIC void
+xfs_finish_ioend(
+ xfs_ioend_t *ioend,
+ int wait)
+{
+ if (atomic_dec_and_test(&ioend->io_remaining)) {
+ struct workqueue_struct *wq = xfsdatad_workqueue;
+ if (ioend->io_work.func == xfs_end_bio_unwritten)
+ wq = xfsconvertd_workqueue;
+
+ queue_work(wq, &ioend->io_work);
+ if (wait)
+ flush_workqueue(wq);
+ }
+}
+
+/*
* Allocate and initialise an IO completion structure.
* We need to track unwritten extent write completion here initially.
* We'll need to extend this for updating the ondisk inode size later
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 1dd5288..221b3e6 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -19,6 +19,7 @@
#define __XFS_AOPS_H__
extern struct workqueue_struct *xfsdatad_workqueue;
+extern struct workqueue_struct *xfsconvertd_workqueue;
extern mempool_t *xfs_ioend_pool;
/*
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index aa1016b..e28800a 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -51,6 +51,7 @@
static struct workqueue_struct *xfslogd_workqueue;
struct workqueue_struct *xfsdatad_workqueue;
+struct workqueue_struct *xfsconvertd_workqueue;
#ifdef XFS_BUF_TRACE
void
@@ -1775,6 +1776,7 @@
xfs_buf_t *bp, *n;
int pincount = 0;
+ xfs_buf_runall_queues(xfsconvertd_workqueue);
xfs_buf_runall_queues(xfsdatad_workqueue);
xfs_buf_runall_queues(xfslogd_workqueue);
@@ -1831,9 +1833,15 @@
if (!xfsdatad_workqueue)
goto out_destroy_xfslogd_workqueue;
+ xfsconvertd_workqueue = create_workqueue("xfsconvertd");
+ if (!xfsconvertd_workqueue)
+ goto out_destroy_xfsdatad_workqueue;
+
register_shrinker(&xfs_buf_shake);
return 0;
+ out_destroy_xfsdatad_workqueue:
+ destroy_workqueue(xfsdatad_workqueue);
out_destroy_xfslogd_workqueue:
destroy_workqueue(xfslogd_workqueue);
out_free_buf_zone:
@@ -1849,6 +1857,7 @@
xfs_buf_terminate(void)
{
unregister_shrinker(&xfs_buf_shake);
+ destroy_workqueue(xfsconvertd_workqueue);
destroy_workqueue(xfsdatad_workqueue);
destroy_workqueue(xfslogd_workqueue);
kmem_zone_destroy(xfs_buf_zone);