drbd: fix potential access of on-stack wait_queue_head_t after return
I ran into something declaring itself a "spinlock deadlock":
BUG: spinlock lockup on CPU#1, kjournald/27816, ffff88000ad6bca0
Pid: 27816, comm: kjournald Tainted: G W 2.6.34.6 #2
Call Trace:
<IRQ> [<ffffffff811ba0aa>] do_raw_spin_lock+0x11e/0x14d
[<ffffffff81340fde>] _raw_spin_lock_irqsave+0x6a/0x81
[<ffffffff8103b694>] ? __wake_up+0x22/0x50
[<ffffffff8103b694>] __wake_up+0x22/0x50
[<ffffffffa07ff661>] bm_async_io_complete+0x258/0x299 [drbd]
but the call traces do not fit at all, and all other CPUs are in cpu_idle.
I think it may be this race:
drbd_bm_write_page
  wait_queue_head_t io_wait;
  atomic_t in_flight;
  bm_async_io
    submit_bio
                                bm_async_io_complete
                                  if (atomic_dec_and_test(in_flight))
  wait_event(io_wait,
     atomic_read(in_flight) == 0)
  return
                                  wake_up(io_wait)
Since in_flight has already dropped to zero, wait_event() finds its condition
true and returns without sleeping, and drbd_bm_write_page() returns as well.
The subsequent wake_up() then accesses the wait_queue_head_t's spinlock, which
is no longer valid, because the stack frame of drbd_bm_write_page has been
clobbered by then.
Fix this by using a struct completion, which performs both the condition test
and the wake_up inside its own spinlock, so this race cannot happen; see the
standalone sketch below the sign-offs.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
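For reference, a minimal sketch (not part of the patch) of the safe on-stack
completion pattern the fix relies on. The demo_* names are made up for
illustration; only the completion and atomic calls (COMPLETION_INITIALIZER_ONSTACK,
complete, wait_for_completion, ATOMIC_INIT, atomic_dec_and_test) are real
kernel APIs.

#include <linux/atomic.h>
#include <linux/completion.h>

struct demo_ctx {
        atomic_t in_flight;             /* outstanding IOs + one extra ref */
        struct completion done;         /* lives on the submitter's stack */
};

/* IO completion callback, typically running in IRQ context */
static void demo_io_done(struct demo_ctx *ctx)
{
        /*
         * complete() takes ctx->done's internal spinlock, bumps the done
         * count and wakes any waiter while still holding that lock, so the
         * waiter cannot proceed (and free its stack frame) before complete()
         * has finished touching the completion.
         */
        if (atomic_dec_and_test(&ctx->in_flight))
                complete(&ctx->done);
}

static void demo_submit_and_wait(void)
{
        struct demo_ctx ctx = {
                .in_flight = ATOMIC_INIT(1),    /* one extra ref held by us */
                .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done),
        };

        /*
         * ... submit async IOs here, each taking a ctx.in_flight ref and
         * calling demo_io_done() when it finishes ...
         */

        /* drop our extra ref; only wait if some IO is still in flight */
        if (!atomic_dec_and_test(&ctx.in_flight))
                wait_for_completion(&ctx.done);
}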
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 314a363..25428bc 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -897,7 +897,7 @@
struct bm_aio_ctx {
struct drbd_conf *mdev;
atomic_t in_flight;
- wait_queue_head_t io_wait;
+ struct completion done;
unsigned flags;
#define BM_AIO_COPY_PAGES 1
int error;
@@ -948,7 +948,7 @@
bio_put(bio);
if (atomic_dec_and_test(&ctx->in_flight))
- wake_up(&ctx->io_wait);
+ complete(&ctx->done);
}
static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local)
@@ -1009,8 +1009,12 @@
*/
static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_idx) __must_hold(local)
{
- struct bm_aio_ctx ctx =
- { .flags = lazy_writeout_upper_idx ? BM_AIO_COPY_PAGES : 0 };
+ struct bm_aio_ctx ctx = {
+ .mdev = mdev,
+ .in_flight = ATOMIC_INIT(1),
+ .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done),
+ .flags = lazy_writeout_upper_idx ? BM_AIO_COPY_PAGES : 0,
+ };
struct drbd_bitmap *b = mdev->bitmap;
int num_pages, i, count = 0;
unsigned long now;
@@ -1031,10 +1035,6 @@
num_pages = b->bm_number_of_pages;
now = jiffies;
- ctx.mdev = mdev;
- atomic_set(&ctx.in_flight, 1); /* one extra ref */
- init_waitqueue_head(&ctx.io_wait);
- ctx.error = 0;
/* let the layers below us try to merge these bios... */
for (i = 0; i < num_pages; i++) {
@@ -1060,8 +1060,13 @@
cond_resched();
}
- atomic_dec(&ctx.in_flight); /* drop the extra ref */
- wait_event(ctx.io_wait, atomic_read(&ctx.in_flight) == 0);
+ /*
+ * We initialize ctx.in_flight to one to make sure bm_async_io_complete
+ * will not complete() early, and decrement / test it here. If there
+ * are still some bios in flight, we need to wait for them here.
+ */
+ if (!atomic_dec_and_test(&ctx.in_flight))
+ wait_for_completion(&ctx.done);
dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
rw == WRITE ? "WRITE" : "READ",
count, jiffies - now);
@@ -1133,19 +1138,20 @@
*/
int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local)
{
- struct bm_aio_ctx ctx = { .flags = BM_AIO_COPY_PAGES, };
+ struct bm_aio_ctx ctx = {
+ .mdev = mdev,
+ .in_flight = ATOMIC_INIT(1),
+ .done = COMPLETION_INITIALIZER_ONSTACK(ctx.done),
+ .flags = BM_AIO_COPY_PAGES,
+ };
if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) {
dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx);
return 0;
}
- ctx.mdev = mdev;
- atomic_set(&ctx.in_flight, 1);
- init_waitqueue_head(&ctx.io_wait);
-
bm_page_io_async(&ctx, idx, WRITE_SYNC);
- wait_event(ctx.io_wait, atomic_read(&ctx.in_flight) == 0);
+ wait_for_completion(&ctx.done);
if (ctx.error)
drbd_chk_io_error(mdev, 1, true);