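drbd: Bitmap IO functions can now return prematurely if the disk breaks

The bitmap IO context no longer uses a struct completion.  The IO
completion path now sets ctx->done and wakes mdev->misc_wait, and
bm_rw() and drbd_bm_write_page() wait with
wait_until_done_or_disk_failure() instead of wait_for_completion().
If bios are still in flight when that wait returns, both functions
report -EIO.

Each context now also holds a local disk reference, taken with
get_ldev_if_state(mdev, D_ATTACHING) and dropped in
bm_aio_ctx_destroy().  If the reference cannot be taken, both
functions free the context and return -ENODEV.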

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index a2c337b3..e5e756d 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -886,7 +886,7 @@
 struct bm_aio_ctx {
 	struct drbd_conf *mdev;
 	atomic_t in_flight;
-	struct completion done;
+	unsigned int done;
 	unsigned flags;
 #define BM_AIO_COPY_PAGES	1
 	int error;
@@ -897,6 +897,7 @@
 {
 	struct bm_aio_ctx *ctx = container_of(kref, struct bm_aio_ctx, kref);
 
+	put_ldev(ctx->mdev);
 	kfree(ctx);
 }
 
@@ -945,7 +946,8 @@
 	bio_put(bio);
 
 	if (atomic_dec_and_test(&ctx->in_flight)) {
-		complete(&ctx->done);
+		ctx->done = 1;
+		wake_up(&mdev->misc_wait);
 		kref_put(&ctx->kref, &bm_aio_ctx_destroy);
 	}
 }
@@ -1034,12 +1036,18 @@
 	*ctx = (struct bm_aio_ctx) {
 		.mdev = mdev,
 		.in_flight = ATOMIC_INIT(1),
-		.done = COMPLETION_INITIALIZER(ctx->done),
+		.done = 0,
 		.flags = lazy_writeout_upper_idx ? BM_AIO_COPY_PAGES : 0,
 		.error = 0,
 		.kref = { ATOMIC_INIT(2) },
 	};
 
+	if (!get_ldev_if_state(mdev, D_ATTACHING)) {  /* put is in bm_aio_ctx_destroy() */
+		dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n");
+		kfree(ctx);
+		return -ENODEV;
+	}
+
 	if (!ctx->flags)
 		WARN_ON(!(BM_LOCKED_MASK & b->bm_flags));
 
@@ -1073,11 +1081,16 @@
 
 	/*
 	 * We initialize ctx->in_flight to one to make sure bm_async_io_complete
-	 * will not complete() early, and decrement / test it here.  If there
+	 * will not set ctx->done early, and decrement / test it here.  If there
 	 * are still some bios in flight, we need to wait for them here.
+	 * If all IO is done already (or nothing had been submitted), there is
+	 * no need to wait.  Still, we need to put the kref associated with the
+	 * "in_flight reached zero, all done" event.
 	 */
 	if (!atomic_dec_and_test(&ctx->in_flight))
-		wait_for_completion(&ctx->done);
+		wait_until_done_or_disk_failure(mdev, &ctx->done);
+	else
+		kref_put(&ctx->kref, &bm_aio_ctx_destroy);
 
 	dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
 			rw == WRITE ? "WRITE" : "READ",
@@ -1089,6 +1102,9 @@
 		err = -EIO; /* ctx->error ? */
 	}
 
+	if (atomic_read(&ctx->in_flight))
+		err = -EIO; /* Disk failed during IO... */
+
 	now = jiffies;
 	if (rw == WRITE) {
 		drbd_md_flush(mdev);
@@ -1103,7 +1119,6 @@
 	     ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
 
 	kref_put(&ctx->kref, &bm_aio_ctx_destroy);
-
 	return err;
 }
 
@@ -1167,14 +1182,20 @@
 	*ctx = (struct bm_aio_ctx) {
 		.mdev = mdev,
 		.in_flight = ATOMIC_INIT(1),
-		.done = COMPLETION_INITIALIZER(ctx->done),
+		.done = 0,
 		.flags = BM_AIO_COPY_PAGES,
 		.error = 0,
 		.kref = { ATOMIC_INIT(2) },
 	};
 
+	if (!get_ldev_if_state(mdev, D_ATTACHING)) {  /* put is in bm_aio_ctx_destroy() */
+		dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n");
+		kfree(ctx);
+		return -ENODEV;
+	}
+
 	bm_page_io_async(ctx, idx, WRITE_SYNC);
-	wait_for_completion(&ctx->done);
+	wait_until_done_or_disk_failure(mdev, &ctx->done);
 
 	if (ctx->error)
 		drbd_chk_io_error(mdev, 1, true);
@@ -1182,9 +1203,8 @@
 		 * gone in a moment as well. */
 
 	mdev->bm_writ_cnt++;
-	err = ctx->error;
+	err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error;
 	kref_put(&ctx->kref, &bm_aio_ctx_destroy);
-
 	return err;
 }