drbd: If we detect late that IO got frozen, retry after we thawed.
If we detect late (= after grabbing mdev->req_lock) that IO got frozen, we
return 1 to generic_make_request(), which will simply retry submitting
that bio.
In the subsequent call from generic_make_request() into
drbd_make_request_26(), we sleep in inc_ap_bio().
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 4b97f30..c194348 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -2223,7 +2223,7 @@
/* I'd like to use wait_event_lock_irq,
* but I'm not sure when it got introduced,
* and not sure when it has 3 or 4 arguments */
-static inline void inc_ap_bio(struct drbd_conf *mdev, int one_or_two)
+static inline void inc_ap_bio(struct drbd_conf *mdev, int count)
{
/* compare with after_state_ch,
* os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S */
@@ -2245,7 +2245,7 @@
finish_wait(&mdev->misc_wait, &wait);
spin_lock_irq(&mdev->req_lock);
}
- atomic_add(one_or_two, &mdev->ap_bio_cnt);
+ atomic_add(count, &mdev->ap_bio_cnt);
spin_unlock_irq(&mdev->req_lock);
}
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 343e0e6..3397f11 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -722,6 +722,7 @@
struct drbd_request *req;
int local, remote;
int err = -EIO;
+ int ret = 0;
/* allocate outside of all locks; */
req = drbd_req_new(mdev, bio);
@@ -784,7 +785,7 @@
(mdev->state.pdsk == D_INCONSISTENT &&
mdev->state.conn >= C_CONNECTED));
- if (!(local || remote)) {
+ if (!(local || remote) && !mdev->state.susp) {
dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
goto fail_free_complete;
}
@@ -810,6 +811,16 @@
/* GOOD, everything prepared, grab the spin_lock */
spin_lock_irq(&mdev->req_lock);
+ if (mdev->state.susp) {
+ /* If we got suspended, use the retry mechanism of
+ generic_make_request() to restart processing of this
+ bio. In the next call to drbd_make_request_26
+ we sleep in inc_ap_bio() */
+ ret = 1;
+ spin_unlock_irq(&mdev->req_lock);
+ goto fail_free_complete;
+ }
+
if (remote) {
remote = (mdev->state.pdsk == D_UP_TO_DATE ||
(mdev->state.pdsk == D_INCONSISTENT &&
@@ -947,12 +958,14 @@
req->private_bio = NULL;
put_ldev(mdev);
}
- bio_endio(bio, err);
+ if (!ret)
+ bio_endio(bio, err);
+
drbd_req_free(req);
dec_ap_bio(mdev);
kfree(b);
- return 0;
+ return ret;
}
/* helper function for drbd_make_request
@@ -1065,15 +1078,21 @@
/* we need to get a "reference count" (ap_bio_cnt)
* to avoid races with the disconnect/reconnect/suspend code.
- * In case we need to split the bio here, we need to get two references
+ * In case we need to split the bio here, we need to get three references
* atomically, otherwise we might deadlock when trying to submit the
* second one! */
- inc_ap_bio(mdev, 2);
+ inc_ap_bio(mdev, 3);
D_ASSERT(e_enr == s_enr + 1);
- drbd_make_request_common(mdev, &bp->bio1);
- drbd_make_request_common(mdev, &bp->bio2);
+ while (drbd_make_request_common(mdev, &bp->bio1))
+ inc_ap_bio(mdev, 1);
+
+ while (drbd_make_request_common(mdev, &bp->bio2))
+ inc_ap_bio(mdev, 1);
+
+ dec_ap_bio(mdev);
+
bio_pair_release(bp);
}
return 0;