drbd: announce FLUSH/FUA capability to upper layers
Unconditionally announce FLUSH/FUA to upper layers.
If the lower layers on either node do not actually support this,
generic_make_request() will deal with it.
If this causes performance regressions on your setup,
make sure there are no volatile caches involved,
and mount -o nobarrier or equivalent.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 6ace11e..3fbef01 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -876,7 +876,11 @@
unsigned int enr, count = 0;
struct lc_element *e;
- if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
+ /* this should be an empty REQ_FLUSH */
+ if (size == 0)
+ return 0;
+
+ if (size < 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
dev_err(DEV, "sector: %llus, size: %d\n",
(unsigned long long)sector, size);
return 0;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 1ee1404..2e0e7fc 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -3636,6 +3636,7 @@
q->backing_dev_info.congested_data = mdev;
blk_queue_make_request(q, drbd_make_request);
+ blk_queue_flush(q, REQ_FLUSH | REQ_FUA);
/* Setting the max_hw_sectors to an odd value of 8kibyte here
This triggers a max_bio_size message upon first attach or connect */
blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 83d9913..c74ca2d 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -277,6 +277,9 @@
atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
int i;
+ if (page == NULL)
+ return;
+
if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE)*minor_count)
i = page_chain_free(page);
else {
@@ -316,7 +319,7 @@
gfp_t gfp_mask) __must_hold(local)
{
struct drbd_epoch_entry *e;
- struct page *page;
+ struct page *page = NULL;
unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
@@ -329,9 +332,11 @@
return NULL;
}
- page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
- if (!page)
- goto fail;
+ if (data_size) {
+ page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
+ if (!page)
+ goto fail;
+ }
INIT_HLIST_NODE(&e->collision);
e->epoch = NULL;
@@ -1270,7 +1275,6 @@
data_size -= dgs;
- ERR_IF(data_size == 0) return NULL;
ERR_IF(data_size & 0x1ff) return NULL;
ERR_IF(data_size > DRBD_MAX_BIO_SIZE) return NULL;
@@ -1291,6 +1295,9 @@
if (!e)
return NULL;
+ if (!data_size)
+ return e;
+
ds = data_size;
page = e->pages;
page_chain_for_each(page) {
@@ -1715,6 +1722,10 @@
dp_flags = be32_to_cpu(p->dp_flags);
rw |= wire_flags_to_bio(mdev, dp_flags);
+ if (e->pages == NULL) {
+ D_ASSERT(e->size == 0);
+ D_ASSERT(dp_flags & DP_FLUSH);
+ }
if (dp_flags & DP_MAY_SET_IN_SYNC)
e->flags |= EE_MAY_SET_IN_SYNC;
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 1f4b2db..910335c 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1111,13 +1111,12 @@
/*
* what we "blindly" assume:
*/
- D_ASSERT(bio->bi_size > 0);
D_ASSERT((bio->bi_size & 0x1ff) == 0);
/* to make some things easier, force alignment of requests within the
* granularity of our hash tables */
s_enr = bio->bi_sector >> HT_SHIFT;
- e_enr = (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT;
+ e_enr = bio->bi_size ? (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT : s_enr;
if (likely(s_enr == e_enr)) {
do {