Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block layer fixes from Jens Axboe:
 "Final small batch of fixes to be included before -rc1.  Some general
  cleanups in here as well, but some of the blk-mq fixes we need for the
  NVMe conversion and/or scsi-mq.  The pull request contains:

   - Support for not merging across a specified "chunk size", if set by
     the driver.  Some NVMe devices perform poorly for IO that crosses
     such a chunk, so we need to support it generically as part of
     request merging, to avoid having to do complicated split logic (a
     brief usage sketch follows the shortlog below).  From me.

   - Bump max tag depth to 10Ki tags.  Some scsi devices have a huge
     shared tag space.  Before, we failed with EINVAL if too large a tag
     depth was specified; now we truncate it and pass back the actual
     value (see the sketch after the shortlog).  From me.

   - Various blk-mq rq init fixes from me and others.

   - A fix from Keith for blk-mq queue entering on a dying queue.  This
     is needed to prevent oopsing on hot device removal.

   - Fixup for blk-mq timer addition from Ming Lei.

   - Small round of performance fixes for mtip32xx from Sam Bradshaw.

   - Minor stack leak fix from Rickard Strandqvist.

   - Two __init annotations from Fabian Frederick"

* 'for-linus' of git://git.kernel.dk/linux-block:
  block: add __init to blkcg_policy_register
  block: add __init to elv_register
  block: ensure that bio_add_page() always accepts a page for an empty bio
  blk-mq: add timer in blk_mq_start_request
  blk-mq: always initialize request->start_time
  block: blk-exec.c: Cleaning up local variable address returnd
  mtip32xx: minor performance enhancements
  blk-mq: ->timeout should be cleared in blk_mq_rq_ctx_init()
  blk-mq: don't allow queue entering for a dying queue
  blk-mq: bump max tag depth to 10K tags
  block: add blk_rq_set_block_pc()
  block: add notion of a chunk size for request merging
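
A minimal usage sketch (not part of the patch) for the new chunk-size
limit, assuming a hypothetical driver whose hardware prefers IO that
stays within 128KiB (256-sector) boundaries.  blk_queue_chunk_sectors()
and blk_max_size_offset() are the helpers added in the diff below; the
mydev_* name is invented for the example and it needs <linux/blkdev.h>:

	/* hypothetical driver init: don't merge IO across 256-sector chunks */
	static void mydev_setup_queue(struct request_queue *q)
	{
		/* chunk_sectors must be a power-of-2 number of 512b sectors */
		blk_queue_chunk_sectors(q, 256);
	}

	/*
	 * With that set, blk_max_size_offset() returns how many sectors fit
	 * before the next chunk boundary, e.g. for an offset of sector 250:
	 *
	 *	256 - (250 & (256 - 1)) = 6 sectors
	 *
	 * so request merging stops at sector 256 instead of straddling it.
	 */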
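
Similarly illustrative, the new tag-depth behaviour: an over-large
queue_depth in a driver's blk_mq_tag_set is now clamped to
BLK_MQ_MAX_DEPTH (10240) instead of being rejected, and the clamped
value is reported back in set->queue_depth.  mydev_mq_ops and struct
mydev_cmd are again invented names:

	/* hypothetical driver data; needs <linux/blk-mq.h> */
	static struct blk_mq_tag_set mydev_tag_set = {
		.ops		= &mydev_mq_ops,
		.nr_hw_queues	= 1,
		.queue_depth	= 65536,	/* larger than BLK_MQ_MAX_DEPTH */
		.cmd_size	= sizeof(struct mydev_cmd),
		.numa_node	= NUMA_NO_NODE,
	};

	static int mydev_init_tags(void)
	{
		int ret = blk_mq_alloc_tag_set(&mydev_tag_set);

		if (ret)
			return ret;

		/* would have been -EINVAL before; now truncated and passed back */
		pr_info("mydev: using tag depth %u\n", mydev_tag_set.queue_depth);
		return 0;
	}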
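
The diff also adds a blk_rq_set_block_pc() helper; a sketch of the
typical caller pattern, mirroring the SCSI/bsg conversions below.  The
mydev_send_tur() wrapper, the CDB choice, and the timeout are
placeholders, not taken from the patch:

	/* needs <linux/blkdev.h> and <scsi/scsi.h> */
	static int mydev_send_tur(struct request_queue *q)
	{
		struct request *rq;
		int err = 0;

		rq = blk_get_request(q, READ, GFP_KERNEL);
		if (!rq)
			return -ENOMEM;

		/* replaces the old open-coded rq->cmd_type = REQ_TYPE_BLOCK_PC */
		blk_rq_set_block_pc(rq);

		rq->cmd[0] = TEST_UNIT_READY;
		rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
		rq->timeout = 10 * HZ;

		blk_execute_rq(q, NULL, rq, 0);
		if (rq->errors)
			err = -EIO;

		blk_put_request(rq);
		return err;
	}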
diff --git a/block/bio.c b/block/bio.c
index 1ba3365..8c2e55e 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -849,7 +849,13 @@
 		 unsigned int offset)
 {
 	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
-	return __bio_add_page(q, bio, page, len, offset, queue_max_sectors(q));
+	unsigned int max_sectors;
+
+	max_sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector);
+	if ((max_sectors < (len >> 9)) && !bio->bi_iter.bi_size)
+		max_sectors = len >> 9;
+
+	return __bio_add_page(q, bio, page, len, offset, max_sectors);
 }
 EXPORT_SYMBOL(bio_add_page);
 
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 9f5bce3..069bc20 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1093,7 +1093,7 @@
  * Register @pol with blkcg core.  Might sleep and @pol may be modified on
  * successful registration.  Returns 0 on success and -errno on failure.
  */
-int blkcg_policy_register(struct blkcg_policy *pol)
+int __init blkcg_policy_register(struct blkcg_policy *pol)
 {
 	int i, ret;
 
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index d692b29..cbb7f94 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -145,7 +145,7 @@
 void blkcg_exit_queue(struct request_queue *q);
 
 /* Blkio controller policy registration */
-int blkcg_policy_register(struct blkcg_policy *pol);
+int __init blkcg_policy_register(struct blkcg_policy *pol);
 void blkcg_policy_unregister(struct blkcg_policy *pol);
 int blkcg_activate_policy(struct request_queue *q,
 			  const struct blkcg_policy *pol);
@@ -580,7 +580,7 @@
 static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
 static inline void blkcg_drain_queue(struct request_queue *q) { }
 static inline void blkcg_exit_queue(struct request_queue *q) { }
-static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
+static inline int __init blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
 static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
 static inline int blkcg_activate_policy(struct request_queue *q,
 					const struct blkcg_policy *pol) { return 0; }
diff --git a/block/blk-core.c b/block/blk-core.c
index 40d6548..9aca8c7 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1218,6 +1218,8 @@
 	if (unlikely(!rq))
 		return ERR_PTR(-ENOMEM);
 
+	blk_rq_set_block_pc(rq);
+
 	for_each_bio(bio) {
 		struct bio *bounce_bio = bio;
 		int ret;
@@ -1235,6 +1237,22 @@
 EXPORT_SYMBOL(blk_make_request);
 
 /**
+ * blk_rq_set_block_pc - initialize a request to type BLOCK_PC
+ * @rq:		request to be initialized
+ *
+ */
+void blk_rq_set_block_pc(struct request *rq)
+{
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
+	rq->__data_len = 0;
+	rq->__sector = (sector_t) -1;
+	rq->bio = rq->biotail = NULL;
+	memset(rq->__cmd, 0, sizeof(rq->__cmd));
+	rq->cmd = rq->__cmd;
+}
+EXPORT_SYMBOL(blk_rq_set_block_pc);
+
+/**
  * blk_requeue_request - put a request back on queue
  * @q:		request queue where request should be inserted
  * @rq:		request to be inserted
diff --git a/block/blk-exec.c b/block/blk-exec.c
index dbf4502..f4d27b1 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -132,6 +132,11 @@
 	if (rq->errors)
 		err = -EIO;
 
+	if (rq->sense == sense)	{
+		rq->sense = NULL;
+		rq->sense_len = 0;
+	}
+
 	return err;
 }
 EXPORT_SYMBOL(blk_execute_rq);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4e4cd62..e11f5f8 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -82,8 +82,10 @@
 
 	__percpu_counter_add(&q->mq_usage_counter, 1, 1000000);
 	smp_wmb();
-	/* we have problems to freeze the queue if it's initializing */
-	if (!blk_queue_bypass(q) || !blk_queue_init_done(q))
+
+	/* we have problems freezing the queue if it's initializing */
+	if (!blk_queue_dying(q) &&
+	    (!blk_queue_bypass(q) || !blk_queue_init_done(q)))
 		return 0;
 
 	__percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
@@ -183,6 +185,7 @@
 	RB_CLEAR_NODE(&rq->rb_node);
 	rq->rq_disk = NULL;
 	rq->part = NULL;
+	rq->start_time = jiffies;
 #ifdef CONFIG_BLK_CGROUP
 	rq->rl = NULL;
 	set_start_time_ns(rq);
@@ -202,6 +205,8 @@
 	rq->sense = NULL;
 
 	INIT_LIST_HEAD(&rq->timeout_list);
+	rq->timeout = 0;
+
 	rq->end_io = NULL;
 	rq->end_io_data = NULL;
 	rq->next_rq = NULL;
@@ -406,16 +411,7 @@
 	if (unlikely(blk_bidi_rq(rq)))
 		rq->next_rq->resid_len = blk_rq_bytes(rq->next_rq);
 
-	/*
-	 * Just mark start time and set the started bit. Due to memory
-	 * ordering, we know we'll see the correct deadline as long as
-	 * REQ_ATOMIC_STARTED is seen. Use the default queue timeout,
-	 * unless one has been set in the request.
-	 */
-	if (!rq->timeout)
-		rq->deadline = jiffies + q->rq_timeout;
-	else
-		rq->deadline = jiffies + rq->timeout;
+	blk_add_timer(rq);
 
 	/*
 	 * Mark us as started and clear complete. Complete might have been
@@ -967,11 +963,6 @@
 		list_add_tail(&rq->queuelist, &ctx->rq_list);
 
 	blk_mq_hctx_mark_pending(hctx, ctx);
-
-	/*
-	 * We do this early, to ensure we are on the right CPU.
-	 */
-	blk_add_timer(rq);
 }
 
 void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
@@ -1100,10 +1091,8 @@
 {
 	init_request_from_bio(rq, bio);
 
-	if (blk_do_io_stat(rq)) {
-		rq->start_time = jiffies;
+	if (blk_do_io_stat(rq))
 		blk_account_io_start(rq, 1);
-	}
 }
 
 static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx,
@@ -1216,7 +1205,6 @@
 
 		blk_mq_bio_to_request(rq, bio);
 		blk_mq_start_request(rq, true);
-		blk_add_timer(rq);
 
 		/*
 		 * For OK queue, we are done. For error, kill it. Any other
@@ -1967,13 +1955,19 @@
 	return NOTIFY_OK;
 }
 
+/*
+ * Alloc a tag set to be associated with one or more request queues.
+ * May fail with EINVAL for various error conditions. May adjust the
+ * requested depth down, if it's too large. In that case, the set
+ * value will be stored in set->queue_depth.
+ */
 int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 {
 	int i;
 
 	if (!set->nr_hw_queues)
 		return -EINVAL;
-	if (!set->queue_depth || set->queue_depth > BLK_MQ_MAX_DEPTH)
+	if (!set->queue_depth)
 		return -EINVAL;
 	if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN)
 		return -EINVAL;
@@ -1981,6 +1975,11 @@
 	if (!set->nr_hw_queues || !set->ops->queue_rq || !set->ops->map_queue)
 		return -EINVAL;
 
+	if (set->queue_depth > BLK_MQ_MAX_DEPTH) {
+		pr_info("blk-mq: reduced tag depth to %u\n",
+			BLK_MQ_MAX_DEPTH);
+		set->queue_depth = BLK_MQ_MAX_DEPTH;
+	}
 
 	set->tags = kmalloc_node(set->nr_hw_queues *
 				 sizeof(struct blk_mq_tags *),
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 5d21239..f1a1795 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -113,6 +113,7 @@
 	lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
 	lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
 	lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
+	lim->chunk_sectors = 0;
 	lim->max_write_same_sectors = 0;
 	lim->max_discard_sectors = 0;
 	lim->discard_granularity = 0;
@@ -277,6 +278,26 @@
 EXPORT_SYMBOL(blk_queue_max_hw_sectors);
 
 /**
+ * blk_queue_chunk_sectors - set size of the chunk for this queue
+ * @q:  the request queue for the device
+ * @chunk_sectors:  chunk sectors in the usual 512b unit
+ *
+ * Description:
+ *    If a driver doesn't want IOs to cross a given chunk size, it can set
+ *    this limit and prevent merging across chunks. Note that the chunk size
+ *    must currently be a power-of-2 in sectors. Also note that the block
+ *    layer must accept a page worth of data at any offset. So if the
+ *    crossing of chunks is a hard limitation in the driver, it must still be
+ *    prepared to split single page bios.
+ **/
+void blk_queue_chunk_sectors(struct request_queue *q, unsigned int chunk_sectors)
+{
+	BUG_ON(!is_power_of_2(chunk_sectors));
+	q->limits.chunk_sectors = chunk_sectors;
+}
+EXPORT_SYMBOL(blk_queue_chunk_sectors);
+
+/**
  * blk_queue_max_discard_sectors - set max sectors for a single discard
  * @q:  the request queue for the device
  * @max_discard_sectors: maximum number of sectors to discard
diff --git a/block/bsg.c b/block/bsg.c
index e5214c1..ff46add 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -196,7 +196,6 @@
 	 * fill in request structure
 	 */
 	rq->cmd_len = hdr->request_len;
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 
 	rq->timeout = msecs_to_jiffies(hdr->timeout);
 	if (!rq->timeout)
@@ -273,6 +272,8 @@
 	rq = blk_get_request(q, rw, GFP_KERNEL);
 	if (!rq)
 		return ERR_PTR(-ENOMEM);
+	blk_rq_set_block_pc(rq);
+
 	ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm);
 	if (ret)
 		goto out;
diff --git a/block/elevator.c b/block/elevator.c
index 1e01b66..f35eddd 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -845,7 +845,7 @@
 }
 EXPORT_SYMBOL(elv_unregister_queue);
 
-int elv_register(struct elevator_type *e)
+int __init elv_register(struct elevator_type *e)
 {
 	char *def = "";
 
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 9c28a5b..14695c6 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -229,7 +229,6 @@
 	 * fill in request structure
 	 */
 	rq->cmd_len = hdr->cmd_len;
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 
 	rq->timeout = msecs_to_jiffies(hdr->timeout);
 	if (!rq->timeout)
@@ -311,6 +310,7 @@
 	rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL);
 	if (!rq)
 		return -ENOMEM;
+	blk_rq_set_block_pc(rq);
 
 	if (blk_fill_sghdr_rq(q, rq, hdr, mode)) {
 		blk_put_request(rq);
@@ -491,7 +491,7 @@
 	memset(sense, 0, sizeof(sense));
 	rq->sense = sense;
 	rq->sense_len = 0;
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
+	blk_rq_set_block_pc(rq);
 
 	blk_execute_rq(q, disk, rq, 0);
 
@@ -524,7 +524,7 @@
 	int err;
 
 	rq = blk_get_request(q, WRITE, __GFP_WAIT);
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
+	blk_rq_set_block_pc(rq);
 	rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
 	rq->cmd[0] = cmd;
 	rq->cmd[4] = data;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 74abd49..295f3af 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -39,6 +39,7 @@
 #include <../drivers/ata/ahci.h>
 #include <linux/export.h>
 #include <linux/debugfs.h>
+#include <linux/prefetch.h>
 #include "mtip32xx.h"
 
 #define HW_CMD_SLOT_SZ		(MTIP_MAX_COMMAND_SLOTS * 32)
@@ -2380,6 +2381,8 @@
 	/* Map the scatter list for DMA access */
 	nents = dma_map_sg(&dd->pdev->dev, command->sg, nents, dma_dir);
 
+	prefetch(&port->flags);
+
 	command->scatter_ents = nents;
 
 	/*
@@ -2392,7 +2395,7 @@
 	fis = command->command;
 	fis->type        = 0x27;
 	fis->opts        = 1 << 7;
-	if (rq_data_dir(rq) == READ)
+	if (dma_dir == DMA_FROM_DEVICE)
 		fis->command = ATA_CMD_FPDMA_READ;
 	else
 		fis->command = ATA_CMD_FPDMA_WRITE;
@@ -2412,7 +2415,7 @@
 	fis->res3        = 0;
 	fill_command_sg(dd, command, nents);
 
-	if (command->unaligned)
+	if (unlikely(command->unaligned))
 		fis->device |= 1 << 7;
 
 	/* Populate the command header */
@@ -2433,7 +2436,7 @@
 	 * To prevent this command from being issued
 	 * if an internal command is in progress or error handling is active.
 	 */
-	if (port->flags & MTIP_PF_PAUSE_IO) {
+	if (unlikely(port->flags & MTIP_PF_PAUSE_IO)) {
 		set_bit(rq->tag, port->cmds_to_issue);
 		set_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags);
 		return;
@@ -3754,7 +3757,7 @@
 	struct driver_data *dd = hctx->queue->queuedata;
 	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
 
-	if (!dd->unal_qdepth || rq_data_dir(rq) == READ)
+	if (rq_data_dir(rq) == READ || !dd->unal_qdepth)
 		return false;
 
 	/*
@@ -3776,11 +3779,11 @@
 {
 	int ret;
 
-	if (mtip_check_unal_depth(hctx, rq))
+	if (unlikely(mtip_check_unal_depth(hctx, rq)))
 		return BLK_MQ_RQ_QUEUE_BUSY;
 
 	ret = mtip_submit_request(hctx, rq);
-	if (!ret)
+	if (likely(!ret))
 		return BLK_MQ_RQ_QUEUE_OK;
 
 	rq->errors = ret;
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index 4b9b554..ba1b31e 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -493,19 +493,19 @@
 
 	struct workqueue_struct *isr_workq;
 
-	struct mtip_work work[MTIP_MAX_SLOT_GROUPS];
-
 	atomic_t irq_workers_active;
 
+	struct mtip_work work[MTIP_MAX_SLOT_GROUPS];
+
 	int isr_binding;
 
 	struct block_device *bdev;
 
-	int unal_qdepth; /* qdepth of unaligned IO queue */
-
 	struct list_head online_list; /* linkage for online list */
 
 	struct list_head remove_list; /* linkage for removing list */
+
+	int unal_qdepth; /* qdepth of unaligned IO queue */
 };
 
 #endif
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index ef166ad..758ac44 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -704,6 +704,7 @@
 
 	rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ?
 			     WRITE : READ, __GFP_WAIT);
+	blk_rq_set_block_pc(rq);
 
 	if (cgc->buflen) {
 		ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
@@ -716,7 +717,6 @@
 	memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE);
 
 	rq->timeout = 60*HZ;
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	if (cgc->quiet)
 		rq->cmd_flags |= REQ_QUIET;
 
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 2a44767..898b84b 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2184,6 +2184,7 @@
 			ret = -ENOMEM;
 			break;
 		}
+		blk_rq_set_block_pc(rq);
 
 		ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
 		if (ret) {
@@ -2203,7 +2204,6 @@
 		rq->cmd[9] = 0xf8;
 
 		rq->cmd_len = 12;
-		rq->cmd_type = REQ_TYPE_BLOCK_PC;
 		rq->timeout = 60 * HZ;
 		bio = rq->bio;
 
diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index 5248c88..7bcf67e 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -120,6 +120,7 @@
 			    "%s: blk_get_request failed\n", __func__);
 		return NULL;
 	}
+	blk_rq_set_block_pc(rq);
 
 	if (buflen && blk_rq_map_kern(q, rq, buffer, buflen, GFP_NOIO)) {
 		blk_put_request(rq);
@@ -128,7 +129,6 @@
 		return NULL;
 	}
 
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
 			 REQ_FAILFAST_DRIVER;
 	rq->retries = ALUA_FAILOVER_RETRIES;
diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c
index e1c8be06..6f07f7fe 100644
--- a/drivers/scsi/device_handler/scsi_dh_emc.c
+++ b/drivers/scsi/device_handler/scsi_dh_emc.c
@@ -280,6 +280,7 @@
 		return NULL;
 	}
 
+	blk_rq_set_block_pc(rq);
 	rq->cmd_len = COMMAND_SIZE(cmd);
 	rq->cmd[0] = cmd;
 
@@ -304,7 +305,6 @@
 		break;
 	}
 
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
 			 REQ_FAILFAST_DRIVER;
 	rq->timeout = CLARIION_TIMEOUT;
diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c
index 084062b..e9d9fea 100644
--- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c
+++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c
@@ -120,7 +120,7 @@
 	if (!req)
 		return SCSI_DH_RES_TEMP_UNAVAIL;
 
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	blk_rq_set_block_pc(req);
 	req->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
 			  REQ_FAILFAST_DRIVER;
 	req->cmd_len = COMMAND_SIZE(TEST_UNIT_READY);
@@ -250,7 +250,7 @@
 	if (!req)
 		return SCSI_DH_RES_TEMP_UNAVAIL;
 
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	blk_rq_set_block_pc(req);
 	req->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
 			  REQ_FAILFAST_DRIVER;
 	req->cmd_len = COMMAND_SIZE(START_STOP);
diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c
index 4b9cf93..826069d 100644
--- a/drivers/scsi/device_handler/scsi_dh_rdac.c
+++ b/drivers/scsi/device_handler/scsi_dh_rdac.c
@@ -279,6 +279,7 @@
 				"get_rdac_req: blk_get_request failed.\n");
 		return NULL;
 	}
+	blk_rq_set_block_pc(rq);
 
 	if (buflen && blk_rq_map_kern(q, rq, buffer, buflen, GFP_NOIO)) {
 		blk_put_request(rq);
@@ -287,7 +288,6 @@
 		return NULL;
 	}
 
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
 			 REQ_FAILFAST_DRIVER;
 	rq->retries = RDAC_RETRIES;
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index bac04c2..5f4cbf0 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -1570,6 +1570,7 @@
 		if (unlikely(!req))
 			return ERR_PTR(-ENOMEM);
 
+		blk_rq_set_block_pc(req);
 		return req;
 	}
 }
@@ -1590,7 +1591,6 @@
 	}
 
 	or->request = req;
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
 	req->cmd_flags |= REQ_QUIET;
 
 	req->timeout = or->timeout;
@@ -1608,7 +1608,7 @@
 				ret = PTR_ERR(req);
 				goto out;
 			}
-			req->cmd_type = REQ_TYPE_BLOCK_PC;
+			blk_rq_set_block_pc(req);
 			or->in.req = or->request->next_rq = req;
 		}
 	} else if (has_in)
diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c
index 21883a2..0727ea7 100644
--- a/drivers/scsi/osst.c
+++ b/drivers/scsi/osst.c
@@ -365,7 +365,7 @@
 	if (!req)
 		return DRIVER_ERROR << 24;
 
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	blk_rq_set_block_pc(req);
 	req->cmd_flags |= REQ_QUIET;
 
 	SRpnt->bio = NULL;
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 47a1ffc..cbe38e5 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1952,6 +1952,8 @@
 	 */
 	req = blk_get_request(sdev->request_queue, READ, GFP_KERNEL);
 
+	blk_rq_set_block_pc(req);
+
 	req->cmd[0] = ALLOW_MEDIUM_REMOVAL;
 	req->cmd[1] = 0;
 	req->cmd[2] = 0;
@@ -1961,7 +1963,6 @@
 
 	req->cmd_len = COMMAND_SIZE(req->cmd[0]);
 
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
 	req->cmd_flags |= REQ_QUIET;
 	req->timeout = 10 * HZ;
 	req->retries = 5;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index be0d5fa..f7e3163 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -195,6 +195,7 @@
 	req = blk_get_request(sdev->request_queue, write, __GFP_WAIT);
 	if (!req)
 		return ret;
+	blk_rq_set_block_pc(req);
 
 	if (bufflen &&	blk_rq_map_kern(sdev->request_queue, req,
 					buffer, bufflen, __GFP_WAIT))
@@ -206,7 +207,6 @@
 	req->sense_len = 0;
 	req->retries = retries;
 	req->timeout = timeout;
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
 	req->cmd_flags |= flags | REQ_QUIET | REQ_PREEMPT;
 
 	/*
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index df5e961..53268aa 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1653,10 +1653,9 @@
 	if (!rq)
 		return -ENOMEM;
 
+	blk_rq_set_block_pc(rq);
 	memcpy(rq->cmd, cmd, hp->cmd_len);
-
 	rq->cmd_len = hp->cmd_len;
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 
 	srp->rq = rq;
 	rq->end_io_data = srp;
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index afc834e..14eb4b2 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -484,7 +484,7 @@
 	if (!req)
 		return DRIVER_ERROR << 24;
 
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	blk_rq_set_block_pc(req);
 	req->cmd_flags |= REQ_QUIET;
 
 	mdata->null_mapped = 1;
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 0f199f6..94d00df 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -1055,6 +1055,8 @@
 			ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 			goto fail;
 		}
+
+		blk_rq_set_block_pc(req);
 	} else {
 		BUG_ON(!cmd->data_length);
 
@@ -1071,7 +1073,6 @@
 		}
 	}
 
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
 	req->end_io = pscsi_req_done;
 	req->end_io_data = cmd;
 	req->cmd_len = scsi_command_size(pt->pscsi_cdb);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 0feedeb..a002cf1 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -135,7 +135,7 @@
 	BLK_MQ_S_STOPPED	= 0,
 	BLK_MQ_S_TAG_ACTIVE	= 1,
 
-	BLK_MQ_MAX_DEPTH	= 2048,
+	BLK_MQ_MAX_DEPTH	= 10240,
 
 	BLK_MQ_CPU_WORK_BATCH	= 8,
 };
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3cd426e..31e1105 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -280,6 +280,7 @@
 	unsigned long		seg_boundary_mask;
 
 	unsigned int		max_hw_sectors;
+	unsigned int		chunk_sectors;
 	unsigned int		max_sectors;
 	unsigned int		max_segment_size;
 	unsigned int		physical_block_size;
@@ -795,6 +796,7 @@
 extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
 extern struct request *blk_make_request(struct request_queue *, struct bio *,
 					gfp_t);
+extern void blk_rq_set_block_pc(struct request *);
 extern void blk_requeue_request(struct request_queue *, struct request *);
 extern void blk_add_request_payload(struct request *rq, struct page *page,
 		unsigned int len);
@@ -910,6 +912,20 @@
 	return q->limits.max_sectors;
 }
 
+/*
+ * Return maximum size of a request at given offset. Only valid for
+ * file system requests.
+ */
+static inline unsigned int blk_max_size_offset(struct request_queue *q,
+					       sector_t offset)
+{
+	if (!q->limits.chunk_sectors)
+		return q->limits.max_hw_sectors;
+
+	return q->limits.chunk_sectors -
+			(offset & (q->limits.chunk_sectors - 1));
+}
+
 static inline unsigned int blk_rq_get_max_sectors(struct request *rq)
 {
 	struct request_queue *q = rq->q;
@@ -917,7 +933,11 @@
 	if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC))
 		return q->limits.max_hw_sectors;
 
-	return blk_queue_get_max_sectors(q, rq->cmd_flags);
+	if (!q->limits.chunk_sectors)
+		return blk_queue_get_max_sectors(q, rq->cmd_flags);
+
+	return min(blk_max_size_offset(q, blk_rq_pos(rq)),
+			blk_queue_get_max_sectors(q, rq->cmd_flags));
 }
 
 static inline unsigned int blk_rq_count_bios(struct request *rq)
@@ -983,6 +1003,7 @@
 extern void blk_queue_bounce_limit(struct request_queue *, u64);
 extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int);
 extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
+extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
 extern void blk_queue_max_discard_sectors(struct request_queue *q,
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index df63bd3..4ff262e 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -144,7 +144,7 @@
  * io scheduler registration
  */
 extern void __init load_default_elevator_module(void);
-extern int elv_register(struct elevator_type *);
+extern int __init elv_register(struct elevator_type *);
 extern void elv_unregister(struct elevator_type *);
 
 /*