Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block

* 'for-linus' of git://git.kernel.dk/linux-2.6-block:
  libata: implement drain buffers
  libata: eliminate the home grown dma padding in favour of
  block: clear drain buffer if draining for write command
  block: implement request_queue->dma_drain_needed
  block: add request->raw_data_len
  block: update bio according to DMA alignment padding
  libata: update ATAPI overflow draining
  elevator: make elevator_get() attempt to load the appropriate module
  cfq-iosched: add hlist for browsing parallel to the radix tree
  block: make blk_rq_map_user() clear ->bio if it unmaps it
  fs/block_dev.c: remove #if 0'ed code
  make struct def_blk_aops static
  make blk_settings_init() static
  make blk_ioc_init() static
  make blk-core.c:request_cachep static again
diff --git a/block/blk-core.c b/block/blk-core.c
index e9754dc..775c851 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -38,7 +38,7 @@
 /*
  * For the allocated request tables
  */
-struct kmem_cache *request_cachep;
+static struct kmem_cache *request_cachep;
 
 /*
  * For queue allocation
@@ -127,6 +127,7 @@
 	rq->nr_hw_segments = 0;
 	rq->ioprio = 0;
 	rq->special = NULL;
+	rq->raw_data_len = 0;
 	rq->buffer = NULL;
 	rq->tag = -1;
 	rq->errors = 0;
@@ -2015,6 +2016,7 @@
 	rq->hard_cur_sectors = rq->current_nr_sectors;
 	rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
 	rq->buffer = bio_data(bio);
+	rq->raw_data_len = bio->bi_size;
 	rq->data_len = bio->bi_size;
 
 	rq->bio = rq->biotail = bio;
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 80245dc..e34df7c 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -17,17 +17,13 @@
 
 static void cfq_dtor(struct io_context *ioc)
 {
-	struct cfq_io_context *cic[1];
-	int r;
+	if (!hlist_empty(&ioc->cic_list)) {
+		struct cfq_io_context *cic;
 
-	/*
-	 * We don't have a specific key to lookup with, so use the gang
-	 * lookup to just retrieve the first item stored. The cfq exit
-	 * function will iterate the full tree, so any member will do.
-	 */
-	r = radix_tree_gang_lookup(&ioc->radix_root, (void **) cic, 0, 1);
-	if (r > 0)
-		cic[0]->dtor(ioc);
+		cic = list_entry(ioc->cic_list.first, struct cfq_io_context,
+								cic_list);
+		cic->dtor(ioc);
+	}
 }
 
 /*
@@ -57,18 +53,16 @@
 
 static void cfq_exit(struct io_context *ioc)
 {
-	struct cfq_io_context *cic[1];
-	int r;
-
 	rcu_read_lock();
-	/*
-	 * See comment for cfq_dtor()
-	 */
-	r = radix_tree_gang_lookup(&ioc->radix_root, (void **) cic, 0, 1);
-	rcu_read_unlock();
 
-	if (r > 0)
-		cic[0]->exit(ioc);
+	if (!hlist_empty(&ioc->cic_list)) {
+		struct cfq_io_context *cic;
+
+		cic = list_entry(ioc->cic_list.first, struct cfq_io_context,
+								cic_list);
+		cic->exit(ioc);
+	}
+	rcu_read_unlock();
 }
 
 /* Called by the exitting task */
@@ -105,6 +99,7 @@
 		ret->nr_batch_requests = 0; /* because this is 0 */
 		ret->aic = NULL;
 		INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH);
+		INIT_HLIST_HEAD(&ret->cic_list);
 		ret->ioc_data = NULL;
 	}
 
@@ -176,7 +171,7 @@
 }
 EXPORT_SYMBOL(copy_io_context);
 
-int __init blk_ioc_init(void)
+static int __init blk_ioc_init(void)
 {
 	iocontext_cachep = kmem_cache_create("blkdev_ioc",
 			sizeof(struct io_context), 0, SLAB_PANIC, NULL);
diff --git a/block/blk-map.c b/block/blk-map.c
index 955d75c..09f7fd0 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -19,6 +19,7 @@
 		rq->biotail->bi_next = bio;
 		rq->biotail = bio;
 
+		rq->raw_data_len += bio->bi_size;
 		rq->data_len += bio->bi_size;
 	}
 	return 0;
@@ -139,10 +140,29 @@
 		ubuf += ret;
 	}
 
+	/*
+	 * __blk_rq_map_user() copies the buffers if starting address
+	 * or length isn't aligned.  As the copied buffer is always
+	 * page aligned, we know that there's enough room for padding.
+	 * Extend the last bio and update rq->data_len accordingly.
+	 *
+	 * On unmap, bio_uncopy_user() will use unmodified
+	 * bio_map_data pointed to by bio->bi_private.
+	 */
+	if (len & queue_dma_alignment(q)) {
+		unsigned int pad_len = (queue_dma_alignment(q) & ~len) + 1;
+		struct bio *bio = rq->biotail;
+
+		bio->bi_io_vec[bio->bi_vcnt - 1].bv_len += pad_len;
+		bio->bi_size += pad_len;
+		rq->data_len += pad_len;
+	}
+
 	rq->buffer = rq->data = NULL;
 	return 0;
 unmap_rq:
 	blk_rq_unmap_user(bio);
+	rq->bio = NULL;
 	return ret;
 }
 EXPORT_SYMBOL(blk_rq_map_user);
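
For illustration, a minimal sketch (not part of the patch) of the arithmetic behind the padding above: queue_dma_alignment() returns a power-of-two-minus-one mask, and the pad extends the user mapping to the next (mask + 1) boundary.  padded_len() is a hypothetical helper, not a kernel function.

static inline unsigned int padded_len(unsigned int len, unsigned int align)
{
	/* align is a mask such as queue_dma_alignment(q), e.g. 3 for 4-byte DMA */
	return (len + align) & ~align;	/* e.g. padded_len(1234, 3) == 1236 */
}

With len = 1234 and a 4-byte alignment mask of 3, the hunk above computes pad_len = (3 & ~1234) + 1 = 2, giving the same 1236-byte result.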
diff --git a/block/blk-merge.c b/block/blk-merge.c
index d3b84bb..7506c4f 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -220,7 +220,10 @@
 		bvprv = bvec;
 	} /* segments in rq */
 
-	if (q->dma_drain_size) {
+	if (q->dma_drain_size && q->dma_drain_needed(rq)) {
+		if (rq->cmd_flags & REQ_RW)
+			memset(q->dma_drain_buffer, 0, q->dma_drain_size);
+
 		sg->page_link &= ~0x02;
 		sg = sg_next(sg);
 		sg_set_page(sg, virt_to_page(q->dma_drain_buffer),
@@ -228,6 +231,7 @@
 			    ((unsigned long)q->dma_drain_buffer) &
 			    (PAGE_SIZE - 1));
 		nsegs++;
+		rq->data_len += q->dma_drain_size;
 	}
 
 	if (sg)
diff --git a/block/blk-settings.c b/block/blk-settings.c
index c8d0c57..9a8ffdd 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -296,6 +296,7 @@
  * blk_queue_dma_drain - Set up a drain buffer for excess dma.
  *
  * @q:  the request queue for the device
+ * @dma_drain_needed: fn which returns non-zero if drain is necessary
  * @buf:	physically contiguous buffer
  * @size:	size of the buffer in bytes
  *
@@ -315,14 +316,16 @@
  * device can support otherwise there won't be room for the drain
  * buffer.
  */
-int blk_queue_dma_drain(struct request_queue *q, void *buf,
-				unsigned int size)
+int blk_queue_dma_drain(struct request_queue *q,
+			dma_drain_needed_fn *dma_drain_needed,
+			void *buf, unsigned int size)
 {
 	if (q->max_hw_segments < 2 || q->max_phys_segments < 2)
 		return -EINVAL;
 	/* make room for appending the drain */
 	--q->max_hw_segments;
 	--q->max_phys_segments;
+	q->dma_drain_needed = dma_drain_needed;
 	q->dma_drain_buffer = buf;
 	q->dma_drain_size = size;
 
@@ -386,7 +389,7 @@
 }
 EXPORT_SYMBOL(blk_queue_update_dma_alignment);
 
-int __init blk_settings_init(void)
+static int __init blk_settings_init(void)
 {
 	blk_max_low_pfn = max_low_pfn - 1;
 	blk_max_pfn = max_pfn - 1;
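
As a usage sketch of the new interface (modelled on the libata-scsi.c hunk further down; my_drain_needed, my_slave_configure and MY_DRAIN_SZ are illustrative names, not part of the patch), a SCSI LLD would register its drain buffer from the slave-configure path:

static int my_drain_needed(struct request *rq)
{
	/* drain only variable-length BLOCK_PC reads */
	return blk_pc_request(rq) && rq->data_len &&
	       !(rq->cmd_flags & REQ_RW);
}

static int my_slave_configure(struct scsi_device *sdev)
{
	struct request_queue *q = sdev->request_queue;
	void *buf = kmalloc(MY_DRAIN_SZ, q->bounce_gfp | GFP_KERNEL);

	if (!buf)
		return -ENOMEM;
	/* reserves one segment and appends buf when the callback fires */
	return blk_queue_dma_drain(q, my_drain_needed, buf, MY_DRAIN_SZ);
}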
diff --git a/block/bsg.c b/block/bsg.c
index 8917c51..7f3c095 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -437,14 +437,14 @@
 	}
 
 	if (rq->next_rq) {
-		hdr->dout_resid = rq->data_len;
-		hdr->din_resid = rq->next_rq->data_len;
+		hdr->dout_resid = rq->raw_data_len;
+		hdr->din_resid = rq->next_rq->raw_data_len;
 		blk_rq_unmap_user(bidi_bio);
 		blk_put_request(rq->next_rq);
 	} else if (rq_data_dir(rq) == READ)
-		hdr->din_resid = rq->data_len;
+		hdr->din_resid = rq->raw_data_len;
 	else
-		hdr->dout_resid = rq->data_len;
+		hdr->dout_resid = rq->raw_data_len;
 
 	/*
 	 * If the request generated a negative error number, return it
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ca198e6..0f962ec 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1145,38 +1145,19 @@
 /*
  * Call func for each cic attached to this ioc. Returns number of cic's seen.
  */
-#define CIC_GANG_NR	16
 static unsigned int
 call_for_each_cic(struct io_context *ioc,
 		  void (*func)(struct io_context *, struct cfq_io_context *))
 {
-	struct cfq_io_context *cics[CIC_GANG_NR];
-	unsigned long index = 0;
-	unsigned int called = 0;
-	int nr;
+	struct cfq_io_context *cic;
+	struct hlist_node *n;
+	int called = 0;
 
 	rcu_read_lock();
-
-	do {
-		int i;
-
-		/*
-		 * Perhaps there's a better way - this just gang lookups from
-		 * 0 to the end, restarting after each CIC_GANG_NR from the
-		 * last key + 1.
-		 */
-		nr = radix_tree_gang_lookup(&ioc->radix_root, (void **) cics,
-						index, CIC_GANG_NR);
-		if (!nr)
-			break;
-
-		called += nr;
-		index = 1 + (unsigned long) cics[nr - 1]->key;
-
-		for (i = 0; i < nr; i++)
-			func(ioc, cics[i]);
-	} while (nr == CIC_GANG_NR);
-
+	hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list) {
+		func(ioc, cic);
+		called++;
+	}
 	rcu_read_unlock();
 
 	return called;
@@ -1190,6 +1171,7 @@
 
 	spin_lock_irqsave(&ioc->lock, flags);
 	radix_tree_delete(&ioc->radix_root, cic->dead_key);
+	hlist_del_rcu(&cic->cic_list);
 	spin_unlock_irqrestore(&ioc->lock, flags);
 
 	kmem_cache_free(cfq_ioc_pool, cic);
@@ -1280,6 +1262,7 @@
 	if (cic) {
 		cic->last_end_request = jiffies;
 		INIT_LIST_HEAD(&cic->queue_list);
+		INIT_HLIST_NODE(&cic->cic_list);
 		cic->dtor = cfq_free_io_context;
 		cic->exit = cfq_exit_io_context;
 		elv_ioc_count_inc(ioc_count);
@@ -1501,6 +1484,7 @@
 		rcu_assign_pointer(ioc->ioc_data, NULL);
 
 	radix_tree_delete(&ioc->radix_root, (unsigned long) cfqd);
+	hlist_del_rcu(&cic->cic_list);
 	spin_unlock_irqrestore(&ioc->lock, flags);
 
 	cfq_cic_free(cic);
@@ -1561,6 +1545,8 @@
 		spin_lock_irqsave(&ioc->lock, flags);
 		ret = radix_tree_insert(&ioc->radix_root,
 						(unsigned long) cfqd, cic);
+		if (!ret)
+			hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
 		spin_unlock_irqrestore(&ioc->lock, flags);
 
 		radix_tree_preload_end();
diff --git a/block/elevator.c b/block/elevator.c
index bafbae0..88318c3 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -134,6 +134,21 @@
 	spin_lock(&elv_list_lock);
 
 	e = elevator_find(name);
+	if (!e) {
+		char elv[ELV_NAME_MAX + strlen("-iosched")];
+
+		spin_unlock(&elv_list_lock);
+
+		if (!strcmp(name, "anticipatory"))
+			sprintf(elv, "as-iosched");
+		else
+			sprintf(elv, "%s-iosched", name);
+
+		request_module(elv);
+		spin_lock(&elv_list_lock);
+		e = elevator_find(name);
+	}
+
 	if (e && !try_module_get(e->elevator_owner))
 		e = NULL;
 
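For reference, the name mangling above amounts to the following (illustrative helper, not part of the patch): every I/O scheduler module is named "<elevator>-iosched", except the anticipatory scheduler, whose module is "as-iosched".  It runs when an unknown elevator name arrives via the elevator= boot parameter or a write to /sys/block/<dev>/queue/scheduler.

static void elv_module_name(char *buf, size_t len, const char *name)
{
	/* "anticipatory" is the one elevator whose module name differs */
	if (!strcmp(name, "anticipatory"))
		snprintf(buf, len, "as-iosched");
	else
		snprintf(buf, len, "%s-iosched", name);
}
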
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 9675b34..e993cac 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -266,7 +266,7 @@
 	hdr->info = 0;
 	if (hdr->masked_status || hdr->host_status || hdr->driver_status)
 		hdr->info |= SG_INFO_CHECK;
-	hdr->resid = rq->data_len;
+	hdr->resid = rq->raw_data_len;
 	hdr->sb_len_wr = 0;
 
 	if (rq->sense_len && hdr->sbp) {
@@ -528,6 +528,7 @@
 	rq = blk_get_request(q, WRITE, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->data = NULL;
+	rq->raw_data_len = 0;
 	rq->data_len = 0;
 	rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
 	memset(rq->cmd, 0, sizeof(rq->cmd));
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 29e71bd..3c06e45 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1975,16 +1975,11 @@
 	struct ahci_port_priv *pp;
 	void *mem;
 	dma_addr_t mem_dma;
-	int rc;
 
 	pp = devm_kzalloc(dev, sizeof(*pp), GFP_KERNEL);
 	if (!pp)
 		return -ENOMEM;
 
-	rc = ata_pad_alloc(ap, dev);
-	if (rc)
-		return rc;
-
 	mem = dmam_alloc_coherent(dev, AHCI_PORT_PRIV_DMA_SZ, &mem_dma,
 				  GFP_KERNEL);
 	if (!mem)
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index f46eb6f..def3682 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4493,30 +4493,13 @@
 	struct ata_port *ap = qc->ap;
 	struct scatterlist *sg = qc->sg;
 	int dir = qc->dma_dir;
-	void *pad_buf = NULL;
 
 	WARN_ON(sg == NULL);
 
-	VPRINTK("unmapping %u sg elements\n", qc->mapped_n_elem);
+	VPRINTK("unmapping %u sg elements\n", qc->n_elem);
 
-	/* if we padded the buffer out to 32-bit bound, and data
-	 * xfer direction is from-device, we must copy from the
-	 * pad buffer back into the supplied buffer
-	 */
-	if (qc->pad_len && !(qc->tf.flags & ATA_TFLAG_WRITE))
-		pad_buf = ap->pad + (qc->tag * ATA_DMA_PAD_SZ);
-
-	if (qc->mapped_n_elem)
-		dma_unmap_sg(ap->dev, sg, qc->mapped_n_elem, dir);
-	/* restore last sg */
-	if (qc->last_sg)
-		*qc->last_sg = qc->saved_last_sg;
-	if (pad_buf) {
-		struct scatterlist *psg = &qc->extra_sg[1];
-		void *addr = kmap_atomic(sg_page(psg), KM_IRQ0);
-		memcpy(addr + psg->offset, pad_buf, qc->pad_len);
-		kunmap_atomic(addr, KM_IRQ0);
-	}
+	if (qc->n_elem)
+		dma_unmap_sg(ap->dev, sg, qc->n_elem, dir);
 
 	qc->flags &= ~ATA_QCFLAG_DMAMAP;
 	qc->sg = NULL;
@@ -4659,43 +4642,6 @@
 }
 
 /**
- *	atapi_qc_may_overflow - Check whether data transfer may overflow
- *	@qc: ATA command in question
- *
- *	ATAPI commands which transfer variable length data to host
- *	might overflow due to application error or hardare bug.  This
- *	function checks whether overflow should be drained and ignored
- *	for @qc.
- *
- *	LOCKING:
- *	None.
- *
- *	RETURNS:
- *	1 if @qc may overflow; otherwise, 0.
- */
-static int atapi_qc_may_overflow(struct ata_queued_cmd *qc)
-{
-	if (qc->tf.protocol != ATAPI_PROT_PIO &&
-	    qc->tf.protocol != ATAPI_PROT_DMA)
-		return 0;
-
-	if (qc->tf.flags & ATA_TFLAG_WRITE)
-		return 0;
-
-	switch (qc->cdb[0]) {
-	case READ_10:
-	case READ_12:
-	case WRITE_10:
-	case WRITE_12:
-	case GPCMD_READ_CD:
-	case GPCMD_READ_CD_MSF:
-		return 0;
-	}
-
-	return 1;
-}
-
-/**
  *	ata_std_qc_defer - Check whether a qc needs to be deferred
  *	@qc: ATA command in question
  *
@@ -4782,97 +4728,6 @@
 	qc->cursg = qc->sg;
 }
 
-static unsigned int ata_sg_setup_extra(struct ata_queued_cmd *qc,
-				       unsigned int *n_elem_extra,
-				       unsigned int *nbytes_extra)
-{
-	struct ata_port *ap = qc->ap;
-	unsigned int n_elem = qc->n_elem;
-	struct scatterlist *lsg, *copy_lsg = NULL, *tsg = NULL, *esg = NULL;
-
-	*n_elem_extra = 0;
-	*nbytes_extra = 0;
-
-	/* needs padding? */
-	qc->pad_len = qc->nbytes & 3;
-
-	if (likely(!qc->pad_len))
-		return n_elem;
-
-	/* locate last sg and save it */
-	lsg = sg_last(qc->sg, n_elem);
-	qc->last_sg = lsg;
-	qc->saved_last_sg = *lsg;
-
-	sg_init_table(qc->extra_sg, ARRAY_SIZE(qc->extra_sg));
-
-	if (qc->pad_len) {
-		struct scatterlist *psg = &qc->extra_sg[1];
-		void *pad_buf = ap->pad + (qc->tag * ATA_DMA_PAD_SZ);
-		unsigned int offset;
-
-		WARN_ON(qc->dev->class != ATA_DEV_ATAPI);
-
-		memset(pad_buf, 0, ATA_DMA_PAD_SZ);
-
-		/* psg->page/offset are used to copy to-be-written
-		 * data in this function or read data in ata_sg_clean.
-		 */
-		offset = lsg->offset + lsg->length - qc->pad_len;
-		sg_set_page(psg, nth_page(sg_page(lsg), offset >> PAGE_SHIFT),
-			    qc->pad_len, offset_in_page(offset));
-
-		if (qc->tf.flags & ATA_TFLAG_WRITE) {
-			void *addr = kmap_atomic(sg_page(psg), KM_IRQ0);
-			memcpy(pad_buf, addr + psg->offset, qc->pad_len);
-			kunmap_atomic(addr, KM_IRQ0);
-		}
-
-		sg_dma_address(psg) = ap->pad_dma + (qc->tag * ATA_DMA_PAD_SZ);
-		sg_dma_len(psg) = ATA_DMA_PAD_SZ;
-
-		/* Trim the last sg entry and chain the original and
-		 * padding sg lists.
-		 *
-		 * Because chaining consumes one sg entry, one extra
-		 * sg entry is allocated and the last sg entry is
-		 * copied to it if the length isn't zero after padded
-		 * amount is removed.
-		 *
-		 * If the last sg entry is completely replaced by
-		 * padding sg entry, the first sg entry is skipped
-		 * while chaining.
-		 */
-		lsg->length -= qc->pad_len;
-		if (lsg->length) {
-			copy_lsg = &qc->extra_sg[0];
-			tsg = &qc->extra_sg[0];
-		} else {
-			n_elem--;
-			tsg = &qc->extra_sg[1];
-		}
-
-		esg = &qc->extra_sg[1];
-
-		(*n_elem_extra)++;
-		(*nbytes_extra) += 4 - qc->pad_len;
-	}
-
-	if (copy_lsg)
-		sg_set_page(copy_lsg, sg_page(lsg), lsg->length, lsg->offset);
-
-	sg_chain(lsg, 1, tsg);
-	sg_mark_end(esg);
-
-	/* sglist can't start with chaining sg entry, fast forward */
-	if (qc->sg == lsg) {
-		qc->sg = tsg;
-		qc->cursg = tsg;
-	}
-
-	return n_elem;
-}
-
 /**
  *	ata_sg_setup - DMA-map the scatter-gather table associated with a command.
  *	@qc: Command with scatter-gather table to be mapped.
@@ -4889,26 +4744,17 @@
 static int ata_sg_setup(struct ata_queued_cmd *qc)
 {
 	struct ata_port *ap = qc->ap;
-	unsigned int n_elem, n_elem_extra, nbytes_extra;
+	unsigned int n_elem;
 
 	VPRINTK("ENTER, ata%u\n", ap->print_id);
 
-	n_elem = ata_sg_setup_extra(qc, &n_elem_extra, &nbytes_extra);
+	n_elem = dma_map_sg(ap->dev, qc->sg, qc->n_elem, qc->dma_dir);
+	if (n_elem < 1)
+		return -1;
 
-	if (n_elem) {
-		n_elem = dma_map_sg(ap->dev, qc->sg, n_elem, qc->dma_dir);
-		if (n_elem < 1) {
-			/* restore last sg */
-			if (qc->last_sg)
-				*qc->last_sg = qc->saved_last_sg;
-			return -1;
-		}
-		DPRINTK("%d sg elements mapped\n", n_elem);
-	}
+	DPRINTK("%d sg elements mapped\n", n_elem);
 
-	qc->n_elem = qc->mapped_n_elem = n_elem;
-	qc->n_elem += n_elem_extra;
-	qc->nbytes += nbytes_extra;
+	qc->n_elem = n_elem;
 	qc->flags |= ATA_QCFLAG_DMAMAP;
 
 	return 0;
@@ -5146,46 +4992,22 @@
  */
 static int __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes)
 {
-	int do_write = (qc->tf.flags & ATA_TFLAG_WRITE);
+	int rw = (qc->tf.flags & ATA_TFLAG_WRITE) ? WRITE : READ;
 	struct ata_port *ap = qc->ap;
-	struct ata_eh_info *ehi = &qc->dev->link->eh_info;
+	struct ata_device *dev = qc->dev;
+	struct ata_eh_info *ehi = &dev->link->eh_info;
 	struct scatterlist *sg;
 	struct page *page;
 	unsigned char *buf;
-	unsigned int offset, count;
+	unsigned int offset, count, consumed;
 
 next_sg:
 	sg = qc->cursg;
 	if (unlikely(!sg)) {
-		/*
-		 * The end of qc->sg is reached and the device expects
-		 * more data to transfer. In order not to overrun qc->sg
-		 * and fulfill length specified in the byte count register,
-		 *    - for read case, discard trailing data from the device
-		 *    - for write case, padding zero data to the device
-		 */
-		u16 pad_buf[1] = { 0 };
-		unsigned int i;
-
-		if (bytes > qc->curbytes - qc->nbytes + ATAPI_MAX_DRAIN) {
-			ata_ehi_push_desc(ehi, "too much trailing data "
-					  "buf=%u cur=%u bytes=%u",
-					  qc->nbytes, qc->curbytes, bytes);
-			return -1;
-		}
-
-		 /* overflow is exptected for misc ATAPI commands */
-		if (bytes && !atapi_qc_may_overflow(qc))
-			ata_dev_printk(qc->dev, KERN_WARNING, "ATAPI %u bytes "
-				       "trailing data (cdb=%02x nbytes=%u)\n",
-				       bytes, qc->cdb[0], qc->nbytes);
-
-		for (i = 0; i < (bytes + 1) / 2; i++)
-			ap->ops->data_xfer(qc->dev, (unsigned char *)pad_buf, 2, do_write);
-
-		qc->curbytes += bytes;
-
-		return 0;
+		ata_ehi_push_desc(ehi, "unexpected or too much trailing data "
+				  "buf=%u cur=%u bytes=%u",
+				  qc->nbytes, qc->curbytes, bytes);
+		return -1;
 	}
 
 	page = sg_page(sg);
@@ -5211,18 +5033,16 @@
 		buf = kmap_atomic(page, KM_IRQ0);
 
 		/* do the actual data transfer */
-		ap->ops->data_xfer(qc->dev,  buf + offset, count, do_write);
+		consumed = ap->ops->data_xfer(dev,  buf + offset, count, rw);
 
 		kunmap_atomic(buf, KM_IRQ0);
 		local_irq_restore(flags);
 	} else {
 		buf = page_address(page);
-		ap->ops->data_xfer(qc->dev,  buf + offset, count, do_write);
+		consumed = ap->ops->data_xfer(dev,  buf + offset, count, rw);
 	}
 
-	bytes -= count;
-	if ((count & 1) && bytes)
-		bytes--;
+	bytes -= min(bytes, consumed);
 	qc->curbytes += count;
 	qc->cursg_ofs += count;
 
@@ -5231,9 +5051,11 @@
 		qc->cursg_ofs = 0;
 	}
 
+	/* consumed can be larger than count only for the last transfer */
+	WARN_ON(qc->cursg && count != consumed);
+
 	if (bytes)
 		goto next_sg;
-
 	return 0;
 }
 
@@ -5251,6 +5073,7 @@
 {
 	struct ata_port *ap = qc->ap;
 	struct ata_device *dev = qc->dev;
+	struct ata_eh_info *ehi = &dev->link->eh_info;
 	unsigned int ireason, bc_lo, bc_hi, bytes;
 	int i_write, do_write = (qc->tf.flags & ATA_TFLAG_WRITE) ? 1 : 0;
 
@@ -5268,26 +5091,28 @@
 
 	/* shall be cleared to zero, indicating xfer of data */
 	if (unlikely(ireason & (1 << 0)))
-		goto err_out;
+		goto atapi_check;
 
 	/* make sure transfer direction matches expected */
 	i_write = ((ireason & (1 << 1)) == 0) ? 1 : 0;
 	if (unlikely(do_write != i_write))
-		goto err_out;
+		goto atapi_check;
 
 	if (unlikely(!bytes))
-		goto err_out;
+		goto atapi_check;
 
 	VPRINTK("ata%u: xfering %d bytes\n", ap->print_id, bytes);
 
-	if (__atapi_pio_bytes(qc, bytes))
+	if (unlikely(__atapi_pio_bytes(qc, bytes)))
 		goto err_out;
 	ata_altstatus(ap); /* flush */
 
 	return;
 
-err_out:
-	ata_dev_printk(dev, KERN_INFO, "ATAPI check failed\n");
+ atapi_check:
+	ata_ehi_push_desc(ehi, "ATAPI check failed (ireason=0x%x bytes=%u)",
+			  ireason, bytes);
+ err_out:
 	qc->err_mask |= AC_ERR_HSM;
 	ap->hsm_task_state = HSM_ST_ERR;
 }
@@ -5972,9 +5797,6 @@
 	 */
 	BUG_ON(ata_is_data(prot) && (!qc->sg || !qc->n_elem || !qc->nbytes));
 
-	/* ata_sg_setup() may update nbytes */
-	qc->raw_nbytes = qc->nbytes;
-
 	if (ata_is_dma(prot) || (ata_is_pio(prot) &&
 				 (ap->flags & ATA_FLAG_PIO_DMA)))
 		if (ata_sg_setup(qc))
@@ -6583,19 +6405,12 @@
 int ata_port_start(struct ata_port *ap)
 {
 	struct device *dev = ap->dev;
-	int rc;
 
 	ap->prd = dmam_alloc_coherent(dev, ATA_PRD_TBL_SZ, &ap->prd_dma,
 				      GFP_KERNEL);
 	if (!ap->prd)
 		return -ENOMEM;
 
-	rc = ata_pad_alloc(ap, dev);
-	if (rc)
-		return rc;
-
-	DPRINTK("prd alloc, virt %p, dma %llx\n", ap->prd,
-		(unsigned long long)ap->prd_dma);
 	return 0;
 }
 
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 1cea18f..dd41b1a 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -826,30 +826,61 @@
 	sdev->max_device_blocked = 1;
 }
 
-static void ata_scsi_dev_config(struct scsi_device *sdev,
-				struct ata_device *dev)
+/**
+ *	atapi_drain_needed - Check whether data transfer may overflow
+ *	@request: request to be checked
+ *
+ *	ATAPI commands which transfer variable length data to host
+ *	might overflow due to application error or hardware bug.  This
+ *	function checks whether overflow should be drained and ignored
+ *	for @request.
+ *
+ *	LOCKING:
+ *	None.
+ *
+ *	RETURNS:
+ *	1 if @request needs to be drained; otherwise, 0.
+ */
+static int atapi_drain_needed(struct request *rq)
+{
+	if (likely(!blk_pc_request(rq)))
+		return 0;
+
+	if (!rq->data_len || (rq->cmd_flags & REQ_RW))
+		return 0;
+
+	return atapi_cmd_type(rq->cmd[0]) == ATAPI_MISC;
+}
+
+static int ata_scsi_dev_config(struct scsi_device *sdev,
+			       struct ata_device *dev)
 {
 	/* configure max sectors */
 	blk_queue_max_sectors(sdev->request_queue, dev->max_sectors);
 
-	/* SATA DMA transfers must be multiples of 4 byte, so
-	 * we need to pad ATAPI transfers using an extra sg.
-	 * Decrement max hw segments accordingly.
-	 */
 	if (dev->class == ATA_DEV_ATAPI) {
 		struct request_queue *q = sdev->request_queue;
-		blk_queue_max_hw_segments(q, q->max_hw_segments - 1);
+		void *buf;
 
 		/* set the min alignment */
 		blk_queue_update_dma_alignment(sdev->request_queue,
 					       ATA_DMA_PAD_SZ - 1);
-	} else
+
+		/* configure draining */
+		buf = kmalloc(ATAPI_MAX_DRAIN, q->bounce_gfp | GFP_KERNEL);
+		if (!buf) {
+			ata_dev_printk(dev, KERN_ERR,
+				       "drain buffer allocation failed\n");
+			return -ENOMEM;
+		}
+
+		blk_queue_dma_drain(q, atapi_drain_needed, buf, ATAPI_MAX_DRAIN);
+	} else {
 		/* ATA devices must be sector aligned */
 		blk_queue_update_dma_alignment(sdev->request_queue,
 					       ATA_SECT_SIZE - 1);
-
-	if (dev->class == ATA_DEV_ATA)
 		sdev->manage_start_stop = 1;
+	}
 
 	if (dev->flags & ATA_DFLAG_AN)
 		set_bit(SDEV_EVT_MEDIA_CHANGE, sdev->supported_events);
@@ -861,6 +892,8 @@
 		depth = min(ATA_MAX_QUEUE - 1, depth);
 		scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, depth);
 	}
+
+	return 0;
 }
 
 /**
@@ -879,13 +912,14 @@
 {
 	struct ata_port *ap = ata_shost_to_port(sdev->host);
 	struct ata_device *dev = __ata_scsi_find_dev(ap, sdev);
+	int rc = 0;
 
 	ata_scsi_sdev_config(sdev);
 
 	if (dev)
-		ata_scsi_dev_config(sdev, dev);
+		rc = ata_scsi_dev_config(sdev, dev);
 
-	return 0;
+	return rc;
 }
 
 /**
@@ -905,6 +939,7 @@
 void ata_scsi_slave_destroy(struct scsi_device *sdev)
 {
 	struct ata_port *ap = ata_shost_to_port(sdev->host);
+	struct request_queue *q = sdev->request_queue;
 	unsigned long flags;
 	struct ata_device *dev;
 
@@ -920,6 +955,10 @@
 		ata_port_schedule_eh(ap);
 	}
 	spin_unlock_irqrestore(ap->lock, flags);
+
+	kfree(q->dma_drain_buffer);
+	q->dma_drain_buffer = NULL;
+	q->dma_drain_size = 0;
 }
 
 /**
@@ -2500,7 +2539,7 @@
 	 * want to set it properly, and for DMA where it is
 	 * effectively meaningless.
 	 */
-	nbytes = min(qc->nbytes, (unsigned int)63 * 1024);
+	nbytes = min(scmd->request->raw_data_len, (unsigned int)63 * 1024);
 
 	/* Most ATAPI devices which honor transfer chunk size don't
 	 * behave according to the spec when odd chunk size which
@@ -3555,7 +3594,7 @@
  *	@ap: Port to initialize
  *
  *	Called just after data structures for each port are
- *	initialized.  Allocates DMA pad.
+ *	initialized.
  *
  *	May be used as the port_start() entry in ata_port_operations.
  *
@@ -3564,7 +3603,7 @@
  */
 int ata_sas_port_start(struct ata_port *ap)
 {
-	return ata_pad_alloc(ap, ap->dev);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(ata_sas_port_start);
 
@@ -3572,8 +3611,6 @@
  *	ata_port_stop - Undo ata_sas_port_start()
  *	@ap: Port to shut down
  *
- *	Frees the DMA pad.
- *
  *	May be used as the port_stop() entry in ata_port_operations.
  *
  *	LOCKING:
@@ -3582,7 +3619,6 @@
 
 void ata_sas_port_stop(struct ata_port *ap)
 {
-	ata_pad_free(ap, ap->dev);
 }
 EXPORT_SYMBOL_GPL(ata_sas_port_stop);
 
diff --git a/drivers/ata/pata_icside.c b/drivers/ata/pata_icside.c
index 5b8586d..f97068b 100644
--- a/drivers/ata/pata_icside.c
+++ b/drivers/ata/pata_icside.c
@@ -304,12 +304,6 @@
 }
 
 
-static int pata_icside_port_start(struct ata_port *ap)
-{
-	/* No PRD to alloc */
-	return ata_pad_alloc(ap, ap->dev);
-}
-
 static struct scsi_host_template pata_icside_sht = {
 	.module			= THIS_MODULE,
 	.name			= DRV_NAME,
@@ -389,8 +383,6 @@
 	.irq_clear		= ata_dummy_noret,
 	.irq_on			= ata_irq_on,
 
-	.port_start		= pata_icside_port_start,
-
 	.bmdma_stop		= pata_icside_bmdma_stop,
 	.bmdma_status		= pata_icside_bmdma_status,
 };
diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c
index efcb66b6..9323dd0 100644
--- a/drivers/ata/sata_fsl.c
+++ b/drivers/ata/sata_fsl.c
@@ -601,21 +601,9 @@
 	if (!pp)
 		return -ENOMEM;
 
-	/*
-	 * allocate per command dma alignment pad buffer, which is used
-	 * internally by libATA to ensure that all transfers ending on
-	 * unaligned boundaries are padded, to align on Dword boundaries
-	 */
-	retval = ata_pad_alloc(ap, dev);
-	if (retval) {
-		kfree(pp);
-		return retval;
-	}
-
 	mem = dma_alloc_coherent(dev, SATA_FSL_PORT_PRIV_DMA_SZ, &mem_dma,
 				 GFP_KERNEL);
 	if (!mem) {
-		ata_pad_free(ap, dev);
 		kfree(pp);
 		return -ENOMEM;
 	}
@@ -694,7 +682,6 @@
 	dma_free_coherent(dev, SATA_FSL_PORT_PRIV_DMA_SZ,
 			  pp->cmdslot, pp->cmdslot_paddr);
 
-	ata_pad_free(ap, dev);
 	kfree(pp);
 }
 
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index 2ecd44d..1c1fbf3 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -1158,17 +1158,13 @@
 	struct mv_port_priv *pp;
 	void __iomem *port_mmio = mv_ap_base(ap);
 	unsigned long flags;
-	int tag, rc;
+	int tag;
 
 	pp = devm_kzalloc(dev, sizeof(*pp), GFP_KERNEL);
 	if (!pp)
 		return -ENOMEM;
 	ap->private_data = pp;
 
-	rc = ata_pad_alloc(ap, dev);
-	if (rc)
-		return rc;
-
 	pp->crqb = dma_pool_alloc(hpriv->crqb_pool, GFP_KERNEL, &pp->crqb_dma);
 	if (!pp->crqb)
 		return -ENOMEM;
diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c
index b4b1f91..df7988d 100644
--- a/drivers/ata/sata_sil24.c
+++ b/drivers/ata/sata_sil24.c
@@ -1234,7 +1234,6 @@
 	union sil24_cmd_block *cb;
 	size_t cb_size = sizeof(*cb) * SIL24_MAX_CMDS;
 	dma_addr_t cb_dma;
-	int rc;
 
 	pp = devm_kzalloc(dev, sizeof(*pp), GFP_KERNEL);
 	if (!pp)
@@ -1247,10 +1246,6 @@
 		return -ENOMEM;
 	memset(cb, 0, cb_size);
 
-	rc = ata_pad_alloc(ap, dev);
-	if (rc)
-		return rc;
-
 	pp->cmd_block = cb;
 	pp->cmd_block_dma = cb_dma;
 
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 2074701..c72014a 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -5140,7 +5140,7 @@
 	struct ipr_ioarcb *ioarcb = &ipr_cmd->ioarcb;
 	struct ipr_ioadl_desc *ioadl = ipr_cmd->ioadl;
 	struct ipr_ioadl_desc *last_ioadl = NULL;
-	int len = qc->nbytes + qc->pad_len;
+	int len = qc->nbytes;
 	struct scatterlist *sg;
 	unsigned int si;
 
@@ -5206,7 +5206,7 @@
 	ioarcb->cmd_pkt.request_type = IPR_RQTYPE_ATA_PASSTHRU;
 	ioarcb->cmd_pkt.flags_hi |= IPR_FLAGS_HI_NO_LINK_DESC;
 	ioarcb->cmd_pkt.flags_hi |= IPR_FLAGS_HI_NO_ULEN_CHK;
-	ipr_cmd->dma_use_sg = qc->pad_len ? qc->n_elem + 1 : qc->n_elem;
+	ipr_cmd->dma_use_sg = qc->n_elem;
 
 	ipr_build_ata_ioadl(ipr_cmd, qc);
 	regs->flags |= IPR_ATA_FLAG_STATUS_ON_GOOD_COMPLETION;
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 0996f86..7cd05b5 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -178,8 +178,8 @@
 	task->uldd_task = qc;
 	if (ata_is_atapi(qc->tf.protocol)) {
 		memcpy(task->ata_task.atapi_packet, qc->cdb, qc->dev->cdb_len);
-		task->total_xfer_len = qc->nbytes + qc->pad_len;
-		task->num_scatter = qc->pad_len ? qc->n_elem + 1 : qc->n_elem;
+		task->total_xfer_len = qc->nbytes;
+		task->num_scatter = qc->n_elem;
 	} else {
 		for_each_sg(qc->sg, sg, qc->n_elem, si)
 			xfer += sg->length;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 135c1d0..ba21d97 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1014,10 +1014,6 @@
 	}
 
 	req->buffer = NULL;
-	if (blk_pc_request(req))
-		sdb->length = req->data_len;
-	else
-		sdb->length = req->nr_sectors << 9;
 
 	/* 
 	 * Next, walk the list, and fill in the addresses and sizes of
@@ -1026,6 +1022,10 @@
 	count = blk_rq_map_sg(req->q, req, sdb->table.sgl);
 	BUG_ON(count > sdb->table.nents);
 	sdb->table.nents = count;
+	if (blk_pc_request(req))
+		sdb->length = req->data_len;
+	else
+		sdb->length = req->nr_sectors << 9;
 	return BLKPREP_OK;
 }
 
diff --git a/fs/bio.c b/fs/bio.c
index 242e409..3312fcc 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -903,7 +903,7 @@
 	}
 }
 
-void bio_release_pages(struct bio *bio)
+static void bio_release_pages(struct bio *bio)
 {
 	struct bio_vec *bvec = bio->bi_io_vec;
 	int i;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 67fe72c..7d822fa 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -31,6 +31,8 @@
 	struct inode vfs_inode;
 };
 
+static const struct address_space_operations def_blk_aops;
+
 static inline struct bdev_inode *BDEV_I(struct inode *inode)
 {
 	return container_of(inode, struct bdev_inode, vfs_inode);
@@ -171,203 +173,6 @@
 				iov, offset, nr_segs, blkdev_get_blocks, NULL);
 }
 
-#if 0
-static void blk_end_aio(struct bio *bio, int error)
-{
-	struct kiocb *iocb = bio->bi_private;
-	atomic_t *bio_count = &iocb->ki_bio_count;
-
-	if (bio_data_dir(bio) == READ)
-		bio_check_pages_dirty(bio);
-	else {
-		bio_release_pages(bio);
-		bio_put(bio);
-	}
-
-	/* iocb->ki_nbytes stores error code from LLDD */
-	if (error)
-		iocb->ki_nbytes = -EIO;
-
-	if (atomic_dec_and_test(bio_count)) {
-		if ((long)iocb->ki_nbytes < 0)
-			aio_complete(iocb, iocb->ki_nbytes, 0);
-		else
-			aio_complete(iocb, iocb->ki_left, 0);
-	}
-
-	return 0;
-}
-
-#define VEC_SIZE	16
-struct pvec {
-	unsigned short nr;
-	unsigned short idx;
-	struct page *page[VEC_SIZE];
-};
-
-#define PAGES_SPANNED(addr, len)	\
-	(DIV_ROUND_UP((addr) + (len), PAGE_SIZE) - (addr) / PAGE_SIZE);
-
-/*
- * get page pointer for user addr, we internally cache struct page array for
- * (addr, count) range in pvec to avoid frequent call to get_user_pages.  If
- * internal page list is exhausted, a batch count of up to VEC_SIZE is used
- * to get next set of page struct.
- */
-static struct page *blk_get_page(unsigned long addr, size_t count, int rw,
-				 struct pvec *pvec)
-{
-	int ret, nr_pages;
-	if (pvec->idx == pvec->nr) {
-		nr_pages = PAGES_SPANNED(addr, count);
-		nr_pages = min(nr_pages, VEC_SIZE);
-		down_read(&current->mm->mmap_sem);
-		ret = get_user_pages(current, current->mm, addr, nr_pages,
-				     rw == READ, 0, pvec->page, NULL);
-		up_read(&current->mm->mmap_sem);
-		if (ret < 0)
-			return ERR_PTR(ret);
-		pvec->nr = ret;
-		pvec->idx = 0;
-	}
-	return pvec->page[pvec->idx++];
-}
-
-/* return a page back to pvec array */
-static void blk_unget_page(struct page *page, struct pvec *pvec)
-{
-	pvec->page[--pvec->idx] = page;
-}
-
-static ssize_t
-blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-		 loff_t pos, unsigned long nr_segs)
-{
-	struct inode *inode = iocb->ki_filp->f_mapping->host;
-	unsigned blkbits = blksize_bits(bdev_hardsect_size(I_BDEV(inode)));
-	unsigned blocksize_mask = (1 << blkbits) - 1;
-	unsigned long seg = 0;	/* iov segment iterator */
-	unsigned long nvec;	/* number of bio vec needed */
-	unsigned long cur_off;	/* offset into current page */
-	unsigned long cur_len;	/* I/O len of current page, up to PAGE_SIZE */
-
-	unsigned long addr;	/* user iovec address */
-	size_t count;		/* user iovec len */
-	size_t nbytes = iocb->ki_nbytes = iocb->ki_left; /* total xfer size */
-	loff_t size;		/* size of block device */
-	struct bio *bio;
-	atomic_t *bio_count = &iocb->ki_bio_count;
-	struct page *page;
-	struct pvec pvec;
-
-	pvec.nr = 0;
-	pvec.idx = 0;
-
-	if (pos & blocksize_mask)
-		return -EINVAL;
-
-	size = i_size_read(inode);
-	if (pos + nbytes > size) {
-		nbytes = size - pos;
-		iocb->ki_left = nbytes;
-	}
-
-	/*
-	 * check first non-zero iov alignment, the remaining
-	 * iov alignment is checked inside bio loop below.
-	 */
-	do {
-		addr = (unsigned long) iov[seg].iov_base;
-		count = min(iov[seg].iov_len, nbytes);
-		if (addr & blocksize_mask || count & blocksize_mask)
-			return -EINVAL;
-	} while (!count && ++seg < nr_segs);
-	atomic_set(bio_count, 1);
-
-	while (nbytes) {
-		/* roughly estimate number of bio vec needed */
-		nvec = (nbytes + PAGE_SIZE - 1) / PAGE_SIZE;
-		nvec = max(nvec, nr_segs - seg);
-		nvec = min(nvec, (unsigned long) BIO_MAX_PAGES);
-
-		/* bio_alloc should not fail with GFP_KERNEL flag */
-		bio = bio_alloc(GFP_KERNEL, nvec);
-		bio->bi_bdev = I_BDEV(inode);
-		bio->bi_end_io = blk_end_aio;
-		bio->bi_private = iocb;
-		bio->bi_sector = pos >> blkbits;
-same_bio:
-		cur_off = addr & ~PAGE_MASK;
-		cur_len = PAGE_SIZE - cur_off;
-		if (count < cur_len)
-			cur_len = count;
-
-		page = blk_get_page(addr, count, rw, &pvec);
-		if (unlikely(IS_ERR(page)))
-			goto backout;
-
-		if (bio_add_page(bio, page, cur_len, cur_off)) {
-			pos += cur_len;
-			addr += cur_len;
-			count -= cur_len;
-			nbytes -= cur_len;
-
-			if (count)
-				goto same_bio;
-			while (++seg < nr_segs) {
-				addr = (unsigned long) iov[seg].iov_base;
-				count = iov[seg].iov_len;
-				if (!count)
-					continue;
-				if (unlikely(addr & blocksize_mask ||
-					     count & blocksize_mask)) {
-					page = ERR_PTR(-EINVAL);
-					goto backout;
-				}
-				count = min(count, nbytes);
-				goto same_bio;
-			}
-		} else {
-			blk_unget_page(page, &pvec);
-		}
-
-		/* bio is ready, submit it */
-		if (rw == READ)
-			bio_set_pages_dirty(bio);
-		atomic_inc(bio_count);
-		submit_bio(rw, bio);
-	}
-
-completion:
-	iocb->ki_left -= nbytes;
-	nbytes = iocb->ki_left;
-	iocb->ki_pos += nbytes;
-
-	blk_run_address_space(inode->i_mapping);
-	if (atomic_dec_and_test(bio_count))
-		aio_complete(iocb, nbytes, 0);
-
-	return -EIOCBQUEUED;
-
-backout:
-	/*
-	 * back out nbytes count constructed so far for this bio,
-	 * we will throw away current bio.
-	 */
-	nbytes += bio->bi_size;
-	bio_release_pages(bio);
-	bio_put(bio);
-
-	/*
-	 * if no bio was submmitted, return the error code.
-	 * otherwise, proceed with pending I/O completion.
-	 */
-	if (atomic_read(bio_count) == 1)
-		return PTR_ERR(page);
-	goto completion;
-}
-#endif
-
 static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
 {
 	return block_write_full_page(page, blkdev_get_block, wbc);
@@ -1334,7 +1139,7 @@
 	return blkdev_ioctl(file->f_mapping->host, file, cmd, arg);
 }
 
-const struct address_space_operations def_blk_aops = {
+static const struct address_space_operations def_blk_aops = {
 	.readpage	= blkdev_readpage,
 	.writepage	= blkdev_writepage,
 	.sync_page	= block_sync_page,
diff --git a/include/linux/aio.h b/include/linux/aio.h
index a9931e2..0d0b7f6 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -105,7 +105,6 @@
 	wait_queue_t		ki_wait;
 	loff_t			ki_pos;
 
-	atomic_t		ki_bio_count;	/* num bio used for this iocb */
 	void			*private;
 	/* State that we remember to be able to restart/retry  */
 	unsigned short		ki_opcode;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 4da4413..4c59bdc 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -326,7 +326,6 @@
 				gfp_t);
 extern void bio_set_pages_dirty(struct bio *bio);
 extern void bio_check_pages_dirty(struct bio *bio);
-extern void bio_release_pages(struct bio *bio);
 extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int);
 extern int bio_uncopy_user(struct bio *);
 void zero_fill_bio(struct bio *bio);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e1888cc..6fe67d1 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -216,6 +216,7 @@
 	unsigned int cmd_len;
 	unsigned char cmd[BLK_MAX_CDB];
 
+	unsigned int raw_data_len;
 	unsigned int data_len;
 	unsigned int sense_len;
 	void *data;
@@ -258,6 +259,7 @@
 typedef int (merge_bvec_fn) (struct request_queue *, struct bio *, struct bio_vec *);
 typedef void (prepare_flush_fn) (struct request_queue *, struct request *);
 typedef void (softirq_done_fn)(struct request *);
+typedef int (dma_drain_needed_fn)(struct request *);
 
 enum blk_queue_state {
 	Queue_down,
@@ -294,6 +296,7 @@
 	merge_bvec_fn		*merge_bvec_fn;
 	prepare_flush_fn	*prepare_flush_fn;
 	softirq_done_fn		*softirq_done_fn;
+	dma_drain_needed_fn	*dma_drain_needed;
 
 	/*
 	 * Dispatch queue sorting
@@ -698,8 +701,9 @@
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
 extern void blk_queue_hardsect_size(struct request_queue *, unsigned short);
 extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
-extern int blk_queue_dma_drain(struct request_queue *q, void *buf,
-			       unsigned int size);
+extern int blk_queue_dma_drain(struct request_queue *q,
+			       dma_drain_needed_fn *dma_drain_needed,
+			       void *buf, unsigned int size);
 extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
 extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn);
 extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);
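
A note on the new field pair (sketch only; rq_resid() is a hypothetical helper, not an existing interface): rq->data_len may now grow by alignment padding or by the drain-buffer size, while rq->raw_data_len keeps the byte count the submitter asked for, which is why the bsg and SG_IO resid calculations above switched to raw_data_len.

static inline unsigned int rq_resid(struct request *rq, unsigned int done)
{
	/* residuals reported to userland are based on the unpadded size */
	return rq->raw_data_len > done ? rq->raw_data_len - done : 0;
}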
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 98ffb6e..b84b848 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1590,7 +1590,6 @@
 extern void bd_forget(struct inode *inode);
 extern void bdput(struct block_device *);
 extern struct block_device *open_by_devnum(dev_t, unsigned);
-extern const struct address_space_operations def_blk_aops;
 #else
 static inline void bd_forget(struct inode *inode) {}
 #endif
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 593b222..1b4ccf2 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -50,6 +50,7 @@
 	sector_t seek_mean;
 
 	struct list_head queue_list;
+	struct hlist_node cic_list;
 
 	void (*dtor)(struct io_context *); /* destructor */
 	void (*exit)(struct io_context *); /* called on task exit */
@@ -77,6 +78,7 @@
 
 	struct as_io_context *aic;
 	struct radix_tree_root radix_root;
+	struct hlist_head cic_list;
 	void *ioc_data;
 };
 
diff --git a/include/linux/libata.h b/include/linux/libata.h
index bc5a8d0..2e098f9 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -278,7 +278,6 @@
 
 	/* size of buffer to pad xfers ending on unaligned boundaries */
 	ATA_DMA_PAD_SZ		= 4,
-	ATA_DMA_PAD_BUF_SZ	= ATA_DMA_PAD_SZ * ATA_MAX_QUEUE,
 
 	/* ering size */
 	ATA_ERING_SIZE		= 32,
@@ -457,24 +456,18 @@
 	unsigned long		flags;		/* ATA_QCFLAG_xxx */
 	unsigned int		tag;
 	unsigned int		n_elem;
-	unsigned int		mapped_n_elem;
 
 	int			dma_dir;
 
-	unsigned int		pad_len;
 	unsigned int		sect_size;
 
 	unsigned int		nbytes;
-	unsigned int		raw_nbytes;
 	unsigned int		curbytes;
 
 	struct scatterlist	*cursg;
 	unsigned int		cursg_ofs;
 
-	struct scatterlist	*last_sg;
-	struct scatterlist	saved_last_sg;
 	struct scatterlist	sgent;
-	struct scatterlist	extra_sg[2];
 
 	struct scatterlist	*sg;
 
@@ -619,9 +612,6 @@
 	struct ata_prd		*prd;	 /* our SG list */
 	dma_addr_t		prd_dma; /* and its DMA mapping */
 
-	void			*pad;	/* array of DMA pad buffers */
-	dma_addr_t		pad_dma;
-
 	struct ata_ioports	ioaddr;	/* ATA cmd/ctl/dma register blocks */
 
 	u8			ctl;	/* cache of ATA control register */
@@ -1363,12 +1353,9 @@
 	qc->flags = 0;
 	qc->cursg = NULL;
 	qc->cursg_ofs = 0;
-	qc->nbytes = qc->raw_nbytes = qc->curbytes = 0;
+	qc->nbytes = qc->curbytes = 0;
 	qc->n_elem = 0;
-	qc->mapped_n_elem = 0;
 	qc->err_mask = 0;
-	qc->pad_len = 0;
-	qc->last_sg = NULL;
 	qc->sect_size = ATA_SECT_SIZE;
 
 	ata_tf_init(qc->dev, &qc->tf);
@@ -1423,19 +1410,6 @@
 	return mask;
 }
 
-static inline int ata_pad_alloc(struct ata_port *ap, struct device *dev)
-{
-	ap->pad_dma = 0;
-	ap->pad = dmam_alloc_coherent(dev, ATA_DMA_PAD_BUF_SZ,
-				      &ap->pad_dma, GFP_KERNEL);
-	return (ap->pad == NULL) ? -ENOMEM : 0;
-}
-
-static inline void ata_pad_free(struct ata_port *ap, struct device *dev)
-{
-	dmam_free_coherent(dev, ATA_DMA_PAD_BUF_SZ, ap->pad, ap->pad_dma);
-}
-
 static inline struct ata_port *ata_shost_to_port(struct Scsi_Host *host)
 {
 	return *(struct ata_port **)&host->hostdata[0];