xfs: convert internal buffer functions to pass maps

While the external interface currently uses separate blockno/length
variables, we need to move the internal interfaces to passing and
parsing vector maps. This will then allow us to add external
interfaces that support discontiguous buffer maps, as the internal
code will already support them.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ben Myers <bpm@sgi.com>
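
For reference, the map vector type and the single-map initialiser used
throughout this patch were added to fs/xfs/xfs_buf.h by the preceding
patch in this series ("xfs: separate buffer indexing from block map");
they are declared along these lines:

struct xfs_buf_map {
	xfs_daddr_t		bm_bn;	/* block number for I/O */
	int			bm_len;	/* size of I/O */
};

#define DEFINE_SINGLE_BUF_MAP(map, blkno, numblk) \
	struct xfs_buf_map (map) = { .bm_bn = (blkno), .bm_len = (numblk) };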

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index a843873b..82bb812 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -164,14 +164,53 @@
 	ASSERT(atomic_read(&bp->b_hold) >= 1);
 }
 
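+/*
+ * Allocate the buffer map vector, using the map embedded in the buffer
+ * for the common single-map case to avoid a separate allocation.
+ */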
+static int
+xfs_buf_get_maps(
+	struct xfs_buf		*bp,
+	int			map_count)
+{
+	ASSERT(bp->b_maps == NULL);
+	bp->b_map_count = map_count;
+
+	if (map_count == 1) {
+		bp->b_maps = &bp->b_map;
+		return 0;
+	}
+
+	bp->b_maps = kmem_zalloc(map_count * sizeof(struct xfs_buf_map),
+				KM_NOFS);
+	if (!bp->b_maps)
+		return ENOMEM;
+	return 0;
+}
+
+/*
+ *	Frees b_maps if it has been allocated.
+ */
+static void
+xfs_buf_free_maps(
+	struct xfs_buf	*bp)
+{
+	if (bp->b_maps != &bp->b_map) {
+		kmem_free(bp->b_maps);
+		bp->b_maps = NULL;
+	}
+}
+
 struct xfs_buf *
-xfs_buf_alloc(
+_xfs_buf_alloc(
 	struct xfs_buftarg	*target,
-	xfs_daddr_t		blkno,
-	size_t			numblks,
+	struct xfs_buf_map	*map,
+	int			nmaps,
 	xfs_buf_flags_t		flags)
 {
 	struct xfs_buf		*bp;
+	int			error;
+	int			i;
 
 	bp = kmem_zone_zalloc(xfs_buf_zone, KM_NOFS);
 	if (unlikely(!bp))
@@ -192,18 +227,28 @@
 	sema_init(&bp->b_sema, 0); /* held, no waiters */
 	XB_SET_OWNER(bp);
 	bp->b_target = target;
+	bp->b_flags = flags;
 
 	/*
 	 * Set length and io_length to the same value initially.
 	 * I/O routines should use io_length, which will be the same in
 	 * most cases but may be reset (e.g. XFS recovery).
 	 */
-	bp->b_length = numblks;
-	bp->b_io_length = numblks;
-	bp->b_flags = flags;
-	bp->b_bn = blkno;
-	bp->b_map.bm_bn = blkno;
-	bp->b_map.bm_len = numblks;
+	error = xfs_buf_get_maps(bp, nmaps);
+	if (error) {
+		kmem_zone_free(xfs_buf_zone, bp);
+		return NULL;
+	}
+
+	bp->b_bn = map[0].bm_bn;
+	bp->b_length = 0;
+	for (i = 0; i < nmaps; i++) {
+		bp->b_maps[i].bm_bn = map[i].bm_bn;
+		bp->b_maps[i].bm_len = map[i].bm_len;
+		bp->b_length += map[i].bm_len;
+	}
+	bp->b_io_length = bp->b_length;
+
 	atomic_set(&bp->b_pin_count, 0);
 	init_waitqueue_head(&bp->b_waiters);
 
@@ -282,6 +327,7 @@
 	} else if (bp->b_flags & _XBF_KMEM)
 		kmem_free(bp->b_addr);
 	_xfs_buf_free_pages(bp);
+	xfs_buf_free_maps(bp);
 	kmem_zone_free(xfs_buf_zone, bp);
 }
 
@@ -428,8 +474,8 @@
 xfs_buf_t *
 _xfs_buf_find(
 	struct xfs_buftarg	*btp,
-	xfs_daddr_t		blkno,
-	size_t			numblks,
+	struct xfs_buf_map	*map,
+	int			nmaps,
 	xfs_buf_flags_t		flags,
 	xfs_buf_t		*new_bp)
 {
@@ -438,7 +484,12 @@
 	struct rb_node		**rbp;
 	struct rb_node		*parent;
 	xfs_buf_t		*bp;
+	xfs_daddr_t		blkno = map[0].bm_bn;
+	int			numblks = 0;
+	int			i;
 
+	for (i = 0; i < nmaps; i++)
+		numblks += map[i].bm_len;
 	numbytes = BBTOB(numblks);
 
 	/* Check for IOs smaller than the sector size / not sector aligned */
@@ -539,22 +590,23 @@
 	struct xfs_buf		*bp;
 	struct xfs_buf		*new_bp;
 	int			error = 0;
+	DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
 
-	bp = _xfs_buf_find(target, blkno, numblks, flags, NULL);
+	bp = _xfs_buf_find(target, &map, 1, flags, NULL);
 	if (likely(bp))
 		goto found;
 
-	new_bp = xfs_buf_alloc(target, blkno, numblks, flags);
+	new_bp = _xfs_buf_alloc(target, &map, 1, flags);
 	if (unlikely(!new_bp))
 		return NULL;
 
 	error = xfs_buf_allocate_memory(new_bp, flags);
 	if (error) {
-		kmem_zone_free(xfs_buf_zone, new_bp);
+		xfs_buf_free(new_bp);
 		return NULL;
 	}
 
-	bp = _xfs_buf_find(target, blkno, numblks, flags, new_bp);
+	bp = _xfs_buf_find(target, &map, 1, flags, new_bp);
 	if (!bp) {
 		xfs_buf_free(new_bp);
 		return NULL;
@@ -666,7 +718,9 @@
 		return NULL;
 
 	/* set up the buffer for a read IO */
-	bp->b_map.bm_bn = daddr;
+	ASSERT(bp->b_map_count == 1);
+	bp->b_bn = daddr;
+	bp->b_maps[0].bm_bn = daddr;
 	bp->b_flags |= XBF_READ;
 
 	xfsbdstrat(target->bt_mount, bp);
@@ -695,9 +749,11 @@
 	bp->b_addr = NULL;
 	bp->b_length = numblks;
 	bp->b_io_length = numblks;
+
+	ASSERT(bp->b_map_count == 1);
 	bp->b_bn = XFS_BUF_DADDR_NULL;
-	bp->b_map.bm_bn = XFS_BUF_DADDR_NULL;
-	bp->b_map.bm_len = bp->b_length;
+	bp->b_maps[0].bm_bn = XFS_BUF_DADDR_NULL;
+	bp->b_maps[0].bm_len = bp->b_length;
 }
 
 static inline struct page *
@@ -761,9 +817,10 @@
 {
 	unsigned long		page_count;
 	int			error, i;
-	xfs_buf_t		*bp;
+	struct xfs_buf		*bp;
+	DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);
 
-	bp = xfs_buf_alloc(target, XFS_BUF_DADDR_NULL, numblks, 0);
+	bp = _xfs_buf_alloc(target, &map, 1, 0);
 	if (unlikely(bp == NULL))
 		goto fail;
 
@@ -794,6 +851,7 @@
 		__free_page(bp->b_pages[i]);
 	_xfs_buf_free_pages(bp);
  fail_free_buf:
+	xfs_buf_free_maps(bp);
 	kmem_zone_free(xfs_buf_zone, bp);
  fail:
 	return NULL;
@@ -1154,36 +1212,39 @@
 	bio_put(bio);
 }
 
-STATIC void
-_xfs_buf_ioapply(
-	xfs_buf_t		*bp)
+static void
+xfs_buf_ioapply_map(
+	struct xfs_buf	*bp,
+	int		map,
+	int		*buf_offset,
+	int		*count,
+	int		rw)
 {
-	int			rw, map_i, total_nr_pages, nr_pages;
-	struct bio		*bio;
-	int			offset = bp->b_offset;
-	int			size = BBTOB(bp->b_io_length);
-	sector_t		sector = bp->b_map.bm_bn;
+	int		page_index;
+	int		total_nr_pages;
+	int		nr_pages;
+	struct bio	*bio;
+	sector_t	sector = bp->b_maps[map].bm_bn;
+	int		size;
+	int		offset;
 
 	total_nr_pages = bp->b_page_count;
-	map_i = 0;
 
-	if (bp->b_flags & XBF_WRITE) {
-		if (bp->b_flags & XBF_SYNCIO)
-			rw = WRITE_SYNC;
-		else
-			rw = WRITE;
-		if (bp->b_flags & XBF_FUA)
-			rw |= REQ_FUA;
-		if (bp->b_flags & XBF_FLUSH)
-			rw |= REQ_FLUSH;
-	} else if (bp->b_flags & XBF_READ_AHEAD) {
-		rw = READA;
-	} else {
-		rw = READ;
+	/* skip the pages in the buffer before the start offset */
+	page_index = 0;
+	offset = *buf_offset;
+	while (offset >= PAGE_SIZE) {
+		page_index++;
+		offset -= PAGE_SIZE;
 	}
 
-	/* we only use the buffer cache for meta-data */
-	rw |= REQ_META;
+	/*
+	 * Limit the IO size to the length of the current vector, and update the
+	 * remaining IO count for the next time around.
+	 */
+	size = min_t(int, BBTOB(bp->b_maps[map].bm_len), *count);
+	*count -= size;
+	*buf_offset += size;
 
 next_chunk:
 	atomic_inc(&bp->b_io_remaining);
@@ -1198,13 +1259,14 @@
 	bio->bi_private = bp;
 
 
-	for (; size && nr_pages; nr_pages--, map_i++) {
+	for (; size && nr_pages; nr_pages--, page_index++) {
 		int	rbytes, nbytes = PAGE_SIZE - offset;
 
 		if (nbytes > size)
 			nbytes = size;
 
-		rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset);
+		rbytes = bio_add_page(bio, bp->b_pages[page_index], nbytes,
+				      offset);
 		if (rbytes < nbytes)
 			break;
 
@@ -1226,6 +1288,53 @@
 		xfs_buf_ioerror(bp, EIO);
 		bio_put(bio);
 	}
+}
+
+STATIC void
+_xfs_buf_ioapply(
+	struct xfs_buf	*bp)
+{
+	struct blk_plug	plug;
+	int		rw;
+	int		offset;
+	int		size;
+	int		i;
+
+	if (bp->b_flags & XBF_WRITE) {
+		if (bp->b_flags & XBF_SYNCIO)
+			rw = WRITE_SYNC;
+		else
+			rw = WRITE;
+		if (bp->b_flags & XBF_FUA)
+			rw |= REQ_FUA;
+		if (bp->b_flags & XBF_FLUSH)
+			rw |= REQ_FLUSH;
+	} else if (bp->b_flags & XBF_READ_AHEAD) {
+		rw = READA;
+	} else {
+		rw = READ;
+	}
+
+	/* we only use the buffer cache for meta-data */
+	rw |= REQ_META;
+
+	/*
+	 * Walk all the vectors issuing IO on them. Set up the initial offset
+	 * into the buffer and the desired IO size before we start -
+ * xfs_buf_ioapply_map() will modify them appropriately for each
+	 * subsequent call.
+	 */
+	offset = bp->b_offset;
+	size = BBTOB(bp->b_io_length);
+	blk_start_plug(&plug);
+	for (i = 0; i < bp->b_map_count; i++) {
+		xfs_buf_ioapply_map(bp, i, &offset, &size, rw);
+		if (bp->b_error)
+			break;
+		if (size <= 0)
+			break;	/* all done */
+	}
+	blk_finish_plug(&plug);
 }
 
 void
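
With the internal interfaces converted, the follow-up patches can add
external interfaces that describe a discontiguous buffer with a
multi-entry map vector. As a sketch only (daddr1/daddr2, len1/len2,
btp and flags are illustrative placeholders, not names from this
patch), a two-extent buffer would be set up along these lines:

	struct xfs_buf_map map[2] = {
		{ .bm_bn = daddr1, .bm_len = len1 },	/* first extent */
		{ .bm_bn = daddr2, .bm_len = len2 },	/* second extent */
	};
	struct xfs_buf	*bp;

	/*
	 * b_length becomes len1 + len2; _xfs_buf_ioapply() issues a
	 * separate bio chain for each map vector.
	 */
	bp = _xfs_buf_alloc(btp, map, 2, flags);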