Merge branch 'xfs-misc-fixes-2-for-3.16' into for-next Conflicts: fs/xfs/xfs_ialloc.c

commit: 0d907a3bb4a77cffebebd17c323e898048301aa3 [log] [tgz]
author: Dave Chinner <david@fromorbit.com> Tue May 20 08:56:00 2014 +1000
committer: Dave Chinner <david@fromorbit.com> Tue May 20 08:56:00 2014 +1000
tree: c093289c9ab443935ce10e138ab49efacb2c283f
parent: 2d6dcc6d7e95cc83046b2f97e179e6bbb7921245 [diff]
parent: 376c2f3a5f0706868b08ccf043bf3532936a03b1 [diff]
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 0479c32..d1b99b6 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c

@@ -982,7 +982,32 @@
 	offset = i_size_read(inode);
 	end_index = offset >> PAGE_CACHE_SHIFT;
 	last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
-	if (page->index >= end_index) {
+
+	/*
+	 * The page index is less than the end_index, adjust the end_offset
+	 * to the highest offset that this page should represent.
+	 * -----------------------------------------------------
+	 * |			file mapping	       | <EOF> |
+	 * -----------------------------------------------------
+	 * | Page ... | Page N-2 | Page N-1 |  Page N  |       |
+	 * ^--------------------------------^----------|--------
+	 * |     desired writeback range    |      see else    |
+	 * ---------------------------------^------------------|
+	 */
+	if (page->index < end_index)
+		end_offset = (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT;
+	else {
+		/*
+		 * Check whether the page to write out is beyond or straddles
+		 * i_size or not.
+		 * -------------------------------------------------------
+		 * |		file mapping		        | <EOF>  |
+		 * -------------------------------------------------------
+		 * | Page ... | Page N-2 | Page N-1 |  Page N   | Beyond |
+		 * ^--------------------------------^-----------|---------
+		 * |				    |      Straddles     |
+		 * ---------------------------------^-----------|--------|
+		 */
 		unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1);
 
 		/*
@@ -990,24 +1015,36 @@
 		 * truncate operation that is in progress. We must redirty the
 		 * page so that reclaim stops reclaiming it. Otherwise
 		 * xfs_vm_releasepage() is called on it and gets confused.
+		 *
+		 * Note that the end_index is unsigned long, it would overflow
+		 * if the given offset is greater than 16TB on 32-bit system
+		 * and if we do check the page is fully outside i_size or not
+		 * via "if (page->index >= end_index + 1)" as "end_index + 1"
+		 * will be evaluated to 0.  Hence this page will be redirtied
+		 * and be written out repeatedly which would result in an
+		 * infinite loop, the user program that perform this operation
+		 * will hang.  Instead, we can verify this situation by checking
+		 * if the page to write is totally beyond the i_size or if it's
+		 * offset is just equal to the EOF.
 		 */
-		if (page->index >= end_index + 1 || offset_into_page == 0)
+		if (page->index > end_index ||
+		    (page->index == end_index && offset_into_page == 0))
 			goto redirty;
 
 		/*
 		 * The page straddles i_size.  It must be zeroed out on each
 		 * and every writepage invocation because it may be mmapped.
 		 * "A file is mapped in multiples of the page size.  For a file
-		 * that is not a multiple of the  page size, the remaining
+		 * that is not a multiple of the page size, the remaining
 		 * memory is zeroed when mapped, and writes to that region are
 		 * not written out to the file."
 		 */
 		zero_user_segment(page, offset_into_page, PAGE_CACHE_SIZE);
+
+		/* Adjust the end_offset to the end of file */
+		end_offset = offset;
 	}
 
-	end_offset = min_t(unsigned long long,
-			(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
-			offset);
 	len = 1 << inode->i_blkbits;
 
 	bh = head = page_buffers(page);

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 296160b..057f671 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c

@@ -1519,7 +1519,6 @@
 
 	while (!error && !done) {
 		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
-		tp->t_flags |= XFS_TRANS_RESERVE;
 		/*
 		 * We would need to reserve permanent block for transaction.
 		 * This will come into picture when after shifting extent into
@@ -1529,7 +1528,6 @@
 		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
 				XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
 		if (error) {
-			ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
 			xfs_trans_cancel(tp, 0);
 			break;
 		}

diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 9eec594..4db5102 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c

@@ -2624,47 +2624,6 @@
 		xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
 	else
 		xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
-
-	/*
-	 * This verification code will be moved to a CRC verification callback
-	 * function so just leave it here unchanged until then.
-	 */
-	{
-		xfs_dir2_data_hdr_t	*hdr = bp->b_addr;
-		xfs_dir2_free_t		*free = bp->b_addr;
-		xfs_da_blkinfo_t	*info = bp->b_addr;
-		uint			magic, magic1;
-		struct xfs_mount	*mp = dp->i_mount;
-
-		magic = be16_to_cpu(info->magic);
-		magic1 = be32_to_cpu(hdr->magic);
-		if (unlikely(
-		    XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
-				   (magic != XFS_DA3_NODE_MAGIC) &&
-				   (magic != XFS_ATTR_LEAF_MAGIC) &&
-				   (magic != XFS_ATTR3_LEAF_MAGIC) &&
-				   (magic != XFS_DIR2_LEAF1_MAGIC) &&
-				   (magic != XFS_DIR3_LEAF1_MAGIC) &&
-				   (magic != XFS_DIR2_LEAFN_MAGIC) &&
-				   (magic != XFS_DIR3_LEAFN_MAGIC) &&
-				   (magic1 != XFS_DIR2_BLOCK_MAGIC) &&
-				   (magic1 != XFS_DIR3_BLOCK_MAGIC) &&
-				   (magic1 != XFS_DIR2_DATA_MAGIC) &&
-				   (magic1 != XFS_DIR3_DATA_MAGIC) &&
-				   (free->hdr.magic !=
-					cpu_to_be32(XFS_DIR2_FREE_MAGIC)) &&
-				   (free->hdr.magic !=
-					cpu_to_be32(XFS_DIR3_FREE_MAGIC)),
-				mp, XFS_ERRTAG_DA_READ_BUF,
-				XFS_RANDOM_DA_READ_BUF))) {
-			trace_xfs_da_btree_corrupt(bp, _RET_IP_);
-			XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)",
-					     XFS_ERRLEVEL_LOW, mp, info);
-			error = XFS_ERROR(EFSCORRUPTED);
-			xfs_trans_brelse(trans, bp);
-			goto out_free;
-		}
-	}
 	*bpp = bp;
 out_free:
 	if (mapp != &map)

diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 6ac0c29..1313df7 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c

@@ -1437,7 +1437,7 @@
 	struct xfs_buf			*agbp,
 	xfs_agino_t			agino,
 	struct xfs_bmap_free		*flist,
-	int				*delete,
+	int				*deleted,
 	xfs_ino_t			*first_ino,
 	struct xfs_inobt_rec_incore	*orec)
 {
@@ -1497,7 +1497,7 @@
 	if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
 	    (rec.ir_freecount == mp->m_ialloc_inos)) {
 
-		*delete = 1;
+		*deleted = 1;
 		*first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
 
 		/*
@@ -1525,7 +1525,7 @@
 				  XFS_AGINO_TO_AGBNO(mp, rec.ir_startino)),
 				  mp->m_ialloc_blks, flist, mp);
 	} else {
-		*delete = 0;
+		*deleted = 0;
 
 		error = xfs_inobt_update(cur, &rec);
 		if (error) {
@@ -1662,7 +1662,7 @@
 	struct xfs_trans	*tp,		/* transaction pointer */
 	xfs_ino_t		inode,		/* inode to be freed */
 	struct xfs_bmap_free	*flist,		/* extents to free */
-	int			*delete,/* set if inode cluster was deleted */
+	int			*deleted,/* set if inode cluster was deleted */
 	xfs_ino_t		*first_ino)/* first inode in deleted cluster */
 {
 	/* REFERENCED */
@@ -1714,7 +1714,7 @@
 	/*
 	 * Fix up the inode allocation btree.
 	 */
-	error = xfs_difree_inobt(mp, tp, agbp, agino, flist, delete, first_ino,
+	error = xfs_difree_inobt(mp, tp, agbp, agino, flist, deleted, first_ino,
 				 &rec);
 	if (error)
 		goto error0;

diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h
index 812365d..95ad1c0 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/xfs_ialloc.h

@@ -90,7 +90,7 @@
 	struct xfs_trans *tp,		/* transaction pointer */
 	xfs_ino_t	inode,		/* inode to be freed */
 	struct xfs_bmap_free *flist,	/* extents to free */
-	int		*delete,	/* set if inode cluster was deleted */
+	int		*deleted,	/* set if inode cluster was deleted */
 	xfs_ino_t	*first_ino);	/* first inode in deleted cluster */
 
 /*

diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 2c40044..84e0deb 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h

@@ -24,7 +24,8 @@
 	struct xfs_log_iovec	*lv_iovecp;	/* iovec array */
 	struct xfs_log_item	*lv_item;	/* owner */
 	char			*lv_buf;	/* formatted buffer */
-	int			lv_buf_len;	/* size of formatted buffer */
+	int			lv_bytes;	/* accounted space in buffer */
+	int			lv_buf_len;	/* aligned size of buffer */
 	int			lv_size;	/* size of allocated lv */
 };
 
@@ -52,15 +53,21 @@
 	return vec->i_addr;
 }
 
+/*
+ * We need to make sure the next buffer is naturally aligned for the biggest
+ * basic data type we put into it.  We already accounted for this padding when
+ * sizing the buffer.
+ *
+ * However, this padding does not get written into the log, and hence we have to
+ * track the space used by the log vectors separately to prevent log space hangs
+ * due to inaccurate accounting (i.e. a leak) of the used log space through the
+ * CIL context ticket.
+ */
 static inline void
 xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len)
 {
-	/*
-	 * We need to make sure the next buffer is naturally aligned for the
-	 * biggest basic data type we put into it.  We already accounted for
-	 * this when sizing the buffer.
-	 */
 	lv->lv_buf_len += round_up(len, sizeof(uint64_t));
+	lv->lv_bytes += len;
 	vec->i_len = len;
 }
 

diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 039c873..b3425b3 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c

@@ -97,7 +97,7 @@
 {
 	/* Account for the new LV being passed in */
 	if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) {
-		*diff_len += lv->lv_buf_len;
+		*diff_len += lv->lv_bytes;
 		*diff_iovecs += lv->lv_niovecs;
 	}
 
@@ -111,7 +111,7 @@
 	else if (old_lv != lv) {
 		ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);
 
-		*diff_len -= old_lv->lv_buf_len;
+		*diff_len -= old_lv->lv_bytes;
 		*diff_iovecs -= old_lv->lv_niovecs;
 		kmem_free(old_lv);
 	}
@@ -239,7 +239,7 @@
 			 * that the space reservation accounting is correct.
 			 */
 			*diff_iovecs -= lv->lv_niovecs;
-			*diff_len -= lv->lv_buf_len;
+			*diff_len -= lv->lv_bytes;
 		} else {
 			/* allocate new data chunk */
 			lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS);
@@ -259,6 +259,7 @@
 
 		/* The allocated data region lies beyond the iovec region */
 		lv->lv_buf_len = 0;
+		lv->lv_bytes = 0;
 		lv->lv_buf = (char *)lv + buf_size - nbytes;
 		ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t)));
commit	0d907a3bb4a77cffebebd17c323e898048301aa3	[log] [tgz]
author	Dave Chinner <david@fromorbit.com>	Tue May 20 08:56:00 2014 +1000
committer	Dave Chinner <david@fromorbit.com>	Tue May 20 08:56:00 2014 +1000
tree	c093289c9ab443935ce10e138ab49efacb2c283f
parent	2d6dcc6d7e95cc83046b2f97e179e6bbb7921245 [diff]
parent	376c2f3a5f0706868b08ccf043bf3532936a03b1 [diff]