xfs: propagate bmap updates to rmapbt When we map, unmap, or convert an extent in a file's data or attr fork, schedule a respective update in the rmapbt. Previous versions of this patch required a 1:1 correspondence between bmap and rmap, but this is no longer true as we now have ability to make interval queries against the rmapbt. We use the deferred operations code to handle redo operations atomically and deadlock free. This plumbs in all five rmap actions (map, unmap, convert extent, alloc, free); we'll use the first three now for file data, and reflink will want the last two. We also add an error injection site to test log recovery. Finally, we need to fix the bmap shift extent code to adjust the rmaps correctly. Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>

commit: 9c19464469556a0cd342fc40a24926ab46d7d243 [log] [tgz]
author: Darrick J. Wong <darrick.wong@oracle.com> Wed Aug 03 12:16:05 2016 +1000
committer: Dave Chinner <david@fromorbit.com> Wed Aug 03 12:16:05 2016 +1000
tree: c4cd6d84d5e799734cefc169ef627103b485d511
parent: f8dbebef98f0b960a0e91d6b8d45c288c377797b [diff] [blame]
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index e8ce97f..73d0540 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c

@@ -36,6 +36,8 @@
 #include "xfs_trace.h"
 #include "xfs_error.h"
 #include "xfs_extent_busy.h"
+#include "xfs_bmap.h"
+#include "xfs_inode.h"
 
 /*
  * Lookup the first record less than or equal to [bno, len, owner, offset]
@@ -1138,3 +1140,260 @@
 	return xfs_btree_query_range(cur, &low_brec, &high_brec,
 			xfs_rmap_query_range_helper, &query);
 }
+
+/* Clean up after calling xfs_rmap_finish_one. */
+void
+xfs_rmap_finish_one_cleanup(
+	struct xfs_trans	*tp,
+	struct xfs_btree_cur	*rcur,
+	int			error)
+{
+	struct xfs_buf		*agbp;
+
+	if (rcur == NULL)
+		return;
+	agbp = rcur->bc_private.a.agbp;
+	xfs_btree_del_cursor(rcur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+	if (error)
+		xfs_trans_brelse(tp, agbp);
+}
+
+/*
+ * Process one of the deferred rmap operations.  We pass back the
+ * btree cursor to maintain our lock on the rmapbt between calls.
+ * This saves time and eliminates a buffer deadlock between the
+ * superblock and the AGF because we'll always grab them in the same
+ * order.
+ */
+int
+xfs_rmap_finish_one(
+	struct xfs_trans		*tp,
+	enum xfs_rmap_intent_type	type,
+	__uint64_t			owner,
+	int				whichfork,
+	xfs_fileoff_t			startoff,
+	xfs_fsblock_t			startblock,
+	xfs_filblks_t			blockcount,
+	xfs_exntst_t			state,
+	struct xfs_btree_cur		**pcur)
+{
+	struct xfs_mount		*mp = tp->t_mountp;
+	struct xfs_btree_cur		*rcur;
+	struct xfs_buf			*agbp = NULL;
+	int				error = 0;
+	xfs_agnumber_t			agno;
+	struct xfs_owner_info		oinfo;
+	xfs_agblock_t			bno;
+	bool				unwritten;
+
+	agno = XFS_FSB_TO_AGNO(mp, startblock);
+	ASSERT(agno != NULLAGNUMBER);
+	bno = XFS_FSB_TO_AGBNO(mp, startblock);
+
+	trace_xfs_rmap_deferred(mp, agno, type, bno, owner, whichfork,
+			startoff, blockcount, state);
+
+	if (XFS_TEST_ERROR(false, mp,
+			XFS_ERRTAG_RMAP_FINISH_ONE,
+			XFS_RANDOM_RMAP_FINISH_ONE))
+		return -EIO;
+
+	/*
+	 * If we haven't gotten a cursor or the cursor AG doesn't match
+	 * the startblock, get one now.
+	 */
+	rcur = *pcur;
+	if (rcur != NULL && rcur->bc_private.a.agno != agno) {
+		xfs_rmap_finish_one_cleanup(tp, rcur, 0);
+		rcur = NULL;
+		*pcur = NULL;
+	}
+	if (rcur == NULL) {
+		/*
+		 * Refresh the freelist before we start changing the
+		 * rmapbt, because a shape change could cause us to
+		 * allocate blocks.
+		 */
+		error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
+		if (error)
+			return error;
+		if (!agbp)
+			return -EFSCORRUPTED;
+
+		rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
+		if (!rcur) {
+			error = -ENOMEM;
+			goto out_cur;
+		}
+	}
+	*pcur = rcur;
+
+	xfs_rmap_ino_owner(&oinfo, owner, whichfork, startoff);
+	unwritten = state == XFS_EXT_UNWRITTEN;
+	bno = XFS_FSB_TO_AGBNO(rcur->bc_mp, startblock);
+
+	switch (type) {
+	case XFS_RMAP_ALLOC:
+	case XFS_RMAP_MAP:
+		error = xfs_rmap_map(rcur, bno, blockcount, unwritten, &oinfo);
+		break;
+	case XFS_RMAP_FREE:
+	case XFS_RMAP_UNMAP:
+		error = xfs_rmap_unmap(rcur, bno, blockcount, unwritten,
+				&oinfo);
+		break;
+	case XFS_RMAP_CONVERT:
+		error = xfs_rmap_convert(rcur, bno, blockcount, !unwritten,
+				&oinfo);
+		break;
+	default:
+		ASSERT(0);
+		error = -EFSCORRUPTED;
+	}
+	return error;
+
+out_cur:
+	xfs_trans_brelse(tp, agbp);
+
+	return error;
+}
+
+/*
+ * Don't defer an rmap if we aren't an rmap filesystem.
+ */
+static bool
+xfs_rmap_update_is_needed(
+	struct xfs_mount	*mp)
+{
+	return xfs_sb_version_hasrmapbt(&mp->m_sb);
+}
+
+/*
+ * Record a rmap intent; the list is kept sorted first by AG and then by
+ * increasing age.
+ */
+static int
+__xfs_rmap_add(
+	struct xfs_mount		*mp,
+	struct xfs_defer_ops		*dfops,
+	enum xfs_rmap_intent_type	type,
+	__uint64_t			owner,
+	int				whichfork,
+	struct xfs_bmbt_irec		*bmap)
+{
+	struct xfs_rmap_intent	*ri;
+
+	trace_xfs_rmap_defer(mp, XFS_FSB_TO_AGNO(mp, bmap->br_startblock),
+			type,
+			XFS_FSB_TO_AGBNO(mp, bmap->br_startblock),
+			owner, whichfork,
+			bmap->br_startoff,
+			bmap->br_blockcount,
+			bmap->br_state);
+
+	ri = kmem_alloc(sizeof(struct xfs_rmap_intent), KM_SLEEP | KM_NOFS);
+	INIT_LIST_HEAD(&ri->ri_list);
+	ri->ri_type = type;
+	ri->ri_owner = owner;
+	ri->ri_whichfork = whichfork;
+	ri->ri_bmap = *bmap;
+
+	xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_RMAP, &ri->ri_list);
+	return 0;
+}
+
+/* Map an extent into a file. */
+int
+xfs_rmap_map_extent(
+	struct xfs_mount	*mp,
+	struct xfs_defer_ops	*dfops,
+	struct xfs_inode	*ip,
+	int			whichfork,
+	struct xfs_bmbt_irec	*PREV)
+{
+	if (!xfs_rmap_update_is_needed(mp))
+		return 0;
+
+	return __xfs_rmap_add(mp, dfops, XFS_RMAP_MAP, ip->i_ino,
+			whichfork, PREV);
+}
+
+/* Unmap an extent out of a file. */
+int
+xfs_rmap_unmap_extent(
+	struct xfs_mount	*mp,
+	struct xfs_defer_ops	*dfops,
+	struct xfs_inode	*ip,
+	int			whichfork,
+	struct xfs_bmbt_irec	*PREV)
+{
+	if (!xfs_rmap_update_is_needed(mp))
+		return 0;
+
+	return __xfs_rmap_add(mp, dfops, XFS_RMAP_UNMAP, ip->i_ino,
+			whichfork, PREV);
+}
+
+/* Convert a data fork extent from unwritten to real or vice versa. */
+int
+xfs_rmap_convert_extent(
+	struct xfs_mount	*mp,
+	struct xfs_defer_ops	*dfops,
+	struct xfs_inode	*ip,
+	int			whichfork,
+	struct xfs_bmbt_irec	*PREV)
+{
+	if (!xfs_rmap_update_is_needed(mp))
+		return 0;
+
+	return __xfs_rmap_add(mp, dfops, XFS_RMAP_CONVERT, ip->i_ino,
+			whichfork, PREV);
+}
+
+/* Schedule the creation of an rmap for non-file data. */
+int
+xfs_rmap_alloc_extent(
+	struct xfs_mount	*mp,
+	struct xfs_defer_ops	*dfops,
+	xfs_agnumber_t		agno,
+	xfs_agblock_t		bno,
+	xfs_extlen_t		len,
+	__uint64_t		owner)
+{
+	struct xfs_bmbt_irec	bmap;
+
+	if (!xfs_rmap_update_is_needed(mp))
+		return 0;
+
+	bmap.br_startblock = XFS_AGB_TO_FSB(mp, agno, bno);
+	bmap.br_blockcount = len;
+	bmap.br_startoff = 0;
+	bmap.br_state = XFS_EXT_NORM;
+
+	return __xfs_rmap_add(mp, dfops, XFS_RMAP_ALLOC, owner,
+			XFS_DATA_FORK, &bmap);
+}
+
+/* Schedule the deletion of an rmap for non-file data. */
+int
+xfs_rmap_free_extent(
+	struct xfs_mount	*mp,
+	struct xfs_defer_ops	*dfops,
+	xfs_agnumber_t		agno,
+	xfs_agblock_t		bno,
+	xfs_extlen_t		len,
+	__uint64_t		owner)
+{
+	struct xfs_bmbt_irec	bmap;
+
+	if (!xfs_rmap_update_is_needed(mp))
+		return 0;
+
+	bmap.br_startblock = XFS_AGB_TO_FSB(mp, agno, bno);
+	bmap.br_blockcount = len;
+	bmap.br_startoff = 0;
+	bmap.br_state = XFS_EXT_NORM;
+
+	return __xfs_rmap_add(mp, dfops, XFS_RMAP_FREE, owner,
+			XFS_DATA_FORK, &bmap);
+}
commit	9c19464469556a0cd342fc40a24926ab46d7d243	[log] [tgz]
author	Darrick J. Wong <darrick.wong@oracle.com>	Wed Aug 03 12:16:05 2016 +1000
committer	Dave Chinner <david@fromorbit.com>	Wed Aug 03 12:16:05 2016 +1000
tree	c4cd6d84d5e799734cefc169ef627103b485d511
parent	f8dbebef98f0b960a0e91d6b8d45c288c377797b [diff] [blame]