Blame - fs/xfs/libxfs/xfs_rmap_btree.c - kernel/msm-5.4

blob: 95cb964606d13c62ed95fb0fe1e56802be297824 [file] [log] [blame]

Darrick J. Wong	035e00a	2016-08-03 11:36:07 +1000	[diff] [blame]	1	/*
				2	* Copyright (c) 2014 Red Hat, Inc.
				3	* All Rights Reserved.
				4	*
				5	* This program is free software; you can redistribute it and/or
				6	* modify it under the terms of the GNU General Public License as
				7	* published by the Free Software Foundation.
				8	*
				9	* This program is distributed in the hope that it would be useful,
				10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				12	* GNU General Public License for more details.
				13	*
				14	* You should have received a copy of the GNU General Public License
				15	* along with this program; if not, write the Free Software Foundation,
				16	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
				17	*/
				18	#include "xfs.h"
				19	#include "xfs_fs.h"
				20	#include "xfs_shared.h"
				21	#include "xfs_format.h"
				22	#include "xfs_log_format.h"
				23	#include "xfs_trans_resv.h"
				24	#include "xfs_bit.h"
				25	#include "xfs_sb.h"
				26	#include "xfs_mount.h"
				27	#include "xfs_defer.h"
				28	#include "xfs_inode.h"
				29	#include "xfs_trans.h"
				30	#include "xfs_alloc.h"
				31	#include "xfs_btree.h"
Darrick J. Wong	4b8ed67	2016-08-03 11:39:05 +1000	[diff] [blame^]	32	#include "xfs_rmap.h"
Darrick J. Wong	035e00a	2016-08-03 11:36:07 +1000	[diff] [blame]	33	#include "xfs_rmap_btree.h"
				34	#include "xfs_trace.h"
				35	#include "xfs_cksum.h"
				36	#include "xfs_error.h"
				37	#include "xfs_extent_busy.h"
				38
Darrick J. Wong	4b8ed67	2016-08-03 11:39:05 +1000	[diff] [blame^]	39	/*
				40	* Reverse map btree.
				41	*
				42	* This is a per-ag tree used to track the owner(s) of a given extent. With
				43	* reflink it is possible for there to be multiple owners, which is a departure
				44	* from classic XFS. Owner records for data extents are inserted when the
				45	* extent is mapped and removed when an extent is unmapped. Owner records for
				46	* all other block types (i.e. metadata) are inserted when an extent is
				47	* allocated and removed when an extent is freed. There can only be one owner
				48	* of a metadata extent, usually an inode or some other metadata structure like
				49	* an AG btree.
				50	*
				51	* The rmap btree is part of the free space management, so blocks for the tree
				52	* are sourced from the agfl. Hence we need transaction reservation support for
				53	* this tree so that the freelist is always large enough. This also impacts on
				54	* the minimum space we need to leave free in the AG.
				55	*
				56	* The tree is ordered by [ag block, owner, offset]. This is a large key size,
				57	* but it is the only way to enforce unique keys when a block can be owned by
				58	* multiple files at any offset. There's no need to order/search by extent
				59	* size for online updating/management of the tree. It is intended that most
				60	* reverse lookups will be to find the owner(s) of a particular block, or to
				61	* try to recover tree and file data from corrupt primary metadata.
				62	*/
				63
Darrick J. Wong	035e00a	2016-08-03 11:36:07 +1000	[diff] [blame]	64	static struct xfs_btree_cur *
				65	xfs_rmapbt_dup_cursor(
				66	struct xfs_btree_cur *cur)
				67	{
				68	return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp,
				69	cur->bc_private.a.agbp, cur->bc_private.a.agno);
				70	}
				71
Darrick J. Wong	4b8ed67	2016-08-03 11:39:05 +1000	[diff] [blame^]	72	STATIC void
				73	xfs_rmapbt_set_root(
				74	struct xfs_btree_cur *cur,
				75	union xfs_btree_ptr *ptr,
				76	int inc)
				77	{
				78	struct xfs_buf *agbp = cur->bc_private.a.agbp;
				79	struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
				80	xfs_agnumber_t seqno = be32_to_cpu(agf->agf_seqno);
				81	int btnum = cur->bc_btnum;
				82	struct xfs_perag *pag = xfs_perag_get(cur->bc_mp, seqno);
				83
				84	ASSERT(ptr->s != 0);
				85
				86	agf->agf_roots[btnum] = ptr->s;
				87	be32_add_cpu(&agf->agf_levels[btnum], inc);
				88	pag->pagf_levels[btnum] += inc;
				89	xfs_perag_put(pag);
				90
				91	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS \| XFS_AGF_LEVELS);
				92	}
				93
				94	STATIC int
				95	xfs_rmapbt_alloc_block(
				96	struct xfs_btree_cur *cur,
				97	union xfs_btree_ptr *start,
				98	union xfs_btree_ptr *new,
				99	int *stat)
				100	{
				101	int error;
				102	xfs_agblock_t bno;
				103
				104	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
				105
				106	/* Allocate the new block from the freelist. If we can't, give up. */
				107	error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
				108	&bno, 1);
				109	if (error) {
				110	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
				111	return error;
				112	}
				113
				114	trace_xfs_rmapbt_alloc_block(cur->bc_mp, cur->bc_private.a.agno,
				115	bno, 1);
				116	if (bno == NULLAGBLOCK) {
				117	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
				118	*stat = 0;
				119	return 0;
				120	}
				121
				122	xfs_extent_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1,
				123	false);
				124
				125	xfs_trans_agbtree_delta(cur->bc_tp, 1);
				126	new->s = cpu_to_be32(bno);
				127
				128	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
				129	*stat = 1;
				130	return 0;
				131	}
				132
				133	STATIC int
				134	xfs_rmapbt_free_block(
				135	struct xfs_btree_cur *cur,
				136	struct xfs_buf *bp)
				137	{
				138	struct xfs_buf *agbp = cur->bc_private.a.agbp;
				139	struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
				140	xfs_agblock_t bno;
				141	int error;
				142
				143	bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp));
				144	trace_xfs_rmapbt_free_block(cur->bc_mp, cur->bc_private.a.agno,
				145	bno, 1);
				146	error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
				147	if (error)
				148	return error;
				149
				150	xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
				151	XFS_EXTENT_BUSY_SKIP_DISCARD);
				152	xfs_trans_agbtree_delta(cur->bc_tp, -1);
				153
				154	return 0;
				155	}
				156
				157	STATIC int
				158	xfs_rmapbt_get_minrecs(
				159	struct xfs_btree_cur *cur,
				160	int level)
				161	{
				162	return cur->bc_mp->m_rmap_mnr[level != 0];
				163	}
				164
				165	STATIC int
				166	xfs_rmapbt_get_maxrecs(
				167	struct xfs_btree_cur *cur,
				168	int level)
				169	{
				170	return cur->bc_mp->m_rmap_mxr[level != 0];
				171	}
				172
				173	STATIC void
				174	xfs_rmapbt_init_key_from_rec(
				175	union xfs_btree_key *key,
				176	union xfs_btree_rec *rec)
				177	{
				178	key->rmap.rm_startblock = rec->rmap.rm_startblock;
				179	key->rmap.rm_owner = rec->rmap.rm_owner;
				180	key->rmap.rm_offset = rec->rmap.rm_offset;
				181	}
				182
				183	STATIC void
				184	xfs_rmapbt_init_rec_from_cur(
				185	struct xfs_btree_cur *cur,
				186	union xfs_btree_rec *rec)
				187	{
				188	rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock);
				189	rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount);
				190	rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner);
				191	rec->rmap.rm_offset = cpu_to_be64(
				192	xfs_rmap_irec_offset_pack(&cur->bc_rec.r));
				193	}
				194
				195	STATIC void
				196	xfs_rmapbt_init_ptr_from_cur(
				197	struct xfs_btree_cur *cur,
				198	union xfs_btree_ptr *ptr)
				199	{
				200	struct xfs_agf *agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
				201
				202	ASSERT(cur->bc_private.a.agno == be32_to_cpu(agf->agf_seqno));
				203	ASSERT(agf->agf_roots[cur->bc_btnum] != 0);
				204
				205	ptr->s = agf->agf_roots[cur->bc_btnum];
				206	}
				207
				208	STATIC __int64_t
				209	xfs_rmapbt_key_diff(
				210	struct xfs_btree_cur *cur,
				211	union xfs_btree_key *key)
				212	{
				213	struct xfs_rmap_irec *rec = &cur->bc_rec.r;
				214	struct xfs_rmap_key *kp = &key->rmap;
				215	__u64 x, y;
				216	__int64_t d;
				217
				218	d = (__int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
				219	if (d)
				220	return d;
				221
				222	x = be64_to_cpu(kp->rm_owner);
				223	y = rec->rm_owner;
				224	if (x > y)
				225	return 1;
				226	else if (y > x)
				227	return -1;
				228
				229	x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset));
				230	y = rec->rm_offset;
				231	if (x > y)
				232	return 1;
				233	else if (y > x)
				234	return -1;
				235	return 0;
				236	}
				237
Darrick J. Wong	035e00a	2016-08-03 11:36:07 +1000	[diff] [blame]	238	static bool
				239	xfs_rmapbt_verify(
				240	struct xfs_buf *bp)
				241	{
				242	struct xfs_mount *mp = bp->b_target->bt_mount;
				243	struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
				244	struct xfs_perag *pag = bp->b_pag;
				245	unsigned int level;
				246
				247	/*
				248	* magic number and level verification
				249	*
				250	* During growfs operations, we can't verify the exact level or owner as
				251	* the perag is not fully initialised and hence not attached to the
				252	* buffer. In this case, check against the maximum tree depth.
				253	*
				254	* Similarly, during log recovery we will have a perag structure
				255	* attached, but the agf information will not yet have been initialised
				256	* from the on disk AGF. Again, we can only check against maximum limits
				257	* in this case.
				258	*/
				259	if (block->bb_magic != cpu_to_be32(XFS_RMAP_CRC_MAGIC))
				260	return false;
				261
				262	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
				263	return false;
				264	if (!xfs_btree_sblock_v5hdr_verify(bp))
				265	return false;
				266
				267	level = be16_to_cpu(block->bb_level);
				268	if (pag && pag->pagf_init) {
				269	if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi])
				270	return false;
				271	} else if (level >= mp->m_rmap_maxlevels)
				272	return false;
				273
				274	return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]);
				275	}
				276
				277	static void
				278	xfs_rmapbt_read_verify(
				279	struct xfs_buf *bp)
				280	{
				281	if (!xfs_btree_sblock_verify_crc(bp))
				282	xfs_buf_ioerror(bp, -EFSBADCRC);
				283	else if (!xfs_rmapbt_verify(bp))
				284	xfs_buf_ioerror(bp, -EFSCORRUPTED);
				285
				286	if (bp->b_error) {
				287	trace_xfs_btree_corrupt(bp, _RET_IP_);
				288	xfs_verifier_error(bp);
				289	}
				290	}
				291
				292	static void
				293	xfs_rmapbt_write_verify(
				294	struct xfs_buf *bp)
				295	{
				296	if (!xfs_rmapbt_verify(bp)) {
				297	trace_xfs_btree_corrupt(bp, _RET_IP_);
				298	xfs_buf_ioerror(bp, -EFSCORRUPTED);
				299	xfs_verifier_error(bp);
				300	return;
				301	}
				302	xfs_btree_sblock_calc_crc(bp);
				303
				304	}
				305
				306	const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
				307	.name = "xfs_rmapbt",
				308	.verify_read = xfs_rmapbt_read_verify,
				309	.verify_write = xfs_rmapbt_write_verify,
				310	};
				311
#if defined(DEBUG) || defined(XFS_WARN)
/*
 * Ordering check for two on-disk keys.
 *
 * Returns 1 when @k1 sorts at or before @k2 and 0 otherwise.  Fields are
 * compared as startblock, then owner, then offset (after XFS_RMAP_OFF());
 * keys that are equal in all three count as in order.
 */
STATIC int
xfs_rmapbt_keys_inorder(
	struct xfs_btree_cur	*cur,
	union xfs_btree_key	*k1,
	union xfs_btree_key	*k2)
{
	__uint32_t		x;
	__uint32_t		y;
	__uint64_t		a;
	__uint64_t		b;

	x = be32_to_cpu(k1->rmap.rm_startblock);
	y = be32_to_cpu(k2->rmap.rm_startblock);
	if (x != y)
		return x < y;

	a = be64_to_cpu(k1->rmap.rm_owner);
	b = be64_to_cpu(k2->rmap.rm_owner);
	if (a != b)
		return a < b;

	a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset));
	b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset));
	return a <= b;
}

/*
 * Ordering check for two on-disk records.
 *
 * Returns 1 when @r1 sorts at or before @r2 and 0 otherwise, using the
 * same startblock / owner / offset comparison as the key check; the
 * block count is not considered.
 */
STATIC int
xfs_rmapbt_recs_inorder(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*r1,
	union xfs_btree_rec	*r2)
{
	__uint32_t		x;
	__uint32_t		y;
	__uint64_t		a;
	__uint64_t		b;

	x = be32_to_cpu(r1->rmap.rm_startblock);
	y = be32_to_cpu(r2->rmap.rm_startblock);
	if (x != y)
		return x < y;

	a = be64_to_cpu(r1->rmap.rm_owner);
	b = be64_to_cpu(r2->rmap.rm_owner);
	if (a != b)
		return a < b;

	a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset));
	b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset));
	return a <= b;
}
#endif	/* DEBUG || XFS_WARN */
				373
Darrick J. Wong	035e00a	2016-08-03 11:36:07 +1000	[diff] [blame]	374	static const struct xfs_btree_ops xfs_rmapbt_ops = {
				375	.rec_len = sizeof(struct xfs_rmap_rec),
				376	.key_len = 2 * sizeof(struct xfs_rmap_key),
				377
				378	.dup_cursor = xfs_rmapbt_dup_cursor,
Darrick J. Wong	4b8ed67	2016-08-03 11:39:05 +1000	[diff] [blame^]	379	.set_root = xfs_rmapbt_set_root,
				380	.alloc_block = xfs_rmapbt_alloc_block,
				381	.free_block = xfs_rmapbt_free_block,
				382	.get_minrecs = xfs_rmapbt_get_minrecs,
				383	.get_maxrecs = xfs_rmapbt_get_maxrecs,
				384	.init_key_from_rec = xfs_rmapbt_init_key_from_rec,
				385	.init_rec_from_cur = xfs_rmapbt_init_rec_from_cur,
				386	.init_ptr_from_cur = xfs_rmapbt_init_ptr_from_cur,
				387	.key_diff = xfs_rmapbt_key_diff,
Darrick J. Wong	035e00a	2016-08-03 11:36:07 +1000	[diff] [blame]	388	.buf_ops = &xfs_rmapbt_buf_ops,
Darrick J. Wong	4b8ed67	2016-08-03 11:39:05 +1000	[diff] [blame^]	389	#if defined(DEBUG) \|\| defined(XFS_WARN)
				390	.keys_inorder = xfs_rmapbt_keys_inorder,
				391	.recs_inorder = xfs_rmapbt_recs_inorder,
				392	#endif
Darrick J. Wong	035e00a	2016-08-03 11:36:07 +1000	[diff] [blame]	393
				394	.get_leaf_keys = xfs_btree_get_leaf_keys_overlapped,
				395	.get_node_keys = xfs_btree_get_node_keys_overlapped,
				396	.update_keys = xfs_btree_update_keys_overlapped,
				397	};
				398
				399	/*
				400	* Allocate a new allocation btree cursor.
				401	*/
				402	struct xfs_btree_cur *
				403	xfs_rmapbt_init_cursor(
				404	struct xfs_mount *mp,
				405	struct xfs_trans *tp,
				406	struct xfs_buf *agbp,
				407	xfs_agnumber_t agno)
				408	{
				409	struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
				410	struct xfs_btree_cur *cur;
				411
				412	cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS);
				413	cur->bc_tp = tp;
				414	cur->bc_mp = mp;
				415	cur->bc_btnum = XFS_BTNUM_RMAP;
				416	cur->bc_flags = XFS_BTREE_CRC_BLOCKS;
				417	cur->bc_blocklog = mp->m_sb.sb_blocklog;
				418	cur->bc_ops = &xfs_rmapbt_ops;
				419	cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
				420
				421	cur->bc_private.a.agbp = agbp;
				422	cur->bc_private.a.agno = agno;
				423
				424	return cur;
				425	}
				426
				427	/*
				428	* Calculate number of records in an rmap btree block.
				429	*/
				430	int
				431	xfs_rmapbt_maxrecs(
				432	struct xfs_mount *mp,
				433	int blocklen,
				434	int leaf)
				435	{
				436	blocklen -= XFS_RMAP_BLOCK_LEN;
				437
				438	if (leaf)
				439	return blocklen / sizeof(struct xfs_rmap_rec);
				440	return blocklen /
				441	(sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t));
				442	}
				443
				444	/* Compute the maximum height of an rmap btree. */
				445	void
				446	xfs_rmapbt_compute_maxlevels(
				447	struct xfs_mount *mp)
				448	{
				449	mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(mp,
				450	mp->m_rmap_mnr, mp->m_sb.sb_agblocks);
				451	}