blob: f8bab9b2275559f785c0776315c996fe12c41537 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
Nathan Scott7b718762005-11-02 14:58:39 +11002 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004 *
Nathan Scott7b718762005-11-02 14:58:39 +11005 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
Linus Torvalds1da177e2005-04-16 15:20:36 -07007 * published by the Free Software Foundation.
8 *
Nathan Scott7b718762005-11-02 14:58:39 +11009 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
Linus Torvalds1da177e2005-04-16 15:20:36 -070013 *
Nathan Scott7b718762005-11-02 14:58:39 +110014 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Linus Torvalds1da177e2005-04-16 15:20:36 -070017 */
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include "xfs.h"
Nathan Scotta844f452005-11-02 14:38:42 +110019#include "xfs_fs.h"
Dave Chinner70a98832013-10-23 10:36:05 +110020#include "xfs_shared.h"
Dave Chinnera4fbe6a2013-10-23 10:51:50 +110021#include "xfs_format.h"
Dave Chinner239880e2013-10-23 10:50:10 +110022#include "xfs_log_format.h"
23#include "xfs_trans_resv.h"
Nathan Scotta844f452005-11-02 14:38:42 +110024#include "xfs_bit.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070025#include "xfs_mount.h"
Darrick J. Wong3ab78df2016-08-03 11:15:38 +100026#include "xfs_defer.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070027#include "xfs_inode.h"
Dave Chinner239880e2013-10-23 10:50:10 +110028#include "xfs_trans.h"
Christoph Hellwig38bb7422008-10-30 16:56:22 +110029#include "xfs_inode_item.h"
Christoph Hellwigee1a47a2013-04-21 14:53:46 -050030#include "xfs_buf_item.h"
Nathan Scotta844f452005-11-02 14:38:42 +110031#include "xfs_btree.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070032#include "xfs_error.h"
Christoph Hellwig0b1b2132009-12-14 23:14:59 +000033#include "xfs_trace.h"
Christoph Hellwigee1a47a2013-04-21 14:53:46 -050034#include "xfs_cksum.h"
Dave Chinnercf11da92014-07-15 07:08:24 +100035#include "xfs_alloc.h"
Brian Fostera45086e2015-10-12 15:59:25 +110036#include "xfs_log.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070037
/*
 * Cursor allocation zone (slab cache) backing all struct xfs_btree_cur
 * allocations; created during XFS initialization elsewhere.
 */
kmem_zone_t	*xfs_btree_cur_zone;
42
43/*
44 * Btree magic numbers.
45 */
Christoph Hellwigee1a47a2013-04-21 14:53:46 -050046static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
Darrick J. Wongb8704942016-08-03 11:30:32 +100047 { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
Darrick J. Wong46eeb522016-10-03 09:11:16 -070048 XFS_FIBT_MAGIC, 0 },
Darrick J. Wongb8704942016-08-03 11:30:32 +100049 { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC,
Darrick J. Wong46eeb522016-10-03 09:11:16 -070050 XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC,
51 XFS_REFC_CRC_MAGIC }
Linus Torvalds1da177e2005-04-16 15:20:36 -070052};
Christoph Hellwigee1a47a2013-04-21 14:53:46 -050053#define xfs_btree_magic(cur) \
54 xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
Linus Torvalds1da177e2005-04-16 15:20:36 -070055
/*
 * Sanity-check a long-form (64-bit sibling pointer) btree block header:
 * on CRC filesystems the uuid/blkno stamps, then magic, level, record
 * count against get_maxrecs(), and both sibling pointers (non-zero and
 * either NULLFSBLOCK or a sane fsblock number).
 */
STATIC int				/* error (0 or EFSCORRUPTED) */
xfs_btree_check_lblock(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	struct xfs_btree_block	*block,	/* btree long form block pointer */
	int			level,	/* level of the btree block */
	struct xfs_buf		*bp)	/* buffer for block, if any */
{
	int			lblock_ok = 1; /* block passes checks */
	struct xfs_mount	*mp;	/* file system mount point */

	mp = cur->bc_mp;

	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		lblock_ok = lblock_ok &&
			uuid_equal(&block->bb_u.l.bb_uuid,
				   &mp->m_sb.sb_meta_uuid) &&
			block->bb_u.l.bb_blkno == cpu_to_be64(
				bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
	}

	lblock_ok = lblock_ok &&
		be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
		be16_to_cpu(block->bb_level) == level &&
		be16_to_cpu(block->bb_numrecs) <=
			cur->bc_ops->get_maxrecs(cur, level) &&
		block->bb_u.l.bb_leftsib &&
		(block->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK) ||
		 XFS_FSB_SANITY_CHECK(mp,
			be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
		block->bb_u.l.bb_rightsib &&
		(block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK) ||
		 XFS_FSB_SANITY_CHECK(mp,
			be64_to_cpu(block->bb_u.l.bb_rightsib)));

	/* XFS_TEST_ERROR also allows debug builds to inject failures here */
	if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
			XFS_ERRTAG_BTREE_CHECK_LBLOCK,
			XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
		if (bp)
			trace_xfs_btree_corrupt(bp, _RET_IP_);
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}
	return 0;
}
100
/*
 * Sanity-check a short-form (32-bit sibling pointer) btree block header.
 * Same idea as the long-form check, but sibling pointers are AG-relative
 * and must lie inside the AG per the cached AGF length.
 */
STATIC int				/* error (0 or EFSCORRUPTED) */
xfs_btree_check_sblock(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	struct xfs_btree_block	*block,	/* btree short form block pointer */
	int			level,	/* level of the btree block */
	struct xfs_buf		*bp)	/* buffer containing block */
{
	struct xfs_mount	*mp;	/* file system mount point */
	struct xfs_buf		*agbp;	/* buffer for ag. freespace struct */
	struct xfs_agf		*agf;	/* ag. freespace structure */
	xfs_agblock_t		agflen;	/* native ag. freespace length */
	int			sblock_ok = 1; /* block passes checks */

	mp = cur->bc_mp;
	agbp = cur->bc_private.a.agbp;
	agf = XFS_BUF_TO_AGF(agbp);
	agflen = be32_to_cpu(agf->agf_length);

	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		sblock_ok = sblock_ok &&
			uuid_equal(&block->bb_u.s.bb_uuid,
				   &mp->m_sb.sb_meta_uuid) &&
			block->bb_u.s.bb_blkno == cpu_to_be64(
				bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
	}

	sblock_ok = sblock_ok &&
		be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
		be16_to_cpu(block->bb_level) == level &&
		be16_to_cpu(block->bb_numrecs) <=
			cur->bc_ops->get_maxrecs(cur, level) &&
		(block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
		 be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
		block->bb_u.s.bb_leftsib &&
		(block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
		 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
		block->bb_u.s.bb_rightsib;

	/* XFS_TEST_ERROR also allows debug builds to inject failures here */
	if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp,
			XFS_ERRTAG_BTREE_CHECK_SBLOCK,
			XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
		if (bp)
			trace_xfs_btree_corrupt(bp, _RET_IP_);
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}
	return 0;
}
149
150/*
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100151 * Debug routine: check that block header is ok.
152 */
153int
154xfs_btree_check_block(
155 struct xfs_btree_cur *cur, /* btree cursor */
156 struct xfs_btree_block *block, /* generic btree block pointer */
157 int level, /* level of the btree block */
158 struct xfs_buf *bp) /* buffer containing block, if any */
159{
Christoph Hellwig7cc95a82008-10-30 17:14:34 +1100160 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
161 return xfs_btree_check_lblock(cur, block, level, bp);
162 else
163 return xfs_btree_check_sblock(cur, block, level, bp);
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100164}
165
/*
 * Check that a (long) block pointer is ok: used below a root (level > 0),
 * not the NULLFSBLOCK sentinel, and a sane fsblock number.
 */
int					/* error (0 or EFSCORRUPTED) */
xfs_btree_check_lptr(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_fsblock_t		bno,	/* btree block disk address */
	int			level)	/* btree block level */
{
	XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
		level > 0 &&
		bno != NULLFSBLOCK &&
		XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
	return 0;
}
181
#ifdef DEBUG
/*
 * Check that a (short, AG-relative) block pointer is ok: used below a
 * root, not the NULLAGBLOCK sentinel, non-zero (block 0 holds the AG
 * headers), and within the AG.
 */
STATIC int				/* error (0 or EFSCORRUPTED) */
xfs_btree_check_sptr(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* btree block disk address */
	int			level)	/* btree block level */
{
	xfs_agblock_t		agblocks = cur->bc_mp->m_sb.sb_agblocks;

	XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
		level > 0 &&
		bno != NULLAGBLOCK &&
		bno != 0 &&
		bno < agblocks);
	return 0;
}
201
/*
 * Check that the index'th pointer of a pointer array is ok, dispatching
 * on long vs. short pointer format.
 */
STATIC int				/* error (0 or EFSCORRUPTED) */
xfs_btree_check_ptr(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	union xfs_btree_ptr	*ptr,	/* btree block disk address */
	int			index,	/* offset from ptr to check */
	int			level)	/* btree block level */
{
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		return xfs_btree_check_lptr(cur,
				be64_to_cpu((&ptr->l)[index]), level);
	} else {
		return xfs_btree_check_sptr(cur,
				be32_to_cpu((&ptr->s)[index]), level);
	}
}
#endif	/* DEBUG */
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100221
/*
 * Calculate CRC on the whole btree block and stuff it into the
 * long-form btree header.
 *
 * Prior to calculating the CRC, pull the LSN out of the buffer log item
 * and put it into the buffer so recovery knows what the last modification
 * was that made it to disk.
 */
void
xfs_btree_lblock_calc_crc(
	struct xfs_buf		*bp)
{
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_buf_log_item	*bip = bp->b_fspriv;

	/* CRCs only exist on v5 (CRC-enabled) filesystems */
	if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
		return;
	if (bip)
		block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
	xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
}
243
/*
 * Verify the CRC of a long-form btree block.  On CRC filesystems also
 * reject blocks whose stamped LSN is ahead of the current log (stale
 * write detection); non-CRC filesystems trivially pass.
 */
bool
xfs_btree_lblock_verify_crc(
	struct xfs_buf		*bp)
{
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_mount	*mp = bp->b_target->bt_mount;

	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
			return false;
		return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
	}

	return true;
}
259
/*
 * Calculate CRC on the whole btree block and stuff it into the
 * short-form btree header.
 *
 * Prior to calculating the CRC, pull the LSN out of the buffer log item
 * and put it into the buffer so recovery knows what the last modification
 * was that made it to disk.
 */
void
xfs_btree_sblock_calc_crc(
	struct xfs_buf		*bp)
{
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_buf_log_item	*bip = bp->b_fspriv;

	/* CRCs only exist on v5 (CRC-enabled) filesystems */
	if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
		return;
	if (bip)
		block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
	xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
}
281
/*
 * Verify the CRC of a short-form btree block.  On CRC filesystems also
 * reject blocks whose stamped LSN is ahead of the current log (stale
 * write detection); non-CRC filesystems trivially pass.
 */
bool
xfs_btree_sblock_verify_crc(
	struct xfs_buf		*bp)
{
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_mount	*mp = bp->b_target->bt_mount;

	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
			return false;
		return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
	}

	return true;
}
297
/*
 * Free a btree block via the per-btree free_block method; on success
 * invalidate the buffer in the transaction and bump the free stat.
 */
static int
xfs_btree_free_block(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp)
{
	int			error;

	error = cur->bc_ops->free_block(cur, bp);
	if (!error) {
		xfs_trans_binval(cur->bc_tp, bp);
		XFS_BTREE_STATS_INC(cur, free);
	}
	return error;
}
312
/*
 * Delete the btree cursor: release any buffers it still holds, then
 * return it to the cursor zone.
 */
void
xfs_btree_del_cursor(
	xfs_btree_cur_t	*cur,		/* btree cursor */
	int		error)		/* del because of error */
{
	int		i;		/* btree level */

	/*
	 * Clear the buffer pointers, and release the buffers.
	 * If we're doing this in the face of an error, we
	 * need to make sure to inspect all of the entries
	 * in the bc_bufs array for buffers to be unlocked.
	 * This is because some of the btree code works from
	 * level n down to 0, and if we get an error along
	 * the way we won't have initialized all the entries
	 * down to 0.
	 */
	for (i = 0; i < cur->bc_nlevels; i++) {
		if (cur->bc_bufs[i])
			xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
		else if (!error)
			break;
	}
	/*
	 * Can't free a bmap cursor without having dealt with the
	 * allocated indirect blocks' accounting.
	 */
	ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
	       cur->bc_private.b.allocated == 0);
	/*
	 * Free the cursor.
	 */
	kmem_zone_free(xfs_btree_cur_zone, cur);
}
350
/*
 * Duplicate the btree cursor.
 * Allocate a new one, copy the record, re-get the buffers.
 *
 * Returns 0 and the duplicate in *ncur, or a negative errno with
 * *ncur set to NULL (the partial duplicate is torn down here).
 */
int					/* error */
xfs_btree_dup_cursor(
	xfs_btree_cur_t	*cur,		/* input cursor */
	xfs_btree_cur_t	**ncur)		/* output cursor */
{
	xfs_buf_t	*bp;		/* btree block's buffer pointer */
	int		error;		/* error return value */
	int		i;		/* level number of btree block */
	xfs_mount_t	*mp;		/* mount structure for filesystem */
	xfs_btree_cur_t	*new;		/* new cursor value */
	xfs_trans_t	*tp;		/* transaction pointer, can be NULL */

	tp = cur->bc_tp;
	mp = cur->bc_mp;

	/*
	 * Allocate a new cursor like the old one.
	 */
	new = cur->bc_ops->dup_cursor(cur);

	/*
	 * Copy the record currently in the cursor.
	 */
	new->bc_rec = cur->bc_rec;

	/*
	 * For each level current, re-get the buffer and copy the ptr value.
	 * The re-read attaches the buffer to the new transaction and runs
	 * the per-btree verifier.
	 */
	for (i = 0; i < new->bc_nlevels; i++) {
		new->bc_ptrs[i] = cur->bc_ptrs[i];
		new->bc_ra[i] = cur->bc_ra[i];
		bp = cur->bc_bufs[i];
		if (bp) {
			error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
						   XFS_BUF_ADDR(bp), mp->m_bsize,
						   0, &bp,
						   cur->bc_ops->buf_ops);
			if (error) {
				xfs_btree_del_cursor(new, error);
				*ncur = NULL;
				return error;
			}
		}
		new->bc_bufs[i] = bp;
	}
	*ncur = new;
	return 0;
}
403
404/*
Christoph Hellwig65f1eae2008-10-30 16:55:34 +1100405 * XFS btree block layout and addressing:
406 *
407 * There are two types of blocks in the btree: leaf and non-leaf blocks.
408 *
 * Leaf blocks start with a header, which is followed by records containing
410 * the values. A non-leaf block also starts with the same header, and
411 * then first contains lookup keys followed by an equal number of pointers
412 * to the btree blocks at the previous level.
413 *
414 * +--------+-------+-------+-------+-------+-------+-------+
415 * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
416 * +--------+-------+-------+-------+-------+-------+-------+
417 *
418 * +--------+-------+-------+-------+-------+-------+-------+
419 * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
420 * +--------+-------+-------+-------+-------+-------+-------+
421 *
422 * The header is called struct xfs_btree_block for reasons better left unknown
423 * and comes in different versions for short (32bit) and long (64bit) block
424 * pointers. The record and key structures are defined by the btree instances
425 * and opaque to the btree core. The block pointers are simple disk endian
426 * integers, available in a short (32bit) and long (64bit) variant.
427 *
428 * The helpers below calculate the offset of a given record, key or pointer
429 * into a btree block (xfs_btree_*_offset) or return a pointer to the given
430 * record, key or pointer (xfs_btree_*_addr). Note that all addressing
431 * inside the btree block is done using indices starting at one, not zero!
Darrick J. Wong2c813ad2016-08-03 11:08:36 +1000432 *
433 * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing
434 * overlapping intervals. In such a tree, records are still sorted lowest to
435 * highest and indexed by the smallest key value that refers to the record.
436 * However, nodes are different: each pointer has two associated keys -- one
437 * indexing the lowest key available in the block(s) below (the same behavior
438 * as the key in a regular btree) and another indexing the highest key
439 * available in the block(s) below. Because records are /not/ sorted by the
440 * highest key, all leaf block updates require us to compute the highest key
441 * that matches any record in the leaf and to recursively update the high keys
442 * in the nodes going further up in the tree, if necessary. Nodes look like
443 * this:
444 *
445 * +--------+-----+-----+-----+-----+-----+-------+-------+-----+
446 * Non-Leaf: | header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... |
447 * +--------+-----+-----+-----+-----+-----+-------+-------+-----+
448 *
449 * To perform an interval query on an overlapped tree, perform the usual
450 * depth-first search and use the low and high keys to decide if we can skip
451 * that particular node. If a leaf node is reached, return the records that
452 * intersect the interval. Note that an interval query may return numerous
453 * entries. For a non-overlapped tree, simply search for the record associated
454 * with the lowest key and iterate forward until a non-matching record is
455 * found. Section 14.3 ("Interval Trees") of _Introduction to Algorithms_ by
456 * Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only) discuss this in
457 * more detail.
458 *
459 * Why do we care about overlapping intervals? Let's say you have a bunch of
460 * reverse mapping records on a reflink filesystem:
461 *
462 * 1: +- file A startblock B offset C length D -----------+
463 * 2: +- file E startblock F offset G length H --------------+
464 * 3: +- file I startblock F offset J length K --+
465 * 4: +- file L... --+
466 *
467 * Now say we want to map block (B+D) into file A at offset (C+D). Ideally,
468 * we'd simply increment the length of record 1. But how do we find the record
469 * that ends at (B+D-1) (i.e. record 1)? A LE lookup of (B+D-1) would return
470 * record 3 because the keys are ordered first by startblock. An interval
471 * query would return records 1 and 2 because they both overlap (B+D-1), and
472 * from that we can pick out record 1 as the appropriate left neighbor.
473 *
474 * In the non-overlapped case you can do a LE lookup and decrement the cursor
475 * because a record's interval must end before the next record.
Christoph Hellwig65f1eae2008-10-30 16:55:34 +1100476 */
477
478/*
479 * Return size of the btree block header for this btree instance.
480 */
481static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
482{
Christoph Hellwigee1a47a2013-04-21 14:53:46 -0500483 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
484 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
485 return XFS_BTREE_LBLOCK_CRC_LEN;
486 return XFS_BTREE_LBLOCK_LEN;
487 }
488 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
489 return XFS_BTREE_SBLOCK_CRC_LEN;
490 return XFS_BTREE_SBLOCK_LEN;
Christoph Hellwig65f1eae2008-10-30 16:55:34 +1100491}
492
493/*
494 * Return size of btree block pointers for this btree instance.
495 */
496static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
497{
498 return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
499 sizeof(__be64) : sizeof(__be32);
500}
501
/*
 * Calculate offset of the n-th record in a btree block.
 * Record indices are 1-based, hence the (n - 1).
 */
STATIC size_t
xfs_btree_rec_offset(
	struct xfs_btree_cur	*cur,
	int			n)
{
	return xfs_btree_block_len(cur) +
		(n - 1) * cur->bc_ops->rec_len;
}
513
/*
 * Calculate offset of the n-th key in a btree block.
 * Key indices are 1-based, hence the (n - 1).
 */
STATIC size_t
xfs_btree_key_offset(
	struct xfs_btree_cur	*cur,
	int			n)
{
	return xfs_btree_block_len(cur) +
		(n - 1) * cur->bc_ops->key_len;
}
525
/*
 * Calculate offset of the n-th high key in a btree block.
 * For overlapping btrees each key slot is a (low, high) pair; the high
 * half lives key_len/2 bytes into the n-th slot.
 */
STATIC size_t
xfs_btree_high_key_offset(
	struct xfs_btree_cur	*cur,
	int			n)
{
	return xfs_btree_block_len(cur) +
		(n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2);
}
537
/*
 * Calculate offset of the n-th block pointer in a btree block.
 * Pointers follow the full key array, whose size is maxrecs * key_len
 * for this level.  Pointer indices are 1-based.
 */
STATIC size_t
xfs_btree_ptr_offset(
	struct xfs_btree_cur	*cur,
	int			n,
	int			level)
{
	return xfs_btree_block_len(cur) +
		cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
		(n - 1) * xfs_btree_ptr_len(cur);
}
551
/*
 * Return a pointer to the n-th (1-based) record in the btree block.
 */
STATIC union xfs_btree_rec *
xfs_btree_rec_addr(
	struct xfs_btree_cur	*cur,
	int			n,
	struct xfs_btree_block	*block)
{
	return (union xfs_btree_rec *)
		((char *)block + xfs_btree_rec_offset(cur, n));
}
564
/*
 * Return a pointer to the n-th (1-based) key in the btree block.
 */
STATIC union xfs_btree_key *
xfs_btree_key_addr(
	struct xfs_btree_cur	*cur,
	int			n,
	struct xfs_btree_block	*block)
{
	return (union xfs_btree_key *)
		((char *)block + xfs_btree_key_offset(cur, n));
}
577
/*
 * Return a pointer to the n-th (1-based) high key in the btree block
 * (overlapping btrees only).
 */
STATIC union xfs_btree_key *
xfs_btree_high_key_addr(
	struct xfs_btree_cur	*cur,
	int			n,
	struct xfs_btree_block	*block)
{
	return (union xfs_btree_key *)
		((char *)block + xfs_btree_high_key_offset(cur, n));
}
590
/*
 * Return a pointer to the n-th (1-based) block pointer in the btree
 * block.  Only valid for non-leaf blocks, hence the level assert.
 */
STATIC union xfs_btree_ptr *
xfs_btree_ptr_addr(
	struct xfs_btree_cur	*cur,
	int			n,
	struct xfs_btree_block	*block)
{
	int			level = xfs_btree_get_level(block);

	ASSERT(block->bb_level != 0);

	return (union xfs_btree_ptr *)
		((char *)block + xfs_btree_ptr_offset(cur, n, level));
}
607
/*
 * Get the root block which is stored in the inode.
 *
 * For now this btree implementation assumes the btree root is always
 * stored in the if_broot field of an inode fork.
 */
STATIC struct xfs_btree_block *
xfs_btree_get_iroot(
	struct xfs_btree_cur	*cur)
{
	struct xfs_ifork	*ifp;

	ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
	return (struct xfs_btree_block *)ifp->if_broot;
}
623
624/*
625 * Retrieve the block pointer from the cursor at the given level.
626 * This may be an inode btree root or from a buffer.
627 */
628STATIC struct xfs_btree_block * /* generic btree block pointer */
629xfs_btree_get_block(
630 struct xfs_btree_cur *cur, /* btree cursor */
631 int level, /* level in btree */
632 struct xfs_buf **bpp) /* buffer containing the block */
633{
634 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
635 (level == cur->bc_nlevels - 1)) {
636 *bpp = NULL;
637 return xfs_btree_get_iroot(cur);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638 }
Christoph Hellwig8186e512008-10-30 16:54:22 +1100639
640 *bpp = cur->bc_bufs[level];
641 return XFS_BUF_TO_BLOCK(*bpp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642}
643
/*
 * Get a buffer for the block, return it with no data read.
 * Long-form addressing: fsbno is a filesystem-wide block number.
 */
xfs_buf_t *				/* buffer for fsbno */
xfs_btree_get_bufl(
	xfs_mount_t	*mp,		/* file system mount point */
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_fsblock_t	fsbno,		/* file system block number */
	uint		lock)		/* lock flags for get_buf */
{
	xfs_daddr_t		d;	/* real disk block address */

	ASSERT(fsbno != NULLFSBLOCK);
	d = XFS_FSB_TO_DADDR(mp, fsbno);
	return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
}
661
/*
 * Get a buffer for the block, return it with no data read.
 * Short-form addressing: (agno, agbno) names a block within one AG.
 */
xfs_buf_t *				/* buffer for agno/agbno */
xfs_btree_get_bufs(
	xfs_mount_t	*mp,		/* file system mount point */
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_agnumber_t	agno,		/* allocation group number */
	xfs_agblock_t	agbno,		/* allocation group block number */
	uint		lock)		/* lock flags for get_buf */
{
	xfs_daddr_t		d;	/* real disk block address */

	ASSERT(agno != NULLAGNUMBER);
	ASSERT(agbno != NULLAGBLOCK);
	d = XFS_AGB_TO_DADDR(mp, agno, agbno);
	return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
}
681
/*
 * Check for the cursor referring to the last block at the given level:
 * true iff the block's right sibling is the null sentinel for this
 * pointer format.
 */
int					/* 1=is last block, 0=not last block */
xfs_btree_islastblock(
	xfs_btree_cur_t		*cur,	/* btree cursor */
	int			level)	/* level to check */
{
	struct xfs_btree_block	*block;	/* generic btree block pointer */
	xfs_buf_t		*bp;	/* buffer containing block */

	block = xfs_btree_get_block(cur, level, &bp);
	xfs_btree_check_block(cur, block, level, bp);
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK);
	else
		return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
}
700
/*
 * Change the cursor to point to the first record at the given level.
 * Other levels are unaffected.
 */
STATIC int				/* success=1, failure=0 */
xfs_btree_firstrec(
	xfs_btree_cur_t		*cur,	/* btree cursor */
	int			level)	/* level to change */
{
	struct xfs_btree_block	*block;	/* generic btree block pointer */
	xfs_buf_t		*bp;	/* buffer containing block */

	/*
	 * Get the block pointer for this level.
	 */
	block = xfs_btree_get_block(cur, level, &bp);
	xfs_btree_check_block(cur, block, level, bp);
	/*
	 * It's empty, there is no such record.
	 */
	if (!block->bb_numrecs)
		return 0;
	/*
	 * Set the ptr value to 1, that's the first record/key.
	 */
	cur->bc_ptrs[level] = 1;
	return 1;
}
729
730/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731 * Change the cursor to point to the last record in the current block
732 * at the given level. Other levels are unaffected.
733 */
Christoph Hellwig3cc75242008-10-30 16:58:41 +1100734STATIC int /* success=1, failure=0 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735xfs_btree_lastrec(
736 xfs_btree_cur_t *cur, /* btree cursor */
737 int level) /* level to change */
738{
Christoph Hellwig7cc95a82008-10-30 17:14:34 +1100739 struct xfs_btree_block *block; /* generic btree block pointer */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740 xfs_buf_t *bp; /* buffer containing block */
741
742 /*
743 * Get the block pointer for this level.
744 */
745 block = xfs_btree_get_block(cur, level, &bp);
746 xfs_btree_check_block(cur, block, level, bp);
747 /*
748 * It's empty, there is no such record.
749 */
Christoph Hellwigf2277f02008-10-30 16:53:47 +1100750 if (!block->bb_numrecs)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700751 return 0;
752 /*
753 * Set the ptr value to numrecs, that's the last record/key.
754 */
Christoph Hellwigf2277f02008-10-30 16:53:47 +1100755 cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756 return 1;
757}
758
759/*
760 * Compute first and last byte offsets for the fields given.
761 * Interprets the offsets table, which contains struct field offsets.
762 */
763void
764xfs_btree_offsets(
765 __int64_t fields, /* bitmask of fields */
766 const short *offsets, /* table of field offsets */
767 int nbits, /* number of bits to inspect */
768 int *first, /* output: first byte offset */
769 int *last) /* output: last byte offset */
770{
771 int i; /* current bit number */
772 __int64_t imask; /* mask for current bit number */
773
774 ASSERT(fields != 0);
775 /*
776 * Find the lowest bit, so the first byte offset.
777 */
778 for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
779 if (imask & fields) {
780 *first = offsets[i];
781 break;
782 }
783 }
784 /*
785 * Find the highest bit, so the last byte offset.
786 */
787 for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
788 if (imask & fields) {
789 *last = offsets[i + 1] - 1;
790 break;
791 }
792 }
793}
794
/*
 * Get a buffer for the block, return it read in.
 * Long-form addressing.
 *
 * Returns 0 with *bpp set on success, or a negative errno from
 * xfs_trans_read_buf() on failure.
 */
int
xfs_btree_read_bufl(
	struct xfs_mount	*mp,		/* file system mount point */
	struct xfs_trans	*tp,		/* transaction pointer */
	xfs_fsblock_t		fsbno,		/* file system block number */
	uint			lock,		/* lock flags for read_buf */
	struct xfs_buf		**bpp,		/* buffer for fsbno */
	int			refval,		/* ref count value for buffer */
	const struct xfs_buf_ops *ops)
{
	struct xfs_buf		*bp;	/* return value */
	xfs_daddr_t		d;	/* real disk block address */
	int			error;

	ASSERT(fsbno != NULLFSBLOCK);
	d = XFS_FSB_TO_DADDR(mp, fsbno);
	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
				   mp->m_bsize, lock, &bp, ops);
	if (error)
		return error;
	/*
	 * xfs_trans_read_buf() can succeed yet hand back a NULL buffer;
	 * only tag a reference value onto a real one.
	 */
	if (bp)
		xfs_buf_set_ref(bp, refval);
	*bpp = bp;
	return 0;
}
824
825/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700826 * Read-ahead the block, don't wait for it, don't return a buffer.
827 * Long-form addressing.
828 */
829/* ARGSUSED */
830void
831xfs_btree_reada_bufl(
Dave Chinner3d3e6f62012-11-12 22:54:08 +1100832 struct xfs_mount *mp, /* file system mount point */
833 xfs_fsblock_t fsbno, /* file system block number */
834 xfs_extlen_t count, /* count of filesystem blocks */
Dave Chinner1813dd62012-11-14 17:54:40 +1100835 const struct xfs_buf_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700836{
837 xfs_daddr_t d;
838
839 ASSERT(fsbno != NULLFSBLOCK);
840 d = XFS_FSB_TO_DADDR(mp, fsbno);
Dave Chinner1813dd62012-11-14 17:54:40 +1100841 xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700842}
843
844/*
845 * Read-ahead the block, don't wait for it, don't return a buffer.
846 * Short-form addressing.
847 */
848/* ARGSUSED */
849void
850xfs_btree_reada_bufs(
Dave Chinner3d3e6f62012-11-12 22:54:08 +1100851 struct xfs_mount *mp, /* file system mount point */
852 xfs_agnumber_t agno, /* allocation group number */
853 xfs_agblock_t agbno, /* allocation group block number */
854 xfs_extlen_t count, /* count of filesystem blocks */
Dave Chinner1813dd62012-11-14 17:54:40 +1100855 const struct xfs_buf_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700856{
857 xfs_daddr_t d;
858
859 ASSERT(agno != NULLAGNUMBER);
860 ASSERT(agbno != NULLAGBLOCK);
861 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
Dave Chinner1813dd62012-11-14 17:54:40 +1100862 xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700863}
864
Christoph Hellwigb524bfe2008-10-30 16:54:43 +1100865STATIC int
866xfs_btree_readahead_lblock(
867 struct xfs_btree_cur *cur,
868 int lr,
869 struct xfs_btree_block *block)
870{
871 int rval = 0;
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +1000872 xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
873 xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
Christoph Hellwigb524bfe2008-10-30 16:54:43 +1100874
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +1000875 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
Dave Chinner3d3e6f62012-11-12 22:54:08 +1100876 xfs_btree_reada_bufl(cur->bc_mp, left, 1,
Dave Chinner1813dd62012-11-14 17:54:40 +1100877 cur->bc_ops->buf_ops);
Christoph Hellwigb524bfe2008-10-30 16:54:43 +1100878 rval++;
879 }
880
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +1000881 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
Dave Chinner3d3e6f62012-11-12 22:54:08 +1100882 xfs_btree_reada_bufl(cur->bc_mp, right, 1,
Dave Chinner1813dd62012-11-14 17:54:40 +1100883 cur->bc_ops->buf_ops);
Christoph Hellwigb524bfe2008-10-30 16:54:43 +1100884 rval++;
885 }
886
887 return rval;
888}
889
890STATIC int
891xfs_btree_readahead_sblock(
892 struct xfs_btree_cur *cur,
893 int lr,
894 struct xfs_btree_block *block)
895{
896 int rval = 0;
897 xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib);
898 xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib);
899
900
901 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
902 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
Dave Chinner1813dd62012-11-14 17:54:40 +1100903 left, 1, cur->bc_ops->buf_ops);
Christoph Hellwigb524bfe2008-10-30 16:54:43 +1100904 rval++;
905 }
906
907 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
908 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
Dave Chinner1813dd62012-11-14 17:54:40 +1100909 right, 1, cur->bc_ops->buf_ops);
Christoph Hellwigb524bfe2008-10-30 16:54:43 +1100910 rval++;
911 }
912
913 return rval;
914}
915
/*
 * Read-ahead btree blocks, at the given level.
 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
 *
 * Returns the number of readaheads issued (0 if nothing was done).
 */
STATIC int
xfs_btree_readahead(
	struct xfs_btree_cur	*cur,		/* btree cursor */
	int			lev,		/* level in btree */
	int			lr)		/* left/right bits */
{
	struct xfs_btree_block	*block;

	/*
	 * No readahead needed if we are at the root level and the
	 * btree root is stored in the inode.
	 */
	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    (lev == cur->bc_nlevels - 1))
		return 0;

	/* Done already if every requested direction has been issued. */
	if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
		return 0;

	/* Record the directions so repeat calls become no-ops above. */
	cur->bc_ra[lev] |= lr;
	block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);

	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		return xfs_btree_readahead_lblock(cur, lr, block);
	return xfs_btree_readahead_sblock(cur, lr, block);
}
946
Dave Chinner21b5c972013-08-30 10:23:44 +1000947STATIC xfs_daddr_t
948xfs_btree_ptr_to_daddr(
949 struct xfs_btree_cur *cur,
950 union xfs_btree_ptr *ptr)
951{
952 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +1000953 ASSERT(ptr->l != cpu_to_be64(NULLFSBLOCK));
Dave Chinner21b5c972013-08-30 10:23:44 +1000954
955 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
956 } else {
957 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
958 ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
959
960 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
961 be32_to_cpu(ptr->s));
962 }
963}
964
965/*
966 * Readahead @count btree blocks at the given @ptr location.
967 *
968 * We don't need to care about long or short form btrees here as we have a
969 * method of converting the ptr directly to a daddr available to us.
970 */
971STATIC void
972xfs_btree_readahead_ptr(
973 struct xfs_btree_cur *cur,
974 union xfs_btree_ptr *ptr,
975 xfs_extlen_t count)
976{
977 xfs_buf_readahead(cur->bc_mp->m_ddev_targp,
978 xfs_btree_ptr_to_daddr(cur, ptr),
979 cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
980}
981
/*
 * Set the buffer for level "lev" in the cursor to bp, releasing
 * any previous buffer.
 */
STATIC void
xfs_btree_setbuf(
	xfs_btree_cur_t		*cur,	/* btree cursor */
	int			lev,	/* level in btree */
	xfs_buf_t		*bp)	/* new buffer to set */
{
	struct xfs_btree_block	*b;	/* btree block */

	/* Drop the buffer previously held for this level, if any. */
	if (cur->bc_bufs[lev])
		xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]);
	cur->bc_bufs[lev] = bp;
	cur->bc_ra[lev] = 0;

	/*
	 * Pre-set the readahead bits for any direction that has no
	 * sibling, so xfs_btree_readahead() never tries to read past
	 * the edge of the tree at this level.
	 */
	b = XFS_BUF_TO_BLOCK(bp);
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK))
			cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
		if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK))
			cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
	} else {
		if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
			cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
		if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
			cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
	}
}
Christoph Hellwig637aa502008-10-30 16:55:45 +11001012
1013STATIC int
1014xfs_btree_ptr_is_null(
1015 struct xfs_btree_cur *cur,
1016 union xfs_btree_ptr *ptr)
1017{
1018 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +10001019 return ptr->l == cpu_to_be64(NULLFSBLOCK);
Christoph Hellwig637aa502008-10-30 16:55:45 +11001020 else
Christoph Hellwig69ef9212011-07-08 14:36:05 +02001021 return ptr->s == cpu_to_be32(NULLAGBLOCK);
Christoph Hellwig637aa502008-10-30 16:55:45 +11001022}
1023
Christoph Hellwig4b22a572008-10-30 16:57:40 +11001024STATIC void
1025xfs_btree_set_ptr_null(
1026 struct xfs_btree_cur *cur,
1027 union xfs_btree_ptr *ptr)
1028{
1029 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +10001030 ptr->l = cpu_to_be64(NULLFSBLOCK);
Christoph Hellwig4b22a572008-10-30 16:57:40 +11001031 else
1032 ptr->s = cpu_to_be32(NULLAGBLOCK);
1033}
1034
Christoph Hellwig637aa502008-10-30 16:55:45 +11001035/*
1036 * Get/set/init sibling pointers
1037 */
1038STATIC void
1039xfs_btree_get_sibling(
1040 struct xfs_btree_cur *cur,
1041 struct xfs_btree_block *block,
1042 union xfs_btree_ptr *ptr,
1043 int lr)
1044{
1045 ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
1046
1047 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1048 if (lr == XFS_BB_RIGHTSIB)
1049 ptr->l = block->bb_u.l.bb_rightsib;
1050 else
1051 ptr->l = block->bb_u.l.bb_leftsib;
1052 } else {
1053 if (lr == XFS_BB_RIGHTSIB)
1054 ptr->s = block->bb_u.s.bb_rightsib;
1055 else
1056 ptr->s = block->bb_u.s.bb_leftsib;
1057 }
1058}
1059
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11001060STATIC void
1061xfs_btree_set_sibling(
1062 struct xfs_btree_cur *cur,
1063 struct xfs_btree_block *block,
1064 union xfs_btree_ptr *ptr,
1065 int lr)
1066{
1067 ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
1068
1069 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1070 if (lr == XFS_BB_RIGHTSIB)
1071 block->bb_u.l.bb_rightsib = ptr->l;
1072 else
1073 block->bb_u.l.bb_leftsib = ptr->l;
1074 } else {
1075 if (lr == XFS_BB_RIGHTSIB)
1076 block->bb_u.s.bb_rightsib = ptr->s;
1077 else
1078 block->bb_u.s.bb_leftsib = ptr->s;
1079 }
1080}
1081
/*
 * Initialise an in-memory btree block header.
 *
 * Fills in the common header fields, sets both sibling pointers to the
 * appropriate null sentinel, and -- for CRC-enabled blocks -- the
 * self-describing metadata fields (blkno, owner, uuid, lsn).
 */
void
xfs_btree_init_block_int(
	struct xfs_mount	*mp,
	struct xfs_btree_block	*buf,
	xfs_daddr_t		blkno,
	__u32			magic,
	__u16			level,
	__u16			numrecs,
	__u64			owner,
	unsigned int		flags)
{
	buf->bb_magic = cpu_to_be32(magic);
	buf->bb_level = cpu_to_be16(level);
	buf->bb_numrecs = cpu_to_be16(numrecs);

	if (flags & XFS_BTREE_LONG_PTRS) {
		/* Long form: 64-bit null sibling pointers. */
		buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
		buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
		if (flags & XFS_BTREE_CRC_BLOCKS) {
			buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
			buf->bb_u.l.bb_owner = cpu_to_be64(owner);
			uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid);
			buf->bb_u.l.bb_pad = 0;
			buf->bb_u.l.bb_lsn = 0;
		}
	} else {
		/* owner is a 32 bit value on short blocks */
		__u32 __owner = (__u32)owner;

		buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
		buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
		if (flags & XFS_BTREE_CRC_BLOCKS) {
			buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
			buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
			uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid);
			buf->bb_u.s.bb_lsn = 0;
		}
	}
}
1121
/*
 * Initialise the btree block header held in @bp.
 *
 * Thin wrapper around xfs_btree_init_block_int() that takes the disk
 * address from the buffer itself.
 */
void
xfs_btree_init_block(
	struct xfs_mount *mp,
	struct xfs_buf	*bp,
	__u32		magic,
	__u16		level,
	__u16		numrecs,
	__u64		owner,
	unsigned int	flags)
{
	xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
				 magic, level, numrecs, owner, flags);
}
1135
/*
 * Initialise a new btree block using the owner and flags taken from
 * the cursor's btree.
 */
STATIC void
xfs_btree_init_block_cur(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp,
	int			level,
	int			numrecs)
{
	__u64			owner;

	/*
	 * we can pull the owner from the cursor right now as the different
	 * owners align directly with the pointer size of the btree. This may
	 * change in future, but is safe for current users of the generic btree
	 * code.
	 */
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		owner = cur->bc_private.b.ip->i_ino;	/* inode-owned */
	else
		owner = cur->bc_private.a.agno;		/* AG-owned */

	xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
				 xfs_btree_magic(cur), level, numrecs,
				 owner, cur->bc_flags);
}
1160
Christoph Hellwig278d0ca2008-10-30 16:56:32 +11001161/*
1162 * Return true if ptr is the last record in the btree and
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001163 * we need to track updates to this record. The decision
Christoph Hellwig278d0ca2008-10-30 16:56:32 +11001164 * will be further refined in the update_lastrec method.
1165 */
1166STATIC int
1167xfs_btree_is_lastrec(
1168 struct xfs_btree_cur *cur,
1169 struct xfs_btree_block *block,
1170 int level)
1171{
1172 union xfs_btree_ptr ptr;
1173
1174 if (level > 0)
1175 return 0;
1176 if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
1177 return 0;
1178
1179 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1180 if (!xfs_btree_ptr_is_null(cur, &ptr))
1181 return 0;
1182 return 1;
1183}
1184
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11001185STATIC void
1186xfs_btree_buf_to_ptr(
1187 struct xfs_btree_cur *cur,
1188 struct xfs_buf *bp,
1189 union xfs_btree_ptr *ptr)
1190{
1191 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1192 ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
1193 XFS_BUF_ADDR(bp)));
1194 else {
Eric Sandeen9d87c312009-01-14 23:22:07 -06001195 ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11001196 XFS_BUF_ADDR(bp)));
1197 }
1198}
1199
/*
 * Tag the buffer with the reference value matching the cursor's btree
 * type.  NOTE(review): these XFS_*_BTREE_REF values presumably bias
 * buffer cache reclaim priority -- confirm against xfs_buf_set_ref().
 */
STATIC void
xfs_btree_set_refs(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp)
{
	switch (cur->bc_btnum) {
	case XFS_BTNUM_BNO:
	case XFS_BTNUM_CNT:
		xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
		break;
	case XFS_BTNUM_INO:
	case XFS_BTNUM_FINO:
		xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
		break;
	case XFS_BTNUM_BMAP:
		xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
		break;
	case XFS_BTNUM_RMAP:
		xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF);
		break;
	default:
		/* Unknown btree type: programming error. */
		ASSERT(0);
	}
}
1224
/*
 * Get a new buffer for the block at @ptr (without reading it from disk)
 * and return both the buffer and the block header within it.
 *
 * Returns 0 on success, -ENOMEM if no buffer could be obtained.
 */
STATIC int
xfs_btree_get_buf_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	int			flags,
	struct xfs_btree_block	**block,
	struct xfs_buf		**bpp)
{
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_daddr_t		d;

	/* need to sort out how callers deal with failures first */
	ASSERT(!(flags & XBF_TRYLOCK));

	d = xfs_btree_ptr_to_daddr(cur, ptr);
	*bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
				 mp->m_bsize, flags);

	if (!*bpp)
		return -ENOMEM;

	/* Attach the btree's verifier ops to the freshly obtained buffer. */
	(*bpp)->b_ops = cur->bc_ops->buf_ops;
	*block = XFS_BUF_TO_BLOCK(*bpp);
	return 0;
}
1250
/*
 * Read in the buffer at the given ptr and return the buffer and
 * the block pointer within the buffer.
 *
 * Returns 0 on success or the error from xfs_trans_read_buf().
 */
STATIC int
xfs_btree_read_buf_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	int			flags,
	struct xfs_btree_block	**block,
	struct xfs_buf		**bpp)
{
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_daddr_t		d;
	int			error;

	/* need to sort out how callers deal with failures first */
	ASSERT(!(flags & XBF_TRYLOCK));

	/* Read the block through the transaction, verified by buf_ops. */
	d = xfs_btree_ptr_to_daddr(cur, ptr);
	error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
				   mp->m_bsize, flags, bpp,
				   cur->bc_ops->buf_ops);
	if (error)
		return error;

	/* Tag the buffer with a btree-type-specific reference value. */
	xfs_btree_set_refs(cur, *bpp);
	*block = XFS_BUF_TO_BLOCK(*bpp);
	return 0;
}
1281
/*
 * Copy keys from one btree block to another.
 *
 * Uses memcpy, so the source and destination ranges must not overlap;
 * use xfs_btree_shift_keys() for in-block moves.
 */
STATIC void
xfs_btree_copy_keys(
	struct xfs_btree_cur	*cur,
	union xfs_btree_key	*dst_key,
	union xfs_btree_key	*src_key,
	int			numkeys)
{
	ASSERT(numkeys >= 0);
	memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
}
1295
/*
 * Copy records from one btree block to another.
 *
 * Uses memcpy, so the source and destination ranges must not overlap;
 * use xfs_btree_shift_recs() for in-block moves.
 */
STATIC void
xfs_btree_copy_recs(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*dst_rec,
	union xfs_btree_rec	*src_rec,
	int			numrecs)
{
	ASSERT(numrecs >= 0);
	memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
}
1309
/*
 * Copy block pointers from one btree block to another.
 *
 * Uses memcpy, so the source and destination ranges must not overlap;
 * use xfs_btree_shift_ptrs() for in-block moves.
 */
STATIC void
xfs_btree_copy_ptrs(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*dst_ptr,
	union xfs_btree_ptr	*src_ptr,
	int			numptrs)
{
	ASSERT(numptrs >= 0);
	memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
}
1323
1324/*
1325 * Shift keys one index left/right inside a single btree block.
1326 */
1327STATIC void
1328xfs_btree_shift_keys(
1329 struct xfs_btree_cur *cur,
1330 union xfs_btree_key *key,
1331 int dir,
1332 int numkeys)
1333{
1334 char *dst_key;
1335
1336 ASSERT(numkeys >= 0);
1337 ASSERT(dir == 1 || dir == -1);
1338
1339 dst_key = (char *)key + (dir * cur->bc_ops->key_len);
1340 memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
1341}
1342
1343/*
1344 * Shift records one index left/right inside a single btree block.
1345 */
1346STATIC void
1347xfs_btree_shift_recs(
1348 struct xfs_btree_cur *cur,
1349 union xfs_btree_rec *rec,
1350 int dir,
1351 int numrecs)
1352{
1353 char *dst_rec;
1354
1355 ASSERT(numrecs >= 0);
1356 ASSERT(dir == 1 || dir == -1);
1357
1358 dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
1359 memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
1360}
1361
1362/*
1363 * Shift block pointers one index left/right inside a single btree block.
1364 */
1365STATIC void
1366xfs_btree_shift_ptrs(
1367 struct xfs_btree_cur *cur,
1368 union xfs_btree_ptr *ptr,
1369 int dir,
1370 int numptrs)
1371{
1372 char *dst_ptr;
1373
1374 ASSERT(numptrs >= 0);
1375 ASSERT(dir == 1 || dir == -1);
1376
1377 dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
1378 memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
1379}
1380
/*
 * Log key values from the btree block.
 *
 * Logs the byte range covering keys [first, last] through the buffer,
 * or -- for an inode-rooted btree (bp == NULL) -- logs the whole root
 * through the inode fork instead.
 */
STATIC void
xfs_btree_log_keys(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp,
	int			first,
	int			last)
{
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);

	if (bp) {
		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
		xfs_trans_log_buf(cur->bc_tp, bp,
				  xfs_btree_key_offset(cur, first),
				  xfs_btree_key_offset(cur, last + 1) - 1);
	} else {
		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
				xfs_ilog_fbroot(cur->bc_private.b.whichfork));
	}

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
}
1406
/*
 * Log record values from the btree block.
 *
 * Logs the byte range covering records [first, last] through the
 * buffer.  Unlike xfs_btree_log_keys() there is no inode-root path:
 * records only live in leaf blocks, which are always buffer-backed.
 */
void
xfs_btree_log_recs(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp,
	int			first,
	int			last)
{
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);

	xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
	xfs_trans_log_buf(cur->bc_tp, bp,
			  xfs_btree_rec_offset(cur, first),
			  xfs_btree_rec_offset(cur, last + 1) - 1);

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
}
1427
/*
 * Log block pointer fields from a btree block (nonleaf).
 *
 * Logs the byte range covering pointers [first, last] through the
 * buffer, or -- for an inode-rooted btree (bp == NULL) -- logs the
 * whole root through the inode fork instead.
 */
STATIC void
xfs_btree_log_ptrs(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	struct xfs_buf		*bp,	/* buffer containing btree block */
	int			first,	/* index of first pointer to log */
	int			last)	/* index of last pointer to log */
{
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);

	if (bp) {
		struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
		int			level = xfs_btree_get_level(block);

		/* Pointer offsets depend on the level's key/ptr layout. */
		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
		xfs_trans_log_buf(cur->bc_tp, bp,
				xfs_btree_ptr_offset(cur, first, level),
				xfs_btree_ptr_offset(cur, last + 1, level) - 1);
	} else {
		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
			xfs_ilog_fbroot(cur->bc_private.b.whichfork));
	}

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
}
1456
/*
 * Log fields from a btree block header.
 */
void
xfs_btree_log_block(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	struct xfs_buf		*bp,	/* buffer containing btree block */
	int			fields)	/* mask of fields: XFS_BB_... */
{
	int			first;	/* first byte offset logged */
	int			last;	/* last byte offset logged */
	/*
	 * The offset tables are indexed by XFS_BB_* bit number and end
	 * with the total header length, which xfs_btree_offsets() reads
	 * as the end sentinel for the final field (offsets[i + 1] - 1).
	 */
	static const short	soffsets[] = {	/* table of offsets (short) */
		offsetof(struct xfs_btree_block, bb_magic),
		offsetof(struct xfs_btree_block, bb_level),
		offsetof(struct xfs_btree_block, bb_numrecs),
		offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
		offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
		offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
		offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
		offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
		offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
		offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
		XFS_BTREE_SBLOCK_CRC_LEN
	};
	static const short	loffsets[] = {	/* table of offsets (long) */
		offsetof(struct xfs_btree_block, bb_magic),
		offsetof(struct xfs_btree_block, bb_level),
		offsetof(struct xfs_btree_block, bb_numrecs),
		offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
		offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
		offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
		offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
		offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
		offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
		offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
		offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
		XFS_BTREE_LBLOCK_CRC_LEN
	};

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGBI(cur, bp, fields);

	if (bp) {
		int nbits;

		if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
			/*
			 * We don't log the CRC when updating a btree
			 * block but instead recreate it during log
			 * recovery.  As the log buffers have checksums
			 * of their own this is safe and avoids logging a crc
			 * update in a lot of places.
			 */
			if (fields == XFS_BB_ALL_BITS)
				fields = XFS_BB_ALL_BITS_CRC;
			nbits = XFS_BB_NUM_BITS_CRC;
		} else {
			nbits = XFS_BB_NUM_BITS;
		}
		xfs_btree_offsets(fields,
				  (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
					loffsets : soffsets,
				  nbits, &first, &last);
		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
		xfs_trans_log_buf(cur->bc_tp, bp, first, last);
	} else {
		/* Inode-rooted btree: log the root through the inode fork. */
		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
			xfs_ilog_fbroot(cur->bc_private.b.whichfork));
	}

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
}
1529
/*
 * Increment cursor by one record at the level.
 * For nonzero levels the leaf-ward information is untouched.
 *
 * Returns 0 on success (with *stat = 1 if the cursor moved, *stat = 0
 * if we ran off the right edge of the tree) or a negative errno.
 */
int						/* error */
xfs_btree_increment(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	struct xfs_btree_block	*block;
	union xfs_btree_ptr	ptr;
	struct xfs_buf		*bp;
	int			error;		/* error return value */
	int			lev;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGI(cur, level);

	ASSERT(level < cur->bc_nlevels);

	/* Read-ahead to the right at this level. */
	xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);

	/* Get a pointer to the btree block. */
	block = xfs_btree_get_block(cur, level, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto error0;
#endif

	/* We're done if we remain in the block after the increment. */
	if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
		goto out1;

	/* Fail if we just went off the right edge of the tree. */
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
	if (xfs_btree_ptr_is_null(cur, &ptr))
		goto out0;

	XFS_BTREE_STATS_INC(cur, increment);

	/*
	 * March up the tree incrementing pointers.
	 * Stop when we don't go off the right edge of a block.
	 */
	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
		block = xfs_btree_get_block(cur, lev, &bp);

#ifdef DEBUG
		error = xfs_btree_check_block(cur, block, lev, bp);
		if (error)
			goto error0;
#endif

		if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
			break;

		/* Read-ahead the right block for the next loop. */
		xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
	}

	/*
	 * If we went off the root then we are either seriously
	 * confused or have the tree root in an inode.
	 */
	if (lev == cur->bc_nlevels) {
		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
			goto out0;
		ASSERT(0);
		error = -EFSCORRUPTED;
		goto error0;
	}
	ASSERT(lev < cur->bc_nlevels);

	/*
	 * Now walk back down the tree, fixing up the cursor's buffer
	 * pointers and key numbers.  Each level we descend into starts
	 * at its first entry (the leftmost record of the new subtree).
	 */
	for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
		union xfs_btree_ptr	*ptrp;

		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
		--lev;
		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
		if (error)
			goto error0;

		xfs_btree_setbuf(cur, lev, bp);
		cur->bc_ptrs[lev] = 1;
	}
out1:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;

out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
Christoph Hellwig8df4da42008-10-30 16:55:58 +11001637
/*
 * Decrement cursor by one record at the level.
 * For nonzero levels the leaf-ward information is untouched.
 *
 * Returns 0 on success (with *stat = 1 if the cursor moved, *stat = 0
 * if we ran off the left edge of the tree) or a negative errno.
 */
int						/* error */
xfs_btree_decrement(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	struct xfs_btree_block	*block;
	xfs_buf_t		*bp;
	int			error;		/* error return value */
	int			lev;
	union xfs_btree_ptr	ptr;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGI(cur, level);

	ASSERT(level < cur->bc_nlevels);

	/* Read-ahead to the left at this level. */
	xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);

	/* We're done if we remain in the block after the decrement. */
	if (--cur->bc_ptrs[level] > 0)
		goto out1;

	/* Get a pointer to the btree block. */
	block = xfs_btree_get_block(cur, level, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto error0;
#endif

	/* Fail if we just went off the left edge of the tree. */
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
	if (xfs_btree_ptr_is_null(cur, &ptr))
		goto out0;

	XFS_BTREE_STATS_INC(cur, decrement);

	/*
	 * March up the tree decrementing pointers.
	 * Stop when we don't go off the left edge of a block.
	 */
	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
		if (--cur->bc_ptrs[lev] > 0)
			break;
		/* Read-ahead the left block for the next loop. */
		xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
	}

	/*
	 * If we went off the root then we are either seriously
	 * confused or have the tree root in an inode.
	 */
	if (lev == cur->bc_nlevels) {
		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
			goto out0;
		ASSERT(0);
		error = -EFSCORRUPTED;
		goto error0;
	}
	ASSERT(lev < cur->bc_nlevels);

	/*
	 * Now walk back down the tree, fixing up the cursor's buffer
	 * pointers and key numbers.  Each level we descend into starts
	 * at its last entry (the rightmost record of the new subtree).
	 */
	for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
		union xfs_btree_ptr	*ptrp;

		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
		--lev;
		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
		if (error)
			goto error0;
		xfs_btree_setbuf(cur, lev, bp);
		cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
	}
out1:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;

out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
1735
Christoph Hellwigfe033cc2008-10-30 16:56:09 +11001736STATIC int
1737xfs_btree_lookup_get_block(
1738 struct xfs_btree_cur *cur, /* btree cursor */
1739 int level, /* level in the btree */
1740 union xfs_btree_ptr *pp, /* ptr to btree block */
1741 struct xfs_btree_block **blkp) /* return btree block */
1742{
1743 struct xfs_buf *bp; /* buffer pointer for btree block */
1744 int error = 0;
1745
1746 /* special case the root block if in an inode */
1747 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1748 (level == cur->bc_nlevels - 1)) {
1749 *blkp = xfs_btree_get_iroot(cur);
1750 return 0;
1751 }
1752
1753 /*
1754 * If the old buffer at this level for the disk address we are
1755 * looking for re-use it.
1756 *
1757 * Otherwise throw it away and get a new one.
1758 */
1759 bp = cur->bc_bufs[level];
1760 if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
1761 *blkp = XFS_BUF_TO_BLOCK(bp);
1762 return 0;
1763 }
1764
Eric Sandeen0d7409b2014-04-14 18:59:56 +10001765 error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
Christoph Hellwigfe033cc2008-10-30 16:56:09 +11001766 if (error)
1767 return error;
1768
1769 xfs_btree_setbuf(cur, level, bp);
1770 return 0;
1771}
1772
1773/*
1774 * Get current search key. For level 0 we don't actually have a key
1775 * structure so we make one up from the record. For all other levels
1776 * we just return the right key.
1777 */
1778STATIC union xfs_btree_key *
1779xfs_lookup_get_search_key(
1780 struct xfs_btree_cur *cur,
1781 int level,
1782 int keyno,
1783 struct xfs_btree_block *block,
1784 union xfs_btree_key *kp)
1785{
1786 if (level == 0) {
1787 cur->bc_ops->init_key_from_rec(kp,
1788 xfs_btree_rec_addr(cur, keyno, block));
1789 return kp;
1790 }
1791
1792 return xfs_btree_key_addr(cur, keyno, block);
1793}
1794
/*
 * Lookup the record.  The cursor is made to point to it, based on dir.
 * stat is set to 0 if can't find any such record, 1 for success.
 *
 * dir selects the comparison: XFS_LOOKUP_LE finds the last record <=
 * the search key, XFS_LOOKUP_EQ an exact match, XFS_LOOKUP_GE the
 * first record >= the search key.
 */
int					/* error */
xfs_btree_lookup(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_lookup_t		dir,	/* <=, ==, or >= */
	int			*stat)	/* success/failure */
{
	struct xfs_btree_block	*block;	/* current btree block */
	__int64_t		diff;	/* difference for the current key */
	int			error;	/* error return value */
	int			keyno;	/* current key number */
	int			level;	/* level in the btree */
	union xfs_btree_ptr	*pp;	/* ptr to btree block */
	union xfs_btree_ptr	ptr;	/* ptr to btree block */

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGI(cur, dir);

	XFS_BTREE_STATS_INC(cur, lookup);

	/* No such thing as a zero-level tree. */
	if (cur->bc_nlevels == 0)
		return -EFSCORRUPTED;

	block = NULL;
	keyno = 0;

	/* initialise start pointer from cursor */
	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
	pp = &ptr;

	/*
	 * Iterate over each level in the btree, starting at the root.
	 * For each level above the leaves, find the key we need, based
	 * on the lookup record, then follow the corresponding block
	 * pointer down to the next level.
	 *
	 * diff starts nonzero so the first level is always searched.
	 */
	for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
		/* Get the block we need to do the lookup on. */
		error = xfs_btree_lookup_get_block(cur, level, pp, &block);
		if (error)
			goto error0;

		if (diff == 0) {
			/*
			 * If we already had a key match at a higher level, we
			 * know we need to use the first entry in this block.
			 */
			keyno = 1;
		} else {
			/* Otherwise search this block. Do a binary search. */

			int	high;	/* high entry number */
			int	low;	/* low entry number */

			/* Set low and high entry numbers, 1-based. */
			low = 1;
			high = xfs_btree_get_numrecs(block);
			if (!high) {
				/* Block is empty, must be an empty leaf. */
				ASSERT(level == 0 && cur->bc_nlevels == 1);

				cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
				XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
				*stat = 0;
				return 0;
			}

			/* Binary search the block. */
			while (low <= high) {
				union xfs_btree_key	key;
				union xfs_btree_key	*kp;

				XFS_BTREE_STATS_INC(cur, compare);

				/* keyno is average of low and high. */
				keyno = (low + high) >> 1;

				/* Get current search key */
				kp = xfs_lookup_get_search_key(cur, level,
						keyno, block, &key);

				/*
				 * Compute difference to get next direction:
				 *  - less than, move right
				 *  - greater than, move left
				 *  - equal, we're done
				 */
				diff = cur->bc_ops->key_diff(cur, kp);
				if (diff < 0)
					low = keyno + 1;
				else if (diff > 0)
					high = keyno - 1;
				else
					break;
			}
		}

		/*
		 * If there are more levels, set up for the next level
		 * by getting the block number and filling in the cursor.
		 */
		if (level > 0) {
			/*
			 * If we moved left, need the previous key number,
			 * unless there isn't one.
			 */
			if (diff > 0 && --keyno < 1)
				keyno = 1;
			pp = xfs_btree_ptr_addr(cur, keyno, block);

#ifdef DEBUG
			error = xfs_btree_check_ptr(cur, pp, 0, level);
			if (error)
				goto error0;
#endif
			cur->bc_ptrs[level] = keyno;
		}
	}

	/* Done with the search. See if we need to adjust the results. */
	if (dir != XFS_LOOKUP_LE && diff < 0) {
		keyno++;
		/*
		 * If ge search and we went off the end of the block, but it's
		 * not the last block, we're in the wrong block.  Step the
		 * cursor right to land on the first record of the next block.
		 */
		xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
		if (dir == XFS_LOOKUP_GE &&
		    keyno > xfs_btree_get_numrecs(block) &&
		    !xfs_btree_ptr_is_null(cur, &ptr)) {
			int	i;

			cur->bc_ptrs[0] = keyno;
			error = xfs_btree_increment(cur, 0, &i);
			if (error)
				goto error0;
			XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
			XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
			*stat = 1;
			return 0;
		}
	} else if (dir == XFS_LOOKUP_LE && diff > 0)
		keyno--;
	cur->bc_ptrs[0] = keyno;

	/* Return if we succeeded or not. */
	if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
		*stat = 0;
	else if (dir != XFS_LOOKUP_EQ || diff == 0)
		*stat = 1;
	else
		*stat = 0;
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
Christoph Hellwig38bb7422008-10-30 16:56:22 +11001958
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10001959/* Find the high key storage area from a regular key. */
1960STATIC union xfs_btree_key *
1961xfs_btree_high_key_from_key(
1962 struct xfs_btree_cur *cur,
1963 union xfs_btree_key *key)
1964{
1965 ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
1966 return (union xfs_btree_key *)((char *)key +
1967 (cur->bc_ops->key_len / 2));
1968}
1969
Darrick J. Wong973b8312016-08-03 12:22:12 +10001970/* Determine the low (and high if overlapped) keys of a leaf block */
1971STATIC void
1972xfs_btree_get_leaf_keys(
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10001973 struct xfs_btree_cur *cur,
1974 struct xfs_btree_block *block,
1975 union xfs_btree_key *key)
1976{
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10001977 union xfs_btree_key max_hkey;
1978 union xfs_btree_key hkey;
Darrick J. Wong973b8312016-08-03 12:22:12 +10001979 union xfs_btree_rec *rec;
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10001980 union xfs_btree_key *high;
Darrick J. Wong973b8312016-08-03 12:22:12 +10001981 int n;
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10001982
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10001983 rec = xfs_btree_rec_addr(cur, 1, block);
1984 cur->bc_ops->init_key_from_rec(key, rec);
1985
Darrick J. Wong973b8312016-08-03 12:22:12 +10001986 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10001987
Darrick J. Wong973b8312016-08-03 12:22:12 +10001988 cur->bc_ops->init_high_key_from_rec(&max_hkey, rec);
1989 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
1990 rec = xfs_btree_rec_addr(cur, n, block);
1991 cur->bc_ops->init_high_key_from_rec(&hkey, rec);
1992 if (cur->bc_ops->diff_two_keys(cur, &hkey, &max_hkey)
1993 > 0)
1994 max_hkey = hkey;
1995 }
1996
1997 high = xfs_btree_high_key_from_key(cur, key);
1998 memcpy(high, &max_hkey, cur->bc_ops->key_len / 2);
1999 }
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002000}
2001
Darrick J. Wong973b8312016-08-03 12:22:12 +10002002/* Determine the low (and high if overlapped) keys of a node block */
2003STATIC void
2004xfs_btree_get_node_keys(
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002005 struct xfs_btree_cur *cur,
2006 struct xfs_btree_block *block,
2007 union xfs_btree_key *key)
2008{
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002009 union xfs_btree_key *hkey;
2010 union xfs_btree_key *max_hkey;
2011 union xfs_btree_key *high;
Darrick J. Wong973b8312016-08-03 12:22:12 +10002012 int n;
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002013
Darrick J. Wong973b8312016-08-03 12:22:12 +10002014 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2015 memcpy(key, xfs_btree_key_addr(cur, 1, block),
2016 cur->bc_ops->key_len / 2);
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002017
Darrick J. Wong973b8312016-08-03 12:22:12 +10002018 max_hkey = xfs_btree_high_key_addr(cur, 1, block);
2019 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
2020 hkey = xfs_btree_high_key_addr(cur, n, block);
2021 if (cur->bc_ops->diff_two_keys(cur, hkey, max_hkey) > 0)
2022 max_hkey = hkey;
2023 }
2024
2025 high = xfs_btree_high_key_from_key(cur, key);
2026 memcpy(high, max_hkey, cur->bc_ops->key_len / 2);
2027 } else {
2028 memcpy(key, xfs_btree_key_addr(cur, 1, block),
2029 cur->bc_ops->key_len);
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002030 }
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002031}
2032
Darrick J. Wong70b22652016-08-03 11:03:38 +10002033/* Derive the keys for any btree block. */
2034STATIC void
2035xfs_btree_get_keys(
2036 struct xfs_btree_cur *cur,
2037 struct xfs_btree_block *block,
2038 union xfs_btree_key *key)
2039{
2040 if (be16_to_cpu(block->bb_level) == 0)
Darrick J. Wong973b8312016-08-03 12:22:12 +10002041 xfs_btree_get_leaf_keys(cur, block, key);
Darrick J. Wong70b22652016-08-03 11:03:38 +10002042 else
Darrick J. Wong973b8312016-08-03 12:22:12 +10002043 xfs_btree_get_node_keys(cur, block, key);
Darrick J. Wong70b22652016-08-03 11:03:38 +10002044}
2045
2046/*
2047 * Decide if we need to update the parent keys of a btree block. For
2048 * a standard btree this is only necessary if we're updating the first
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002049 * record/key. For an overlapping btree, we must always update the
2050 * keys because the highest key can be in any of the records or keys
2051 * in the block.
Darrick J. Wong70b22652016-08-03 11:03:38 +10002052 */
2053static inline bool
2054xfs_btree_needs_key_update(
2055 struct xfs_btree_cur *cur,
2056 int ptr)
2057{
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002058 return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1;
2059}
2060
/*
 * Update the low and high parent keys of the given level, progressing
 * towards the root. If force_all is false, stop if the keys for a given
 * level do not need updating.
 *
 * Only used for overlapping btrees, where both halves of the key area
 * must be kept in sync with the child block's contents.
 */
STATIC int
__xfs_btree_updkeys(
	struct xfs_btree_cur	*cur,
	int			level,
	struct xfs_btree_block	*block,
	struct xfs_buf		*bp0,
	bool			force_all)
{
	union xfs_btree_key	key;	/* keys from current level */
	union xfs_btree_key	*lkey;	/* keys from the next level up */
	union xfs_btree_key	*hkey;
	union xfs_btree_key	*nlkey;	/* keys from the next level up */
	union xfs_btree_key	*nhkey;
	struct xfs_buf		*bp;
	int			ptr;

	ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);

	/* Exit if there aren't any parent levels to update. */
	if (level + 1 >= cur->bc_nlevels)
		return 0;

	trace_xfs_btree_updkeys(cur, level, bp0);

	/* lkey/hkey are the low/high halves of the scratch key area. */
	lkey = &key;
	hkey = xfs_btree_high_key_from_key(cur, lkey);
	xfs_btree_get_keys(cur, block, lkey);
	for (level++; level < cur->bc_nlevels; level++) {
#ifdef DEBUG
		int		error;
#endif
		block = xfs_btree_get_block(cur, level, &bp);
		trace_xfs_btree_updkeys(cur, level, bp);
#ifdef DEBUG
		error = xfs_btree_check_block(cur, block, level, bp);
		if (error) {
			XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
			return error;
		}
#endif
		/*
		 * Compare the parent's stored keys for this child against
		 * the freshly derived ones; stop early if they already
		 * match and the caller didn't ask to force all levels.
		 */
		ptr = cur->bc_ptrs[level];
		nlkey = xfs_btree_key_addr(cur, ptr, block);
		nhkey = xfs_btree_high_key_addr(cur, ptr, block);
		if (!force_all &&
		    !(cur->bc_ops->diff_two_keys(cur, nlkey, lkey) != 0 ||
		      cur->bc_ops->diff_two_keys(cur, nhkey, hkey) != 0))
			break;
		xfs_btree_copy_keys(cur, nlkey, lkey, 1);
		xfs_btree_log_keys(cur, bp, ptr, ptr);
		if (level + 1 >= cur->bc_nlevels)
			break;
		/* Re-derive this level's keys to push up on the next pass. */
		xfs_btree_get_node_keys(cur, block, lkey);
	}

	return 0;
}
2122
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002123/* Update all the keys from some level in cursor back to the root. */
2124STATIC int
2125xfs_btree_updkeys_force(
2126 struct xfs_btree_cur *cur,
2127 int level)
2128{
2129 struct xfs_buf *bp;
2130 struct xfs_btree_block *block;
2131
2132 block = xfs_btree_get_block(cur, level, &bp);
2133 return __xfs_btree_updkeys(cur, level, block, bp, true);
Darrick J. Wong70b22652016-08-03 11:03:38 +10002134}
2135
/*
 * Update the parent keys of the given level, progressing towards the root.
 *
 * Overlapping btrees delegate to __xfs_btree_updkeys so both low and
 * high keys are maintained; standard btrees only need to propagate the
 * block's first key, and only while the cursor points at entry 1.
 */
STATIC int
xfs_btree_update_keys(
	struct xfs_btree_cur	*cur,
	int			level)
{
	struct xfs_btree_block	*block;
	struct xfs_buf		*bp;
	union xfs_btree_key	*kp;
	union xfs_btree_key	key;
	int			ptr;

	ASSERT(level >= 0);

	block = xfs_btree_get_block(cur, level, &bp);
	if (cur->bc_flags & XFS_BTREE_OVERLAPPING)
		return __xfs_btree_updkeys(cur, level, block, bp, false);

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGIK(cur, level, keyp);

	/*
	 * Go up the tree from this level toward the root.
	 * At each level, update the key value to the value input.
	 * Stop when we reach a level where the cursor isn't pointing
	 * at the first entry in the block.
	 */
	xfs_btree_get_keys(cur, block, &key);
	for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
#ifdef DEBUG
		int		error;
#endif
		block = xfs_btree_get_block(cur, level, &bp);
#ifdef DEBUG
		error = xfs_btree_check_block(cur, block, level, bp);
		if (error) {
			XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
			return error;
		}
#endif
		ptr = cur->bc_ptrs[level];
		kp = xfs_btree_key_addr(cur, ptr, block);
		xfs_btree_copy_keys(cur, kp, &key, 1);
		xfs_btree_log_keys(cur, bp, ptr, ptr);
	}

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	return 0;
}
Christoph Hellwig278d0ca2008-10-30 16:56:32 +11002187
/*
 * Update the record referred to by cur to the value in the
 * given record. This either works (return 0) or gets an
 * EFSCORRUPTED error.
 *
 * The cursor must already point at the leaf record to overwrite
 * (normally via a prior xfs_btree_lookup).
 */
int
xfs_btree_update(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec)
{
	struct xfs_btree_block	*block;
	struct xfs_buf		*bp;
	int			error;
	int			ptr;
	union xfs_btree_rec	*rp;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGR(cur, rec);

	/* Pick up the current block. */
	block = xfs_btree_get_block(cur, 0, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, 0, bp);
	if (error)
		goto error0;
#endif
	/* Get the address of the rec to be updated. */
	ptr = cur->bc_ptrs[0];
	rp = xfs_btree_rec_addr(cur, ptr, block);

	/* Fill in the new contents and log them. */
	xfs_btree_copy_recs(cur, rp, rec, 1);
	xfs_btree_log_recs(cur, bp, ptr, ptr);

	/*
	 * If we are tracking the last record in the tree and
	 * we are at the far right edge of the tree, update it.
	 */
	if (xfs_btree_is_lastrec(cur, block, 0)) {
		cur->bc_ops->update_lastrec(cur, block, rec,
					    ptr, LASTREC_UPDATE);
	}

	/* Pass new key value up to our parent. */
	if (xfs_btree_needs_key_update(cur, ptr)) {
		error = xfs_btree_update_keys(cur, 0);
		if (error)
			goto error0;
	}

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
2246
/*
 * Move 1 record left from cur/level if possible.
 * Update cur to reflect the new path.
 *
 * Returns 0 with *stat = 1 if a record was shifted, *stat = 0 if the
 * shift was not possible (no left sibling, left block full, or the
 * cursor entry itself would be the one moved), or a negative errno.
 */
STATIC int				/* error */
xfs_btree_lshift(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	struct xfs_buf		*lbp;		/* left buffer pointer */
	struct xfs_btree_block	*left;		/* left btree block */
	int			lrecs;		/* left record count */
	struct xfs_buf		*rbp;		/* right buffer pointer */
	struct xfs_btree_block	*right;		/* right btree block */
	struct xfs_btree_cur	*tcur;		/* temporary btree cursor */
	int			rrecs;		/* right record count */
	union xfs_btree_ptr	lptr;		/* left btree pointer */
	union xfs_btree_key	*rkp = NULL;	/* right btree key */
	union xfs_btree_ptr	*rpp = NULL;	/* right address pointer */
	union xfs_btree_rec	*rrp = NULL;	/* right record pointer */
	int			error;		/* error return value */
	int			i;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGI(cur, level);

	/* An inode-fork root has no siblings, so nothing to shift into. */
	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    level == cur->bc_nlevels - 1)
		goto out0;

	/* Set up variables for this block as "right". */
	right = xfs_btree_get_block(cur, level, &rbp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, right, level, rbp);
	if (error)
		goto error0;
#endif

	/* If we've got no left sibling then we can't shift an entry left. */
	xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
	if (xfs_btree_ptr_is_null(cur, &lptr))
		goto out0;

	/*
	 * If the cursor entry is the one that would be moved, don't
	 * do it... it's too complicated.
	 */
	if (cur->bc_ptrs[level] <= 1)
		goto out0;

	/* Set up the left neighbor as "left". */
	error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
	if (error)
		goto error0;

	/* If it's full, it can't take another entry. */
	lrecs = xfs_btree_get_numrecs(left);
	if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
		goto out0;

	rrecs = xfs_btree_get_numrecs(right);

	/*
	 * We add one entry to the left side and remove one for the right side.
	 * Account for it here, the changes will be updated on disk and logged
	 * later.
	 */
	lrecs++;
	rrecs--;

	XFS_BTREE_STATS_INC(cur, lshift);
	XFS_BTREE_STATS_ADD(cur, moves, 1);

	/*
	 * If non-leaf, copy a key and a ptr to the left block.
	 * Log the changes to the left block.
	 */
	if (level > 0) {
		/* It's a non-leaf.  Move keys and pointers. */
		union xfs_btree_key	*lkp;	/* left btree key */
		union xfs_btree_ptr	*lpp;	/* left address pointer */

		lkp = xfs_btree_key_addr(cur, lrecs, left);
		rkp = xfs_btree_key_addr(cur, 1, right);

		lpp = xfs_btree_ptr_addr(cur, lrecs, left);
		rpp = xfs_btree_ptr_addr(cur, 1, right);
#ifdef DEBUG
		error = xfs_btree_check_ptr(cur, rpp, 0, level);
		if (error)
			goto error0;
#endif
		xfs_btree_copy_keys(cur, lkp, rkp, 1);
		xfs_btree_copy_ptrs(cur, lpp, rpp, 1);

		xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
		xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);

		/* The moved key must still sort after the left block's last. */
		ASSERT(cur->bc_ops->keys_inorder(cur,
			xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
	} else {
		/* It's a leaf.  Move records. */
		union xfs_btree_rec	*lrp;	/* left record pointer */

		lrp = xfs_btree_rec_addr(cur, lrecs, left);
		rrp = xfs_btree_rec_addr(cur, 1, right);

		xfs_btree_copy_recs(cur, lrp, rrp, 1);
		xfs_btree_log_recs(cur, lbp, lrecs, lrecs);

		/* The moved record must still sort after the left block's last. */
		ASSERT(cur->bc_ops->recs_inorder(cur,
			xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
	}

	xfs_btree_set_numrecs(left, lrecs);
	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);

	xfs_btree_set_numrecs(right, rrecs);
	xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);

	/*
	 * Slide the contents of right down one entry.
	 */
	XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
	if (level > 0) {
		/* It's a nonleaf. operate on keys and ptrs */
#ifdef DEBUG
		int			i;		/* loop index */

		for (i = 0; i < rrecs; i++) {
			error = xfs_btree_check_ptr(cur, rpp, i + 1, level);
			if (error)
				goto error0;
		}
#endif
		xfs_btree_shift_keys(cur,
				xfs_btree_key_addr(cur, 2, right),
				-1, rrecs);
		xfs_btree_shift_ptrs(cur,
				xfs_btree_ptr_addr(cur, 2, right),
				-1, rrecs);

		xfs_btree_log_keys(cur, rbp, 1, rrecs);
		xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
	} else {
		/* It's a leaf. operate on records */
		xfs_btree_shift_recs(cur,
			xfs_btree_rec_addr(cur, 2, right),
			-1, rrecs);
		xfs_btree_log_recs(cur, rbp, 1, rrecs);
	}

	/*
	 * Using a temporary cursor, update the parent key values of the
	 * block on the left.  Only needed for overlapping btrees, where
	 * the left block's high key may have changed.
	 */
	if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
		error = xfs_btree_dup_cursor(cur, &tcur);
		if (error)
			goto error0;
		i = xfs_btree_firstrec(tcur, level);
		XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0);

		error = xfs_btree_decrement(tcur, level, &i);
		if (error)
			goto error1;

		/* Update the parent high keys of the left block, if needed. */
		error = xfs_btree_update_keys(tcur, level);
		if (error)
			goto error1;

		xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
	}

	/* Update the parent keys of the right block. */
	error = xfs_btree_update_keys(cur, level);
	if (error)
		goto error0;

	/* Slide the cursor value left one. */
	cur->bc_ptrs[level]--;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;

out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;

error1:
	XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
	return error;
}
2450
/*
 * Move 1 record right from cur/level if possible.
 * Update cur to reflect the new path.
 *
 * Returns 0 on success or an error code.  On return *stat is 1 if a
 * record was shifted, 0 if the shift was not possible (no right
 * sibling, the right block is full, or the cursor points at the very
 * entry that would move).
 */
STATIC int					/* error */
xfs_btree_rshift(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	struct xfs_buf		*lbp;		/* left buffer pointer */
	struct xfs_btree_block	*left;		/* left btree block */
	struct xfs_buf		*rbp;		/* right buffer pointer */
	struct xfs_btree_block	*right;		/* right btree block */
	struct xfs_btree_cur	*tcur;		/* temporary btree cursor */
	union xfs_btree_ptr	rptr;		/* right block pointer */
	union xfs_btree_key	*rkp;		/* right btree key */
	int			rrecs;		/* right record count */
	int			lrecs;		/* left record count */
	int			error;		/* error return value */
	int			i;		/* loop counter */

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGI(cur, level);

	/* An inode root has no siblings, so there is nowhere to shift to. */
	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    (level == cur->bc_nlevels - 1))
		goto out0;

	/* Set up variables for this block as "left". */
	left = xfs_btree_get_block(cur, level, &lbp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, left, level, lbp);
	if (error)
		goto error0;
#endif

	/* If we've got no right sibling then we can't shift an entry right. */
	xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
	if (xfs_btree_ptr_is_null(cur, &rptr))
		goto out0;

	/*
	 * If the cursor entry is the one that would be moved, don't
	 * do it... it's too complicated.
	 */
	lrecs = xfs_btree_get_numrecs(left);
	if (cur->bc_ptrs[level] >= lrecs)
		goto out0;

	/* Set up the right neighbor as "right". */
	error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
	if (error)
		goto error0;

	/* If it's full, it can't take another entry. */
	rrecs = xfs_btree_get_numrecs(right);
	if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
		goto out0;

	XFS_BTREE_STATS_INC(cur, rshift);
	XFS_BTREE_STATS_ADD(cur, moves, rrecs);

	/*
	 * Make a hole at the start of the right neighbor block, then
	 * copy the last left block entry to the hole.
	 */
	if (level > 0) {
		/* It's a nonleaf. make a hole in the keys and ptrs */
		union xfs_btree_key	*lkp;
		union xfs_btree_ptr	*lpp;
		union xfs_btree_ptr	*rpp;

		lkp = xfs_btree_key_addr(cur, lrecs, left);
		lpp = xfs_btree_ptr_addr(cur, lrecs, left);
		rkp = xfs_btree_key_addr(cur, 1, right);
		rpp = xfs_btree_ptr_addr(cur, 1, right);

#ifdef DEBUG
		/* Validate every child pointer that is about to be moved. */
		for (i = rrecs - 1; i >= 0; i--) {
			error = xfs_btree_check_ptr(cur, rpp, i, level);
			if (error)
				goto error0;
		}
#endif

		/* Open up slot 1 in the right block. */
		xfs_btree_shift_keys(cur, rkp, 1, rrecs);
		xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);

#ifdef DEBUG
		error = xfs_btree_check_ptr(cur, lpp, 0, level);
		if (error)
			goto error0;
#endif

		/* Now put the new data in, and log it. */
		xfs_btree_copy_keys(cur, rkp, lkp, 1);
		xfs_btree_copy_ptrs(cur, rpp, lpp, 1);

		xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
		xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);

		/* The moved key must still sort before its new neighbor. */
		ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
			xfs_btree_key_addr(cur, 2, right)));
	} else {
		/* It's a leaf. make a hole in the records */
		union xfs_btree_rec	*lrp;
		union xfs_btree_rec	*rrp;

		lrp = xfs_btree_rec_addr(cur, lrecs, left);
		rrp = xfs_btree_rec_addr(cur, 1, right);

		xfs_btree_shift_recs(cur, rrp, 1, rrecs);

		/* Now put the new data in, and log it. */
		xfs_btree_copy_recs(cur, rrp, lrp, 1);
		xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
	}

	/*
	 * Decrement and log left's numrecs, bump and log right's numrecs.
	 */
	xfs_btree_set_numrecs(left, --lrecs);
	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);

	xfs_btree_set_numrecs(right, ++rrecs);
	xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);

	/*
	 * Using a temporary cursor, update the parent key values of the
	 * block on the right.
	 */
	error = xfs_btree_dup_cursor(cur, &tcur);
	if (error)
		goto error0;
	i = xfs_btree_lastrec(tcur, level);
	/*
	 * NOTE(review): on this corruption path tcur is not freed (goto
	 * error0, not error1) — appears deliberate/inherited upstream
	 * behaviour since the fs shuts down on corruption; confirm.
	 */
	XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0);

	/* Walk tcur over to the right sibling we just filled. */
	error = xfs_btree_increment(tcur, level, &i);
	if (error)
		goto error1;

	/* Update the parent high keys of the left block, if needed. */
	if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
		error = xfs_btree_update_keys(cur, level);
		if (error)
			goto error1;
	}

	/* Update the parent keys of the right block. */
	error = xfs_btree_update_keys(tcur, level);
	if (error)
		goto error1;

	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;

out0:
	/* Shift not possible; not an error. */
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;

error1:
	/* Error after the temporary cursor was created: tear it down too. */
	XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
	return error;
}
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002626
/*
 * Split cur/level block in half.
 * Return new block number and the key to its first
 * record (to be inserted into parent).
 *
 * @ptrp:  out: pointer to the newly allocated (right) block.
 * @key:   out: key of the new block's first record, for the parent insert.
 * @curp:  out: duplicate cursor pointing into the right block, created only
 *         when there is a parent level to update.
 * @stat:  out: 1 on success, 0 if no block could be allocated.
 */
STATIC int					/* error */
__xfs_btree_split(
	struct xfs_btree_cur	*cur,
	int			level,
	union xfs_btree_ptr	*ptrp,
	union xfs_btree_key	*key,
	struct xfs_btree_cur	**curp,
	int			*stat)		/* success/failure */
{
	union xfs_btree_ptr	lptr;		/* left sibling block ptr */
	struct xfs_buf		*lbp;		/* left buffer pointer */
	struct xfs_btree_block	*left;		/* left btree block */
	union xfs_btree_ptr	rptr;		/* right sibling block ptr */
	struct xfs_buf		*rbp;		/* right buffer pointer */
	struct xfs_btree_block	*right;		/* right btree block */
	union xfs_btree_ptr	rrptr;		/* right-right sibling ptr */
	struct xfs_buf		*rrbp;		/* right-right buffer pointer */
	struct xfs_btree_block	*rrblock;	/* right-right btree block */
	int			lrecs;
	int			rrecs;
	int			src_index;
	int			error;		/* error return value */
#ifdef DEBUG
	int			i;
#endif

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);

	XFS_BTREE_STATS_INC(cur, split);

	/* Set up left block (current one). */
	left = xfs_btree_get_block(cur, level, &lbp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, left, level, lbp);
	if (error)
		goto error0;
#endif

	xfs_btree_buf_to_ptr(cur, lbp, &lptr);

	/* Allocate the new block. If we can't do it, we're toast. Give up. */
	error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat);
	if (error)
		goto error0;
	if (*stat == 0)
		goto out0;
	XFS_BTREE_STATS_INC(cur, alloc);

	/* Set up the new block as "right". */
	error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
	if (error)
		goto error0;

	/* Fill in the btree header for the new right block. */
	xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);

	/*
	 * Split the entries between the old and the new block evenly.
	 * Make sure that if there's an odd number of entries now, that
	 * each new block will have the same number of entries.
	 */
	lrecs = xfs_btree_get_numrecs(left);
	rrecs = lrecs / 2;
	if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
		rrecs++;
	src_index = (lrecs - rrecs + 1);

	XFS_BTREE_STATS_ADD(cur, moves, rrecs);

	/* Adjust numrecs for the later get_*_keys() calls. */
	lrecs -= rrecs;
	xfs_btree_set_numrecs(left, lrecs);
	xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);

	/*
	 * Copy btree block entries from the left block over to the
	 * new block, the right. Update the right block and log the
	 * changes.
	 */
	if (level > 0) {
		/* It's a non-leaf. Move keys and pointers. */
		union xfs_btree_key	*lkp;	/* left btree key */
		union xfs_btree_ptr	*lpp;	/* left address pointer */
		union xfs_btree_key	*rkp;	/* right btree key */
		union xfs_btree_ptr	*rpp;	/* right address pointer */

		lkp = xfs_btree_key_addr(cur, src_index, left);
		lpp = xfs_btree_ptr_addr(cur, src_index, left);
		rkp = xfs_btree_key_addr(cur, 1, right);
		rpp = xfs_btree_ptr_addr(cur, 1, right);

#ifdef DEBUG
		/* Validate every child pointer that is about to move. */
		for (i = src_index; i < rrecs; i++) {
			error = xfs_btree_check_ptr(cur, lpp, i, level);
			if (error)
				goto error0;
		}
#endif

		/* Copy the keys & pointers to the new block. */
		xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
		xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);

		xfs_btree_log_keys(cur, rbp, 1, rrecs);
		xfs_btree_log_ptrs(cur, rbp, 1, rrecs);

		/* Stash the keys of the new block for later insertion. */
		xfs_btree_get_node_keys(cur, right, key);
	} else {
		/* It's a leaf. Move records. */
		union xfs_btree_rec	*lrp;	/* left record pointer */
		union xfs_btree_rec	*rrp;	/* right record pointer */

		lrp = xfs_btree_rec_addr(cur, src_index, left);
		rrp = xfs_btree_rec_addr(cur, 1, right);

		/* Copy records to the new block. */
		xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
		xfs_btree_log_recs(cur, rbp, 1, rrecs);

		/* Stash the keys of the new block for later insertion. */
		xfs_btree_get_leaf_keys(cur, right, key);
	}

	/*
	 * Find the left block number by looking in the buffer.
	 * Adjust sibling pointers.
	 */
	xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
	xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
	xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
	xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);

	xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);

	/*
	 * If there's a block to the new block's right, make that block
	 * point back to right instead of to left.
	 */
	if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
		error = xfs_btree_read_buf_block(cur, &rrptr,
							0, &rrblock, &rrbp);
		if (error)
			goto error0;
		xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
		xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
	}

	/* Update the parent high keys of the left block, if needed. */
	if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
		error = xfs_btree_update_keys(cur, level);
		if (error)
			goto error0;
	}

	/*
	 * If the cursor is really in the right block, move it there.
	 * If it's just pointing past the last entry in left, then we'll
	 * insert there, so don't change anything in that case.
	 */
	if (cur->bc_ptrs[level] > lrecs + 1) {
		xfs_btree_setbuf(cur, level, rbp);
		cur->bc_ptrs[level] -= lrecs;
	}
	/*
	 * If there are more levels, we'll need another cursor which refers
	 * the right block, no matter where this cursor was.
	 */
	if (level + 1 < cur->bc_nlevels) {
		error = xfs_btree_dup_cursor(cur, curp);
		if (error)
			goto error0;
		(*curp)->bc_ptrs[level + 1]++;
	}
	*ptrp = rptr;
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;
out0:
	/* Allocation failed softly; caller sees *stat == 0, no error. */
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
Christoph Hellwig344207c2008-10-30 16:57:16 +11002822
/*
 * Argument package for handing a btree split off to a worker thread
 * (see xfs_btree_split_worker); mirrors __xfs_btree_split()'s
 * parameter list plus the completion plumbing.
 */
struct xfs_btree_split_args {
	struct xfs_btree_cur	*cur;		/* cursor of block to split */
	int			level;		/* level being split */
	union xfs_btree_ptr	*ptrp;		/* out: new block pointer */
	union xfs_btree_key	*key;		/* out: new block's first key */
	struct xfs_btree_cur	**curp;		/* out: new cursor, if any */
	int			*stat;		/* success/failure */
	int			result;		/* __xfs_btree_split() retval */
	bool			kswapd;	/* allocation in kswapd context */
	struct completion	*done;		/* signalled when work is done */
	struct work_struct	work;		/* workqueue bookkeeping */
};
2835
/*
 * Stack switching interfaces for allocation
 */
static void
xfs_btree_split_worker(
	struct work_struct	*work)
{
	struct xfs_btree_split_args	*args = container_of(work,
						struct xfs_btree_split_args, work);
	unsigned long		pflags;
	unsigned long		new_pflags = PF_FSTRANS;

	/*
	 * we are in a transaction context here, but may also be doing work
	 * in kswapd context, and hence we may need to inherit that state
	 * temporarily to ensure that we don't block waiting for memory reclaim
	 * in any way.
	 */
	if (args->kswapd)
		new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;

	/* Borrow the submitter's task flags for the duration of the split. */
	current_set_flags_nested(&pflags, new_pflags);

	/* Run the real split and stash its return value for the waiter. */
	args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
					 args->key, args->curp, args->stat);
	/*
	 * NOTE(review): complete() wakes the submitter before the flags are
	 * restored below; args must stay valid because the submitter waits
	 * on the on-stack completion before returning — confirm ordering.
	 */
	complete(args->done);

	current_restore_flags_nested(&pflags, new_pflags);
}
2865
2866/*
2867 * BMBT split requests often come in with little stack to work on. Push
2868 * them off to a worker thread so there is lots of stack to use. For the other
2869 * btree types, just call directly to avoid the context switch overhead here.
2870 */
2871STATIC int /* error */
2872xfs_btree_split(
2873 struct xfs_btree_cur *cur,
2874 int level,
2875 union xfs_btree_ptr *ptrp,
2876 union xfs_btree_key *key,
2877 struct xfs_btree_cur **curp,
2878 int *stat) /* success/failure */
2879{
2880 struct xfs_btree_split_args args;
2881 DECLARE_COMPLETION_ONSTACK(done);
2882
2883 if (cur->bc_btnum != XFS_BTNUM_BMAP)
2884 return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
2885
2886 args.cur = cur;
2887 args.level = level;
2888 args.ptrp = ptrp;
2889 args.key = key;
2890 args.curp = curp;
2891 args.stat = stat;
2892 args.done = &done;
2893 args.kswapd = current_is_kswapd();
2894 INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
2895 queue_work(xfs_alloc_wq, &args.work);
2896 wait_for_completion(&done);
2897 destroy_work_on_stack(&args.work);
2898 return args.result;
2899}
2900
2901
/*
 * Copy the old inode root contents into a real block and make the
 * broot point to it.
 *
 * Used when an inode-rooted btree's root block overflows: the root
 * contents move into a freshly allocated disk block (the new child),
 * the in-inode root shrinks to a single key/pointer referencing it,
 * and the tree grows one level.
 *
 * @logflags: out: inode logging flags to pass to xfs_trans_log_inode().
 * @stat:     out: 1 on success, 0 if no block could be allocated.
 */
int						/* error */
xfs_btree_new_iroot(
	struct xfs_btree_cur	*cur,		/* btree cursor */
	int			*logflags,	/* logging flags for inode */
	int			*stat)		/* return status - 0 fail */
{
	struct xfs_buf		*cbp;		/* buffer for cblock */
	struct xfs_btree_block	*block;		/* btree block */
	struct xfs_btree_block	*cblock;	/* child btree block */
	union xfs_btree_key	*ckp;		/* child key pointer */
	union xfs_btree_ptr	*cpp;		/* child ptr pointer */
	union xfs_btree_key	*kp;		/* pointer to btree key */
	union xfs_btree_ptr	*pp;		/* pointer to block addr */
	union xfs_btree_ptr	nptr;		/* new block addr */
	int			level;		/* btree level */
	int			error;		/* error return code */
#ifdef DEBUG
	int			i;		/* loop counter */
#endif

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_STATS_INC(cur, newroot);

	/* Only meaningful for inode-rooted btrees. */
	ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);

	level = cur->bc_nlevels - 1;

	block = xfs_btree_get_iroot(cur);
	pp = xfs_btree_ptr_addr(cur, 1, block);

	/* Allocate the new block. If we can't do it, we're toast. Give up. */
	error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);
	if (error)
		goto error0;
	if (*stat == 0) {
		/* Soft failure: no space, but not an error. */
		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
		return 0;
	}
	XFS_BTREE_STATS_INC(cur, alloc);

	/* Copy the root into a real block. */
	error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
	if (error)
		goto error0;

	/*
	 * we can't just memcpy() the root in for CRC enabled btree blocks.
	 * In that case have to also ensure the blkno remains correct
	 */
	memcpy(cblock, block, xfs_btree_block_len(cur));
	if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
		if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
			cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn);
		else
			cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn);
	}

	/* Grow the tree: root rises one level and keeps a single entry. */
	be16_add_cpu(&block->bb_level, 1);
	xfs_btree_set_numrecs(block, 1);
	cur->bc_nlevels++;
	cur->bc_ptrs[level + 1] = 1;

	kp = xfs_btree_key_addr(cur, 1, block);
	ckp = xfs_btree_key_addr(cur, 1, cblock);
	xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));

	cpp = xfs_btree_ptr_addr(cur, 1, cblock);
#ifdef DEBUG
	for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
		error = xfs_btree_check_ptr(cur, pp, i, level);
		if (error)
			goto error0;
	}
#endif
	xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));

#ifdef DEBUG
	error = xfs_btree_check_ptr(cur, &nptr, 0, level);
	if (error)
		goto error0;
#endif
	/* Point the in-inode root at the new child block. */
	xfs_btree_copy_ptrs(cur, pp, &nptr, 1);

	/* Shrink the inode fork's broot down to the single remaining entry. */
	xfs_iroot_realloc(cur->bc_private.b.ip,
			  1 - xfs_btree_get_numrecs(cblock),
			  cur->bc_private.b.whichfork);

	xfs_btree_setbuf(cur, level, cbp);

	/*
	 * Do all this logging at the end so that
	 * the root is at the right level.
	 */
	xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
	xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
	xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));

	*logflags |=
		XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork);
	*stat = 1;
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	return 0;
error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
3012
/*
 * Allocate a new root block, fill it in.
 *
 * Called after the old root split: a new root is allocated one level
 * up, holding exactly two entries that reference the two halves of
 * the old root.  *stat is 1 on success, 0 if no block was available.
 */
STATIC int				/* error */
xfs_btree_new_root(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			*stat)	/* success/failure */
{
	struct xfs_btree_block	*block;	/* one half of the old root block */
	struct xfs_buf		*bp;	/* buffer containing block */
	int			error;	/* error return value */
	struct xfs_buf		*lbp;	/* left buffer pointer */
	struct xfs_btree_block	*left;	/* left btree block */
	struct xfs_buf		*nbp;	/* new (root) buffer */
	struct xfs_btree_block	*new;	/* new (root) btree block */
	int			nptr;	/* new value for key index, 1 or 2 */
	struct xfs_buf		*rbp;	/* right buffer pointer */
	struct xfs_btree_block	*right;	/* right btree block */
	union xfs_btree_ptr	rptr;
	union xfs_btree_ptr	lptr;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_STATS_INC(cur, newroot);

	/* initialise our start point from the cursor */
	cur->bc_ops->init_ptr_from_cur(cur, &rptr);

	/* Allocate the new block. If we can't do it, we're toast. Give up. */
	error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat);
	if (error)
		goto error0;
	if (*stat == 0)
		goto out0;
	XFS_BTREE_STATS_INC(cur, alloc);

	/* Set up the new block. */
	error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
	if (error)
		goto error0;

	/* Set the root in the holding structure  increasing the level by 1. */
	cur->bc_ops->set_root(cur, &lptr, 1);

	/*
	 * At the previous root level there are now two blocks: the old root,
	 * and the new block generated when it was split.  We don't know which
	 * one the cursor is pointing at, so we set up variables "left" and
	 * "right" for each case.
	 */
	block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
	if (error)
		goto error0;
#endif

	xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
	if (!xfs_btree_ptr_is_null(cur, &rptr)) {
		/* Our block is left, pick up the right block. */
		lbp = bp;
		xfs_btree_buf_to_ptr(cur, lbp, &lptr);
		left = block;
		error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
		if (error)
			goto error0;
		bp = rbp;
		nptr = 1;	/* cursor stays on the first (left) entry */
	} else {
		/* Our block is right, pick up the left block. */
		rbp = bp;
		xfs_btree_buf_to_ptr(cur, rbp, &rptr);
		right = block;
		xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
		error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
		if (error)
			goto error0;
		bp = lbp;
		nptr = 2;	/* cursor moves to the second (right) entry */
	}

	/* Fill in the new block's btree header and log it. */
	xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
	xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
	ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
			!xfs_btree_ptr_is_null(cur, &rptr));

	/* Fill in the key data in the new root. */
	if (xfs_btree_get_level(left) > 0) {
		/*
		 * Get the keys for the left block's keys and put them directly
		 * in the parent block.  Do the same for the right block.
		 */
		xfs_btree_get_node_keys(cur, left,
				xfs_btree_key_addr(cur, 1, new));
		xfs_btree_get_node_keys(cur, right,
				xfs_btree_key_addr(cur, 2, new));
	} else {
		/*
		 * Get the keys for the left block's records and put them
		 * directly in the parent block.  Do the same for the right
		 * block.
		 */
		xfs_btree_get_leaf_keys(cur, left,
			xfs_btree_key_addr(cur, 1, new));
		xfs_btree_get_leaf_keys(cur, right,
			xfs_btree_key_addr(cur, 2, new));
	}
	xfs_btree_log_keys(cur, nbp, 1, 2);

	/* Fill in the pointer data in the new root. */
	xfs_btree_copy_ptrs(cur,
		xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
	xfs_btree_copy_ptrs(cur,
		xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
	xfs_btree_log_ptrs(cur, nbp, 1, 2);

	/* Fix up the cursor. */
	xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
	cur->bc_ptrs[cur->bc_nlevels] = nptr;
	cur->bc_nlevels++;
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;
error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
out0:
	/* Allocation failed softly; caller sees *stat == 0, no error. */
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;
}
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003145
/*
 * Make room in the block that cur/level points at so one more entry
 * can be inserted.  Strategy, in order: grow an in-inode root, replace
 * an in-inode root with a real block, shift a record to the right
 * sibling, shift one to the left sibling, and finally split the block.
 *
 * On a split, *nptr/*key/*ncur describe the new block so the caller
 * can insert into the parent; *oindex/*index track where the cursor
 * ended up.  *stat is 1 if room was made, 0 otherwise.
 */
STATIC int
xfs_btree_make_block_unfull(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			level,	/* btree level */
	int			numrecs,/* # of recs in block */
	int			*oindex,/* old tree index */
	int			*index,	/* new tree index */
	union xfs_btree_ptr	*nptr,	/* new btree ptr */
	struct xfs_btree_cur	**ncur,	/* new btree cursor */
	union xfs_btree_key	*key,	/* key of new block */
	int			*stat)
{
	int			error = 0;

	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    level == cur->bc_nlevels - 1) {
		struct xfs_inode *ip = cur->bc_private.b.ip;

		if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
			/* A root block that can be made bigger. */
			xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
			*stat = 1;
		} else {
			/* A root block that needs replacing */
			int	logflags = 0;

			error = xfs_btree_new_iroot(cur, &logflags, stat);
			if (error || *stat == 0)
				return error;

			xfs_trans_log_inode(cur->bc_tp, ip, logflags);
		}

		return 0;
	}

	/* First, try shifting an entry to the right neighbor. */
	error = xfs_btree_rshift(cur, level, stat);
	if (error || *stat)
		return error;

	/* Next, try shifting an entry to the left neighbor. */
	error = xfs_btree_lshift(cur, level, stat);
	if (error)
		return error;

	if (*stat) {
		/* lshift moved the cursor; record its new position. */
		*oindex = *index = cur->bc_ptrs[level];
		return 0;
	}

	/*
	 * Next, try splitting the current block in half.
	 *
	 * If this works we have to re-set our variables because we
	 * could be in a different block now.
	 */
	error = xfs_btree_split(cur, level, nptr, key, ncur, stat);
	if (error || *stat == 0)
		return error;


	*index = cur->bc_ptrs[level];
	return 0;
}
3211
3212/*
3213 * Insert one record/level. Return information to the caller
3214 * allowing the next level up to proceed if necessary.
3215 */
3216STATIC int
3217xfs_btree_insrec(
3218 struct xfs_btree_cur *cur, /* btree cursor */
3219 int level, /* level to insert record at */
3220 union xfs_btree_ptr *ptrp, /* i/o: block number inserted */
Darrick J. Wonge5821e52016-08-03 11:02:39 +10003221 union xfs_btree_rec *rec, /* record to insert */
3222 union xfs_btree_key *key, /* i/o: block key for ptrp */
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003223 struct xfs_btree_cur **curp, /* output: new cursor replacing cur */
3224 int *stat) /* success/failure */
3225{
3226 struct xfs_btree_block *block; /* btree block */
3227 struct xfs_buf *bp; /* buffer for block */
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003228 union xfs_btree_ptr nptr; /* new block ptr */
3229 struct xfs_btree_cur *ncur; /* new btree cursor */
Darrick J. Wonga1d46cf2016-09-19 10:24:36 +10003230 union xfs_btree_key nkey; /* new block key */
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10003231 union xfs_btree_key *lkey;
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003232 int optr; /* old key/record index */
3233 int ptr; /* key/record index */
3234 int numrecs;/* number of records */
3235 int error; /* error return value */
3236#ifdef DEBUG
3237 int i;
3238#endif
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10003239 xfs_daddr_t old_bn;
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003240
3241 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
Darrick J. Wonge5821e52016-08-03 11:02:39 +10003242 XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, &rec);
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003243
3244 ncur = NULL;
Darrick J. Wonga1d46cf2016-09-19 10:24:36 +10003245 lkey = &nkey;
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003246
3247 /*
3248 * If we have an external root pointer, and we've made it to the
3249 * root level, allocate a new root block and we're done.
3250 */
3251 if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
3252 (level >= cur->bc_nlevels)) {
3253 error = xfs_btree_new_root(cur, stat);
3254 xfs_btree_set_ptr_null(cur, ptrp);
3255
3256 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3257 return error;
3258 }
3259
3260 /* If we're off the left edge, return failure. */
3261 ptr = cur->bc_ptrs[level];
3262 if (ptr == 0) {
3263 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3264 *stat = 0;
3265 return 0;
3266 }
3267
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003268 optr = ptr;
3269
3270 XFS_BTREE_STATS_INC(cur, insrec);
3271
3272 /* Get pointers to the btree buffer and block. */
3273 block = xfs_btree_get_block(cur, level, &bp);
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10003274 old_bn = bp ? bp->b_bn : XFS_BUF_DADDR_NULL;
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003275 numrecs = xfs_btree_get_numrecs(block);
3276
3277#ifdef DEBUG
3278 error = xfs_btree_check_block(cur, block, level, bp);
3279 if (error)
3280 goto error0;
3281
3282 /* Check that the new entry is being inserted in the right place. */
3283 if (ptr <= numrecs) {
3284 if (level == 0) {
Darrick J. Wonge5821e52016-08-03 11:02:39 +10003285 ASSERT(cur->bc_ops->recs_inorder(cur, rec,
Christoph Hellwig4a26e662008-10-30 16:58:32 +11003286 xfs_btree_rec_addr(cur, ptr, block)));
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003287 } else {
Darrick J. Wonge5821e52016-08-03 11:02:39 +10003288 ASSERT(cur->bc_ops->keys_inorder(cur, key,
Christoph Hellwig4a26e662008-10-30 16:58:32 +11003289 xfs_btree_key_addr(cur, ptr, block)));
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003290 }
3291 }
3292#endif
3293
3294 /*
3295 * If the block is full, we can't insert the new entry until we
3296 * make the block un-full.
3297 */
3298 xfs_btree_set_ptr_null(cur, &nptr);
3299 if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
3300 error = xfs_btree_make_block_unfull(cur, level, numrecs,
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10003301 &optr, &ptr, &nptr, &ncur, lkey, stat);
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003302 if (error || *stat == 0)
3303 goto error0;
3304 }
3305
3306 /*
3307 * The current block may have changed if the block was
3308 * previously full and we have just made space in it.
3309 */
3310 block = xfs_btree_get_block(cur, level, &bp);
3311 numrecs = xfs_btree_get_numrecs(block);
3312
3313#ifdef DEBUG
3314 error = xfs_btree_check_block(cur, block, level, bp);
3315 if (error)
3316 return error;
3317#endif
3318
3319 /*
3320 * At this point we know there's room for our new entry in the block
3321 * we're pointing at.
3322 */
3323 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
3324
3325 if (level > 0) {
3326 /* It's a nonleaf. make a hole in the keys and ptrs */
3327 union xfs_btree_key *kp;
3328 union xfs_btree_ptr *pp;
3329
3330 kp = xfs_btree_key_addr(cur, ptr, block);
3331 pp = xfs_btree_ptr_addr(cur, ptr, block);
3332
3333#ifdef DEBUG
3334 for (i = numrecs - ptr; i >= 0; i--) {
3335 error = xfs_btree_check_ptr(cur, pp, i, level);
3336 if (error)
3337 return error;
3338 }
3339#endif
3340
3341 xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
3342 xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
3343
3344#ifdef DEBUG
3345 error = xfs_btree_check_ptr(cur, ptrp, 0, level);
3346 if (error)
3347 goto error0;
3348#endif
3349
3350 /* Now put the new data in, bump numrecs and log it. */
Darrick J. Wonge5821e52016-08-03 11:02:39 +10003351 xfs_btree_copy_keys(cur, kp, key, 1);
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003352 xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
3353 numrecs++;
3354 xfs_btree_set_numrecs(block, numrecs);
3355 xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
3356 xfs_btree_log_keys(cur, bp, ptr, numrecs);
3357#ifdef DEBUG
3358 if (ptr < numrecs) {
Christoph Hellwig4a26e662008-10-30 16:58:32 +11003359 ASSERT(cur->bc_ops->keys_inorder(cur, kp,
3360 xfs_btree_key_addr(cur, ptr + 1, block)));
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003361 }
3362#endif
3363 } else {
3364 /* It's a leaf. make a hole in the records */
3365 union xfs_btree_rec *rp;
3366
3367 rp = xfs_btree_rec_addr(cur, ptr, block);
3368
3369 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
3370
3371 /* Now put the new data in, bump numrecs and log it. */
Darrick J. Wonge5821e52016-08-03 11:02:39 +10003372 xfs_btree_copy_recs(cur, rp, rec, 1);
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003373 xfs_btree_set_numrecs(block, ++numrecs);
3374 xfs_btree_log_recs(cur, bp, ptr, numrecs);
3375#ifdef DEBUG
3376 if (ptr < numrecs) {
Christoph Hellwig4a26e662008-10-30 16:58:32 +11003377 ASSERT(cur->bc_ops->recs_inorder(cur, rp,
3378 xfs_btree_rec_addr(cur, ptr + 1, block)));
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003379 }
3380#endif
3381 }
3382
3383 /* Log the new number of records in the btree header. */
3384 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3385
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10003386 /*
3387 * If we just inserted into a new tree block, we have to
3388 * recalculate nkey here because nkey is out of date.
3389 *
3390 * Otherwise we're just updating an existing block (having shoved
3391 * some records into the new tree block), so use the regular key
3392 * update mechanism.
3393 */
3394 if (bp && bp->b_bn != old_bn) {
3395 xfs_btree_get_keys(cur, block, lkey);
3396 } else if (xfs_btree_needs_key_update(cur, optr)) {
Darrick J. Wong973b8312016-08-03 12:22:12 +10003397 error = xfs_btree_update_keys(cur, level);
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003398 if (error)
3399 goto error0;
3400 }
3401
3402 /*
3403 * If we are tracking the last record in the tree and
3404 * we are at the far right edge of the tree, update it.
3405 */
3406 if (xfs_btree_is_lastrec(cur, block, level)) {
Darrick J. Wonge5821e52016-08-03 11:02:39 +10003407 cur->bc_ops->update_lastrec(cur, block, rec,
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003408 ptr, LASTREC_INSREC);
3409 }
3410
3411 /*
3412 * Return the new block number, if any.
3413 * If there is one, give back a record value and a cursor too.
3414 */
3415 *ptrp = nptr;
3416 if (!xfs_btree_ptr_is_null(cur, &nptr)) {
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10003417 xfs_btree_copy_keys(cur, key, lkey, 1);
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003418 *curp = ncur;
3419 }
3420
3421 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3422 *stat = 1;
3423 return 0;
3424
3425error0:
3426 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3427 return error;
3428}
3429
/*
 * Insert the record at the point referenced by cur.
 *
 * A multi-level split of the tree on insert will invalidate the original
 * cursor.  All callers of this function should assume that the cursor is
 * no longer valid and revalidate it.
 *
 * On success *stat is set to 1; on a corruption failure this returns a
 * negative error.  (delrec/insrec status conventions apply — see insrec.)
 */
int
xfs_btree_insert(
	struct xfs_btree_cur	*cur,
	int			*stat)
{
	int			error;	/* error return value */
	int			i;	/* result value, 0 for failure */
	int			level;	/* current level number in btree */
	union xfs_btree_ptr	nptr;	/* new block number (split result) */
	struct xfs_btree_cur	*ncur;	/* new cursor (split result) */
	struct xfs_btree_cur	*pcur;	/* previous level's cursor */
	union xfs_btree_key	bkey;	/* key of block to insert */
	union xfs_btree_key	*key;	/* pointer to bkey, passed down */
	union xfs_btree_rec	rec;	/* record to insert */

	level = 0;
	ncur = NULL;
	pcur = cur;
	key = &bkey;

	/* No split has happened yet, so there is no new right block. */
	xfs_btree_set_ptr_null(cur, &nptr);

	/* Make a key out of the record data to be inserted, and save it. */
	cur->bc_ops->init_rec_from_cur(cur, &rec);
	cur->bc_ops->init_key_from_rec(key, &rec);

	/*
	 * Loop going up the tree, starting at the leaf level.
	 * Stop when we don't get a split block, that must mean that
	 * the insert is finished with this level.
	 */
	do {
		/*
		 * Insert nrec/nptr into this level of the tree.
		 * Note if we fail, nptr will be null.
		 */
		error = xfs_btree_insrec(pcur, level, &nptr, &rec, key,
				&ncur, &i);
		if (error) {
			/* Don't leak a split cursor on the error path. */
			if (pcur != cur)
				xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
			goto error0;
		}

		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
		level++;

		/*
		 * See if the cursor we just used is trash.
		 * Can't trash the caller's cursor, but otherwise we should
		 * if ncur is a new cursor or we're about to be done.
		 */
		if (pcur != cur &&
		    (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
			/* Save the state from the cursor before we trash it */
			if (cur->bc_ops->update_cursor)
				cur->bc_ops->update_cursor(pcur, cur);
			cur->bc_nlevels = pcur->bc_nlevels;
			xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
		}
		/* If we got a new cursor, switch to it. */
		if (ncur) {
			pcur = ncur;
			ncur = NULL;
		}
	} while (!xfs_btree_ptr_is_null(cur, &nptr));

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = i;
	return 0;
error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
Christoph Hellwigd4b3a4b2008-10-30 16:57:51 +11003511
/*
 * Try to merge a non-leaf block back into the inode root.
 *
 * Note: the killroot names comes from the fact that we're effectively
 * killing the old root block.  But because we can't just delete the
 * inode we have to copy the single block it was pointing to into the
 * inode.
 *
 * Only applies to XFS_BTREE_ROOT_IN_INODE btrees.  Bails out (returning 0)
 * if the root's single child won't fit in the inode fork.
 */
STATIC int
xfs_btree_kill_iroot(
	struct xfs_btree_cur	*cur)
{
	int			whichfork = cur->bc_private.b.whichfork;
	struct xfs_inode	*ip = cur->bc_private.b.ip;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_btree_block	*block;		/* inode root block */
	struct xfs_btree_block	*cblock;	/* root's only child block */
	union xfs_btree_key	*kp;
	union xfs_btree_key	*ckp;
	union xfs_btree_ptr	*pp;
	union xfs_btree_ptr	*cpp;
	struct xfs_buf		*cbp;		/* buffer holding the child */
	int			level;
	int			index;		/* delta in root record count */
	int			numrecs;	/* records in the child block */
	int			error;
#ifdef DEBUG
	union xfs_btree_ptr	ptr;
	int			i;
#endif

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);

	ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
	ASSERT(cur->bc_nlevels > 1);

	/*
	 * Don't deal with the root block needs to be a leaf case.
	 * We're just going to turn the thing back into extents anyway.
	 */
	level = cur->bc_nlevels - 1;
	if (level == 1)
		goto out0;

	/*
	 * Give up if the root has multiple children.
	 */
	block = xfs_btree_get_iroot(cur);
	if (xfs_btree_get_numrecs(block) != 1)
		goto out0;

	cblock = xfs_btree_get_block(cur, level - 1, &cbp);
	numrecs = xfs_btree_get_numrecs(cblock);

	/*
	 * Only do this if the next level will fit.
	 * Then the data must be copied up to the inode,
	 * instead of freeing the root you free the next level.
	 */
	if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
		goto out0;

	XFS_BTREE_STATS_INC(cur, killroot);

#ifdef DEBUG
	/* The inode root must have no siblings by construction. */
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
	ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
	ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
#endif

	/*
	 * Grow the root in-place to the child's record count; the realloc
	 * can move the broot, so re-fetch the block pointer afterwards.
	 */
	index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
	if (index) {
		xfs_iroot_realloc(cur->bc_private.b.ip, index,
				  cur->bc_private.b.whichfork);
		block = ifp->if_broot;
	}

	be16_add_cpu(&block->bb_numrecs, index);
	ASSERT(block->bb_numrecs == cblock->bb_numrecs);

	/* Copy the child's keys and pointers up into the inode root. */
	kp = xfs_btree_key_addr(cur, 1, block);
	ckp = xfs_btree_key_addr(cur, 1, cblock);
	xfs_btree_copy_keys(cur, kp, ckp, numrecs);

	pp = xfs_btree_ptr_addr(cur, 1, block);
	cpp = xfs_btree_ptr_addr(cur, 1, cblock);
#ifdef DEBUG
	for (i = 0; i < numrecs; i++) {
		error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
		if (error) {
			XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
			return error;
		}
	}
#endif
	xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);

	/* The child's contents now live in the root; free the child. */
	error = xfs_btree_free_block(cur, cbp);
	if (error) {
		XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
		return error;
	}

	cur->bc_bufs[level - 1] = NULL;
	be16_add_cpu(&block->bb_level, -1);
	xfs_trans_log_inode(cur->bc_tp, ip,
		XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork));
	cur->bc_nlevels--;
out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	return 0;
}
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003625
/*
 * Kill the current root node, and replace it with its only child node.
 *
 * @bp/@level identify the old root block; @newroot is the pointer to its
 * single remaining child, which becomes the new root.
 */
STATIC int
xfs_btree_kill_root(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp,
	int			level,
	union xfs_btree_ptr	*newroot)
{
	int			error;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_STATS_INC(cur, killroot);

	/*
	 * Update the root pointer, decreasing the level by 1 and then
	 * free the old root.
	 */
	cur->bc_ops->set_root(cur, newroot, -1);

	error = xfs_btree_free_block(cur, bp);
	if (error) {
		XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
		return error;
	}

	/* Drop the cursor's reference to the now-freed level. */
	cur->bc_bufs[level] = NULL;
	cur->bc_ra[level] = 0;
	cur->bc_nlevels--;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	return 0;
}
3660
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003661STATIC int
3662xfs_btree_dec_cursor(
3663 struct xfs_btree_cur *cur,
3664 int level,
3665 int *stat)
3666{
3667 int error;
3668 int i;
3669
3670 if (level > 0) {
3671 error = xfs_btree_decrement(cur, level, &i);
3672 if (error)
3673 return error;
3674 }
3675
3676 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3677 *stat = 1;
3678 return 0;
3679}
3680
3681/*
3682 * Single level of the btree record deletion routine.
3683 * Delete record pointed to by cur/level.
3684 * Remove the record from its block then rebalance the tree.
3685 * Return 0 for error, 1 for done, 2 to go on to the next level.
3686 */
3687STATIC int /* error */
3688xfs_btree_delrec(
3689 struct xfs_btree_cur *cur, /* btree cursor */
3690 int level, /* level removing record from */
3691 int *stat) /* fail/done/go-on */
3692{
3693 struct xfs_btree_block *block; /* btree block */
3694 union xfs_btree_ptr cptr; /* current block ptr */
3695 struct xfs_buf *bp; /* buffer for block */
3696 int error; /* error return value */
3697 int i; /* loop counter */
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003698 union xfs_btree_ptr lptr; /* left sibling block ptr */
3699 struct xfs_buf *lbp; /* left buffer pointer */
3700 struct xfs_btree_block *left; /* left btree block */
3701 int lrecs = 0; /* left record count */
3702 int ptr; /* key/record index */
3703 union xfs_btree_ptr rptr; /* right sibling block ptr */
3704 struct xfs_buf *rbp; /* right buffer pointer */
3705 struct xfs_btree_block *right; /* right btree block */
3706 struct xfs_btree_block *rrblock; /* right-right btree block */
3707 struct xfs_buf *rrbp; /* right-right buffer pointer */
3708 int rrecs = 0; /* right record count */
3709 struct xfs_btree_cur *tcur; /* temporary btree cursor */
3710 int numrecs; /* temporary numrec count */
3711
3712 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3713 XFS_BTREE_TRACE_ARGI(cur, level);
3714
3715 tcur = NULL;
3716
3717 /* Get the index of the entry being deleted, check for nothing there. */
3718 ptr = cur->bc_ptrs[level];
3719 if (ptr == 0) {
3720 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3721 *stat = 0;
3722 return 0;
3723 }
3724
3725 /* Get the buffer & block containing the record or key/ptr. */
3726 block = xfs_btree_get_block(cur, level, &bp);
3727 numrecs = xfs_btree_get_numrecs(block);
3728
3729#ifdef DEBUG
3730 error = xfs_btree_check_block(cur, block, level, bp);
3731 if (error)
3732 goto error0;
3733#endif
3734
3735 /* Fail if we're off the end of the block. */
3736 if (ptr > numrecs) {
3737 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3738 *stat = 0;
3739 return 0;
3740 }
3741
3742 XFS_BTREE_STATS_INC(cur, delrec);
3743 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
3744
3745 /* Excise the entries being deleted. */
3746 if (level > 0) {
3747 /* It's a nonleaf. operate on keys and ptrs */
3748 union xfs_btree_key *lkp;
3749 union xfs_btree_ptr *lpp;
3750
3751 lkp = xfs_btree_key_addr(cur, ptr + 1, block);
3752 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
3753
3754#ifdef DEBUG
3755 for (i = 0; i < numrecs - ptr; i++) {
3756 error = xfs_btree_check_ptr(cur, lpp, i, level);
3757 if (error)
3758 goto error0;
3759 }
3760#endif
3761
3762 if (ptr < numrecs) {
3763 xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
3764 xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
3765 xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
3766 xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
3767 }
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003768 } else {
3769 /* It's a leaf. operate on records */
3770 if (ptr < numrecs) {
3771 xfs_btree_shift_recs(cur,
3772 xfs_btree_rec_addr(cur, ptr + 1, block),
3773 -1, numrecs - ptr);
3774 xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
3775 }
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003776 }
3777
3778 /*
3779 * Decrement and log the number of entries in the block.
3780 */
3781 xfs_btree_set_numrecs(block, --numrecs);
3782 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3783
3784 /*
3785 * If we are tracking the last record in the tree and
3786 * we are at the far right edge of the tree, update it.
3787 */
3788 if (xfs_btree_is_lastrec(cur, block, level)) {
3789 cur->bc_ops->update_lastrec(cur, block, NULL,
3790 ptr, LASTREC_DELREC);
3791 }
3792
3793 /*
3794 * We're at the root level. First, shrink the root block in-memory.
3795 * Try to get rid of the next level down. If we can't then there's
3796 * nothing left to do.
3797 */
3798 if (level == cur->bc_nlevels - 1) {
3799 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3800 xfs_iroot_realloc(cur->bc_private.b.ip, -1,
3801 cur->bc_private.b.whichfork);
3802
3803 error = xfs_btree_kill_iroot(cur);
3804 if (error)
3805 goto error0;
3806
3807 error = xfs_btree_dec_cursor(cur, level, stat);
3808 if (error)
3809 goto error0;
3810 *stat = 1;
3811 return 0;
3812 }
3813
3814 /*
3815 * If this is the root level, and there's only one entry left,
3816 * and it's NOT the leaf level, then we can get rid of this
3817 * level.
3818 */
3819 if (numrecs == 1 && level > 0) {
3820 union xfs_btree_ptr *pp;
3821 /*
3822 * pp is still set to the first pointer in the block.
3823 * Make it the new root of the btree.
3824 */
3825 pp = xfs_btree_ptr_addr(cur, 1, block);
Christoph Hellwigc0e59e12010-09-07 23:34:07 +00003826 error = xfs_btree_kill_root(cur, bp, level, pp);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003827 if (error)
3828 goto error0;
3829 } else if (level > 0) {
3830 error = xfs_btree_dec_cursor(cur, level, stat);
3831 if (error)
3832 goto error0;
3833 }
3834 *stat = 1;
3835 return 0;
3836 }
3837
3838 /*
3839 * If we deleted the leftmost entry in the block, update the
3840 * key values above us in the tree.
3841 */
Darrick J. Wong70b22652016-08-03 11:03:38 +10003842 if (xfs_btree_needs_key_update(cur, ptr)) {
Darrick J. Wong973b8312016-08-03 12:22:12 +10003843 error = xfs_btree_update_keys(cur, level);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003844 if (error)
3845 goto error0;
3846 }
3847
3848 /*
3849 * If the number of records remaining in the block is at least
3850 * the minimum, we're done.
3851 */
3852 if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
3853 error = xfs_btree_dec_cursor(cur, level, stat);
3854 if (error)
3855 goto error0;
3856 return 0;
3857 }
3858
3859 /*
3860 * Otherwise, we have to move some records around to keep the
3861 * tree balanced. Look at the left and right sibling blocks to
3862 * see if we can re-balance by moving only one record.
3863 */
3864 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3865 xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
3866
3867 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3868 /*
3869 * One child of root, need to get a chance to copy its contents
3870 * into the root and delete it. Can't go up to next level,
3871 * there's nothing to delete there.
3872 */
3873 if (xfs_btree_ptr_is_null(cur, &rptr) &&
3874 xfs_btree_ptr_is_null(cur, &lptr) &&
3875 level == cur->bc_nlevels - 2) {
3876 error = xfs_btree_kill_iroot(cur);
3877 if (!error)
3878 error = xfs_btree_dec_cursor(cur, level, stat);
3879 if (error)
3880 goto error0;
3881 return 0;
3882 }
3883 }
3884
3885 ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
3886 !xfs_btree_ptr_is_null(cur, &lptr));
3887
3888 /*
3889 * Duplicate the cursor so our btree manipulations here won't
3890 * disrupt the next level up.
3891 */
3892 error = xfs_btree_dup_cursor(cur, &tcur);
3893 if (error)
3894 goto error0;
3895
3896 /*
3897 * If there's a right sibling, see if it's ok to shift an entry
3898 * out of it.
3899 */
3900 if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3901 /*
3902 * Move the temp cursor to the last entry in the next block.
3903 * Actually any entry but the first would suffice.
3904 */
3905 i = xfs_btree_lastrec(tcur, level);
Eric Sandeenc29aad42015-02-23 22:39:08 +11003906 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003907
3908 error = xfs_btree_increment(tcur, level, &i);
3909 if (error)
3910 goto error0;
Eric Sandeenc29aad42015-02-23 22:39:08 +11003911 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003912
3913 i = xfs_btree_lastrec(tcur, level);
Eric Sandeenc29aad42015-02-23 22:39:08 +11003914 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003915
3916 /* Grab a pointer to the block. */
3917 right = xfs_btree_get_block(tcur, level, &rbp);
3918#ifdef DEBUG
3919 error = xfs_btree_check_block(tcur, right, level, rbp);
3920 if (error)
3921 goto error0;
3922#endif
3923 /* Grab the current block number, for future use. */
3924 xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
3925
3926 /*
3927 * If right block is full enough so that removing one entry
3928 * won't make it too empty, and left-shifting an entry out
3929 * of right to us works, we're done.
3930 */
3931 if (xfs_btree_get_numrecs(right) - 1 >=
3932 cur->bc_ops->get_minrecs(tcur, level)) {
3933 error = xfs_btree_lshift(tcur, level, &i);
3934 if (error)
3935 goto error0;
3936 if (i) {
3937 ASSERT(xfs_btree_get_numrecs(block) >=
3938 cur->bc_ops->get_minrecs(tcur, level));
3939
3940 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3941 tcur = NULL;
3942
3943 error = xfs_btree_dec_cursor(cur, level, stat);
3944 if (error)
3945 goto error0;
3946 return 0;
3947 }
3948 }
3949
3950 /*
3951 * Otherwise, grab the number of records in right for
3952 * future reference, and fix up the temp cursor to point
3953 * to our block again (last record).
3954 */
3955 rrecs = xfs_btree_get_numrecs(right);
3956 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3957 i = xfs_btree_firstrec(tcur, level);
Eric Sandeenc29aad42015-02-23 22:39:08 +11003958 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003959
3960 error = xfs_btree_decrement(tcur, level, &i);
3961 if (error)
3962 goto error0;
Eric Sandeenc29aad42015-02-23 22:39:08 +11003963 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003964 }
3965 }
3966
3967 /*
3968 * If there's a left sibling, see if it's ok to shift an entry
3969 * out of it.
3970 */
3971 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3972 /*
3973 * Move the temp cursor to the first entry in the
3974 * previous block.
3975 */
3976 i = xfs_btree_firstrec(tcur, level);
Eric Sandeenc29aad42015-02-23 22:39:08 +11003977 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003978
3979 error = xfs_btree_decrement(tcur, level, &i);
3980 if (error)
3981 goto error0;
3982 i = xfs_btree_firstrec(tcur, level);
Eric Sandeenc29aad42015-02-23 22:39:08 +11003983 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003984
3985 /* Grab a pointer to the block. */
3986 left = xfs_btree_get_block(tcur, level, &lbp);
3987#ifdef DEBUG
3988 error = xfs_btree_check_block(cur, left, level, lbp);
3989 if (error)
3990 goto error0;
3991#endif
3992 /* Grab the current block number, for future use. */
3993 xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
3994
3995 /*
3996 * If left block is full enough so that removing one entry
3997 * won't make it too empty, and right-shifting an entry out
3998 * of left to us works, we're done.
3999 */
4000 if (xfs_btree_get_numrecs(left) - 1 >=
4001 cur->bc_ops->get_minrecs(tcur, level)) {
4002 error = xfs_btree_rshift(tcur, level, &i);
4003 if (error)
4004 goto error0;
4005 if (i) {
4006 ASSERT(xfs_btree_get_numrecs(block) >=
4007 cur->bc_ops->get_minrecs(tcur, level));
4008 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
4009 tcur = NULL;
4010 if (level == 0)
4011 cur->bc_ptrs[0]++;
4012 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
4013 *stat = 1;
4014 return 0;
4015 }
4016 }
4017
4018 /*
4019 * Otherwise, grab the number of records in right for
4020 * future reference.
4021 */
4022 lrecs = xfs_btree_get_numrecs(left);
4023 }
4024
4025 /* Delete the temp cursor, we're done with it. */
4026 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
4027 tcur = NULL;
4028
4029 /* If here, we need to do a join to keep the tree balanced. */
4030 ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
4031
4032 if (!xfs_btree_ptr_is_null(cur, &lptr) &&
4033 lrecs + xfs_btree_get_numrecs(block) <=
4034 cur->bc_ops->get_maxrecs(cur, level)) {
4035 /*
4036 * Set "right" to be the starting block,
4037 * "left" to be the left neighbor.
4038 */
4039 rptr = cptr;
4040 right = block;
4041 rbp = bp;
Eric Sandeen0d7409b2014-04-14 18:59:56 +10004042 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004043 if (error)
4044 goto error0;
4045
4046 /*
4047 * If that won't work, see if we can join with the right neighbor block.
4048 */
4049 } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
4050 rrecs + xfs_btree_get_numrecs(block) <=
4051 cur->bc_ops->get_maxrecs(cur, level)) {
4052 /*
4053 * Set "left" to be the starting block,
4054 * "right" to be the right neighbor.
4055 */
4056 lptr = cptr;
4057 left = block;
4058 lbp = bp;
Eric Sandeen0d7409b2014-04-14 18:59:56 +10004059 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004060 if (error)
4061 goto error0;
4062
4063 /*
4064 * Otherwise, we can't fix the imbalance.
4065 * Just return. This is probably a logic error, but it's not fatal.
4066 */
4067 } else {
4068 error = xfs_btree_dec_cursor(cur, level, stat);
4069 if (error)
4070 goto error0;
4071 return 0;
4072 }
4073
4074 rrecs = xfs_btree_get_numrecs(right);
4075 lrecs = xfs_btree_get_numrecs(left);
4076
4077 /*
4078 * We're now going to join "left" and "right" by moving all the stuff
4079 * in "right" to "left" and deleting "right".
4080 */
4081 XFS_BTREE_STATS_ADD(cur, moves, rrecs);
4082 if (level > 0) {
4083 /* It's a non-leaf. Move keys and pointers. */
4084 union xfs_btree_key *lkp; /* left btree key */
4085 union xfs_btree_ptr *lpp; /* left address pointer */
4086 union xfs_btree_key *rkp; /* right btree key */
4087 union xfs_btree_ptr *rpp; /* right address pointer */
4088
4089 lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
4090 lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
4091 rkp = xfs_btree_key_addr(cur, 1, right);
4092 rpp = xfs_btree_ptr_addr(cur, 1, right);
4093#ifdef DEBUG
4094 for (i = 1; i < rrecs; i++) {
4095 error = xfs_btree_check_ptr(cur, rpp, i, level);
4096 if (error)
4097 goto error0;
4098 }
4099#endif
4100 xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
4101 xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
4102
4103 xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
4104 xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
4105 } else {
4106 /* It's a leaf. Move records. */
4107 union xfs_btree_rec *lrp; /* left record pointer */
4108 union xfs_btree_rec *rrp; /* right record pointer */
4109
4110 lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
4111 rrp = xfs_btree_rec_addr(cur, 1, right);
4112
4113 xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
4114 xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
4115 }
4116
4117 XFS_BTREE_STATS_INC(cur, join);
4118
4119 /*
Malcolm Parsons9da096f2009-03-29 09:55:42 +02004120 * Fix up the number of records and right block pointer in the
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004121 * surviving block, and log it.
4122 */
4123 xfs_btree_set_numrecs(left, lrecs + rrecs);
4124 xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB),
4125 xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
4126 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
4127
4128 /* If there is a right sibling, point it to the remaining block. */
4129 xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
4130 if (!xfs_btree_ptr_is_null(cur, &cptr)) {
Eric Sandeen0d7409b2014-04-14 18:59:56 +10004131 error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004132 if (error)
4133 goto error0;
4134 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
4135 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
4136 }
4137
4138 /* Free the deleted block. */
Christoph Hellwigc46ee8a2016-02-08 14:58:07 +11004139 error = xfs_btree_free_block(cur, rbp);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004140 if (error)
4141 goto error0;
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004142
4143 /*
4144 * If we joined with the left neighbor, set the buffer in the
4145 * cursor to the left block, and fix up the index.
4146 */
4147 if (bp != lbp) {
4148 cur->bc_bufs[level] = lbp;
4149 cur->bc_ptrs[level] += lrecs;
4150 cur->bc_ra[level] = 0;
4151 }
4152 /*
4153 * If we joined with the right neighbor and there's a level above
4154 * us, increment the cursor at that level.
4155 */
4156 else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
4157 (level + 1 < cur->bc_nlevels)) {
4158 error = xfs_btree_increment(cur, level + 1, &i);
4159 if (error)
4160 goto error0;
4161 }
4162
4163 /*
4164 * Readjust the ptr at this level if it's not a leaf, since it's
4165 * still pointing at the deletion point, which makes the cursor
4166 * inconsistent. If this makes the ptr 0, the caller fixes it up.
4167 * We can't use decrement because it would change the next level up.
4168 */
4169 if (level > 0)
4170 cur->bc_ptrs[level]--;
4171
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10004172 /*
4173 * We combined blocks, so we have to update the parent keys if the
4174 * btree supports overlapped intervals. However, bc_ptrs[level + 1]
4175 * points to the old block so that the caller knows which record to
4176 * delete. Therefore, the caller must be savvy enough to call updkeys
4177 * for us if we return stat == 2. The other exit points from this
4178 * function don't require deletions further up the tree, so they can
4179 * call updkeys directly.
4180 */
4181
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004182 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
4183 /* Return value means the next level up has something to do. */
4184 *stat = 2;
4185 return 0;
4186
4187error0:
4188 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
4189 if (tcur)
4190 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
4191 return error;
4192}
4193
4194/*
4195 * Delete the record pointed to by cur.
4196 * The cursor refers to the place where the record was (could be inserted)
4197 * when the operation returns.
4198 */
4199int /* error */
4200xfs_btree_delete(
4201 struct xfs_btree_cur *cur,
4202 int *stat) /* success/failure */
4203{
4204 int error; /* error return value */
4205 int level;
4206 int i;
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10004207 bool joined = false;
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004208
4209 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
4210
4211 /*
4212 * Go up the tree, starting at leaf level.
4213 *
4214 * If 2 is returned then a join was done; go to the next level.
4215 * Otherwise we are done.
4216 */
4217 for (level = 0, i = 2; i == 2; level++) {
4218 error = xfs_btree_delrec(cur, level, &i);
4219 if (error)
4220 goto error0;
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10004221 if (i == 2)
4222 joined = true;
4223 }
4224
4225 /*
4226 * If we combined blocks as part of deleting the record, delrec won't
4227 * have updated the parent high keys so we have to do that here.
4228 */
4229 if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) {
4230 error = xfs_btree_updkeys_force(cur, 0);
4231 if (error)
4232 goto error0;
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004233 }
4234
4235 if (i == 0) {
4236 for (level = 1; level < cur->bc_nlevels; level++) {
4237 if (cur->bc_ptrs[level] == 0) {
4238 error = xfs_btree_decrement(cur, level, &i);
4239 if (error)
4240 goto error0;
4241 break;
4242 }
4243 }
4244 }
4245
4246 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
4247 *stat = i;
4248 return 0;
4249error0:
4250 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
4251 return error;
4252}
Christoph Hellwig8cc938f2008-10-30 16:58:11 +11004253
4254/*
4255 * Get the data from the pointed-to record.
4256 */
4257int /* error */
4258xfs_btree_get_rec(
4259 struct xfs_btree_cur *cur, /* btree cursor */
4260 union xfs_btree_rec **recp, /* output: btree record */
4261 int *stat) /* output: success/failure */
4262{
4263 struct xfs_btree_block *block; /* btree block */
4264 struct xfs_buf *bp; /* buffer pointer */
4265 int ptr; /* record number */
4266#ifdef DEBUG
4267 int error; /* error return value */
4268#endif
4269
4270 ptr = cur->bc_ptrs[0];
4271 block = xfs_btree_get_block(cur, 0, &bp);
4272
4273#ifdef DEBUG
4274 error = xfs_btree_check_block(cur, block, 0, bp);
4275 if (error)
4276 return error;
4277#endif
4278
4279 /*
4280 * Off the right end or left end, return failure.
4281 */
4282 if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
4283 *stat = 0;
4284 return 0;
4285 }
4286
4287 /*
4288 * Point to the record and extract its data.
4289 */
4290 *recp = xfs_btree_rec_addr(cur, ptr, block);
4291 *stat = 1;
4292 return 0;
4293}
Dave Chinner21b5c972013-08-30 10:23:44 +10004294
Darrick J. Wong28a89562016-08-03 11:10:55 +10004295/* Visit a block in a btree. */
4296STATIC int
4297xfs_btree_visit_block(
4298 struct xfs_btree_cur *cur,
4299 int level,
4300 xfs_btree_visit_blocks_fn fn,
4301 void *data)
4302{
4303 struct xfs_btree_block *block;
4304 struct xfs_buf *bp;
4305 union xfs_btree_ptr rptr;
4306 int error;
4307
4308 /* do right sibling readahead */
4309 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
4310 block = xfs_btree_get_block(cur, level, &bp);
4311
4312 /* process the block */
4313 error = fn(cur, level, data);
4314 if (error)
4315 return error;
4316
4317 /* now read rh sibling block for next iteration */
4318 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
4319 if (xfs_btree_ptr_is_null(cur, &rptr))
4320 return -ENOENT;
4321
4322 return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
4323}
4324
4325
4326/* Visit every block in a btree. */
4327int
4328xfs_btree_visit_blocks(
4329 struct xfs_btree_cur *cur,
4330 xfs_btree_visit_blocks_fn fn,
4331 void *data)
4332{
4333 union xfs_btree_ptr lptr;
4334 int level;
4335 struct xfs_btree_block *block = NULL;
4336 int error = 0;
4337
4338 cur->bc_ops->init_ptr_from_cur(cur, &lptr);
4339
4340 /* for each level */
4341 for (level = cur->bc_nlevels - 1; level >= 0; level--) {
4342 /* grab the left hand block */
4343 error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
4344 if (error)
4345 return error;
4346
4347 /* readahead the left most block for the next level down */
4348 if (level > 0) {
4349 union xfs_btree_ptr *ptr;
4350
4351 ptr = xfs_btree_ptr_addr(cur, 1, block);
4352 xfs_btree_readahead_ptr(cur, ptr, 1);
4353
4354 /* save for the next iteration of the loop */
4355 lptr = *ptr;
4356 }
4357
4358 /* for each buffer in the level */
4359 do {
4360 error = xfs_btree_visit_block(cur, level, fn, data);
4361 } while (!error);
4362
4363 if (error != -ENOENT)
4364 return error;
4365 }
4366
4367 return 0;
4368}
4369
Dave Chinner21b5c972013-08-30 10:23:44 +10004370/*
4371 * Change the owner of a btree.
4372 *
 * The mechanism we use here is ordered buffer logging. Because we don't know
 * how many buffers we are going to need to modify, we don't really want to
 * have to make transaction reservations for the worst case of every buffer in a
 * full size btree as that may be more space than we can fit in the log....
4377 *
4378 * We do the btree walk in the most optimal manner possible - we have sibling
4379 * pointers so we can just walk all the blocks on each level from left to right
4380 * in a single pass, and then move to the next level and do the same. We can
4381 * also do readahead on the sibling pointers to get IO moving more quickly,
4382 * though for slow disks this is unlikely to make much difference to performance
4383 * as the amount of CPU work we have to do before moving to the next block is
4384 * relatively small.
4385 *
4386 * For each btree block that we load, modify the owner appropriately, set the
4387 * buffer as an ordered buffer and log it appropriately. We need to ensure that
4388 * we mark the region we change dirty so that if the buffer is relogged in
4389 * a subsequent transaction the changes we make here as an ordered buffer are
Dave Chinner638f44162013-08-30 10:23:45 +10004390 * correctly relogged in that transaction. If we are in recovery context, then
4391 * just queue the modified buffer as delayed write buffer so the transaction
4392 * recovery completion writes the changes to disk.
Dave Chinner21b5c972013-08-30 10:23:44 +10004393 */
/* Context passed to xfs_btree_block_change_owner() for every visited block. */
struct xfs_btree_block_change_owner_info {
	/* owner value stamped into each block's bb_owner field */
	__uint64_t		new_owner;
	/* delwri queue used when there is no transaction (recovery context) */
	struct list_head	*buffer_list;
};
4398
Dave Chinner21b5c972013-08-30 10:23:44 +10004399static int
4400xfs_btree_block_change_owner(
4401 struct xfs_btree_cur *cur,
4402 int level,
Darrick J. Wong28a89562016-08-03 11:10:55 +10004403 void *data)
Dave Chinner21b5c972013-08-30 10:23:44 +10004404{
Darrick J. Wong28a89562016-08-03 11:10:55 +10004405 struct xfs_btree_block_change_owner_info *bbcoi = data;
Dave Chinner21b5c972013-08-30 10:23:44 +10004406 struct xfs_btree_block *block;
4407 struct xfs_buf *bp;
Dave Chinner21b5c972013-08-30 10:23:44 +10004408
4409 /* modify the owner */
4410 block = xfs_btree_get_block(cur, level, &bp);
4411 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
Darrick J. Wong28a89562016-08-03 11:10:55 +10004412 block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
Dave Chinner21b5c972013-08-30 10:23:44 +10004413 else
Darrick J. Wong28a89562016-08-03 11:10:55 +10004414 block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner);
Dave Chinner21b5c972013-08-30 10:23:44 +10004415
4416 /*
Dave Chinner638f44162013-08-30 10:23:45 +10004417 * If the block is a root block hosted in an inode, we might not have a
4418 * buffer pointer here and we shouldn't attempt to log the change as the
4419 * information is already held in the inode and discarded when the root
4420 * block is formatted into the on-disk inode fork. We still change it,
4421 * though, so everything is consistent in memory.
Dave Chinner21b5c972013-08-30 10:23:44 +10004422 */
4423 if (bp) {
Dave Chinner638f44162013-08-30 10:23:45 +10004424 if (cur->bc_tp) {
4425 xfs_trans_ordered_buf(cur->bc_tp, bp);
4426 xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
4427 } else {
Darrick J. Wong28a89562016-08-03 11:10:55 +10004428 xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
Dave Chinner638f44162013-08-30 10:23:45 +10004429 }
Dave Chinner21b5c972013-08-30 10:23:44 +10004430 } else {
4431 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
4432 ASSERT(level == cur->bc_nlevels - 1);
4433 }
4434
Darrick J. Wong28a89562016-08-03 11:10:55 +10004435 return 0;
Dave Chinner21b5c972013-08-30 10:23:44 +10004436}
4437
4438int
4439xfs_btree_change_owner(
4440 struct xfs_btree_cur *cur,
Dave Chinner638f44162013-08-30 10:23:45 +10004441 __uint64_t new_owner,
4442 struct list_head *buffer_list)
Dave Chinner21b5c972013-08-30 10:23:44 +10004443{
Darrick J. Wong28a89562016-08-03 11:10:55 +10004444 struct xfs_btree_block_change_owner_info bbcoi;
Dave Chinner21b5c972013-08-30 10:23:44 +10004445
Darrick J. Wong28a89562016-08-03 11:10:55 +10004446 bbcoi.new_owner = new_owner;
4447 bbcoi.buffer_list = buffer_list;
Dave Chinner21b5c972013-08-30 10:23:44 +10004448
Darrick J. Wong28a89562016-08-03 11:10:55 +10004449 return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner,
4450 &bbcoi);
Dave Chinner21b5c972013-08-30 10:23:44 +10004451}
Darrick J. Wongc5ab1312016-01-04 16:13:21 +11004452
4453/**
4454 * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
4455 * btree block
4456 *
4457 * @bp: buffer containing the btree block
4458 * @max_recs: pointer to the m_*_mxr max records field in the xfs mount
4459 * @pag_max_level: pointer to the per-ag max level field
4460 */
4461bool
4462xfs_btree_sblock_v5hdr_verify(
4463 struct xfs_buf *bp)
4464{
4465 struct xfs_mount *mp = bp->b_target->bt_mount;
4466 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4467 struct xfs_perag *pag = bp->b_pag;
4468
4469 if (!xfs_sb_version_hascrc(&mp->m_sb))
4470 return false;
4471 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
4472 return false;
4473 if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
4474 return false;
4475 if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
4476 return false;
4477 return true;
4478}
4479
4480/**
4481 * xfs_btree_sblock_verify() -- verify a short-format btree block
4482 *
4483 * @bp: buffer containing the btree block
4484 * @max_recs: maximum records allowed in this btree node
4485 */
4486bool
4487xfs_btree_sblock_verify(
4488 struct xfs_buf *bp,
4489 unsigned int max_recs)
4490{
4491 struct xfs_mount *mp = bp->b_target->bt_mount;
4492 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4493
4494 /* numrecs verification */
4495 if (be16_to_cpu(block->bb_numrecs) > max_recs)
4496 return false;
4497
4498 /* sibling pointer verification */
4499 if (!block->bb_u.s.bb_leftsib ||
4500 (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
4501 block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
4502 return false;
4503 if (!block->bb_u.s.bb_rightsib ||
4504 (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
4505 block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
4506 return false;
4507
4508 return true;
4509}
Darrick J. Wong19b54ee2016-06-21 11:53:28 +10004510
/*
 * Calculate the number of btree levels needed to store a given number of
 * records in a short-format btree.
 *
 * limits[0] is the number of records that fit in a leaf block; limits[1]
 * is the fanout of the node blocks above the leaves.  The mount pointer
 * is unused here but kept for interface symmetry with other btree helpers.
 */
uint
xfs_btree_compute_maxlevels(
	struct xfs_mount	*mp,
	uint			*limits,
	unsigned long		len)
{
	uint			level = 1;
	unsigned long		maxblocks;

	/* number of leaf blocks needed for len records, rounded up */
	maxblocks = (len + limits[0] - 1) / limits[0];

	/* keep adding node levels until a single block covers everything */
	while (maxblocks > 1) {
		maxblocks = (maxblocks + limits[1] - 1) / limits[1];
		level++;
	}
	return level;
}
Darrick J. Wong105f7d82016-08-03 11:10:21 +10004529
/*
 * Query a regular (non-overlapping) btree for all records overlapping a
 * given interval.  Start with a LE lookup of the key of low_rec and return
 * all records until we find a record with a key greater than the key of
 * high_rec.
 *
 * Returns 0 or a negative errno; if the callback asks to stop, its
 * XFS_BTREE_QUERY_RANGE_ABORT value is returned to the caller as-is.
 */
STATIC int
xfs_btree_simple_query_range(
	struct xfs_btree_cur	*cur,
	union xfs_btree_key	*low_key,
	union xfs_btree_key	*high_key,
	xfs_btree_query_range_fn fn,
	void			*priv)
{
	union xfs_btree_rec	*recp;
	union xfs_btree_key	rec_key;	/* scratch key for comparisons */
	__int64_t		diff;
	int			stat;
	bool			firstrec = true;
	int			error;

	ASSERT(cur->bc_ops->init_high_key_from_rec);
	ASSERT(cur->bc_ops->diff_two_keys);

	/*
	 * Find the leftmost record. The btree cursor must be set
	 * to the low record used to generate low_key.
	 */
	stat = 0;
	error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
	if (error)
		goto out;

	/* Nothing? See if there's anything to the right. */
	if (!stat) {
		error = xfs_btree_increment(cur, 0, &stat);
		if (error)
			goto out;
	}

	while (stat) {
		/* Find the record. */
		error = xfs_btree_get_rec(cur, &recp, &stat);
		if (error || !stat)
			break;

		/*
		 * Skip if high_key(rec) < low_key.  Only the record found by
		 * the initial LE lookup can end entirely before the query
		 * range (records are visited in ascending key order), so
		 * this check is only needed once.
		 */
		if (firstrec) {
			cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
			firstrec = false;
			diff = cur->bc_ops->diff_two_keys(cur, low_key,
					&rec_key);
			if (diff > 0)
				goto advloop;
		}

		/* Stop if high_key < low_key(rec). */
		cur->bc_ops->init_key_from_rec(&rec_key, recp);
		diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key);
		if (diff > 0)
			break;

		/*
		 * Callback; a negative errno or an explicit abort request
		 * ends the walk and is propagated via 'error'.
		 */
		error = fn(cur, recp, priv);
		if (error < 0 || error == XFS_BTREE_QUERY_RANGE_ABORT)
			break;

advloop:
		/* Move on to the next record. */
		error = xfs_btree_increment(cur, 0, &stat);
		if (error)
			break;
	}

out:
	return error;
}
4606
/*
 * Query an overlapped interval btree for all records overlapping a given
 * interval. This function roughly follows the algorithm given in
 * "Interval Trees" of _Introduction to Algorithms_, which is section
 * 14.3 in the 2nd and 3rd editions.
 *
 * First, generate keys for the low and high records passed in.
 *
 * For any leaf node, generate the high and low keys for the record.
 * If the record keys overlap with the query low/high keys, pass the
 * record to the function iterator.
 *
 * For any internal node, compare the low and high keys of each
 * pointer against the query low/high keys. If there's an overlap,
 * follow the pointer.
 *
 * As an optimization, we stop scanning a block when we find a low key
 * that is greater than the query's high key.
 *
 * The walk is iterative: cur->bc_ptrs[] holds the current slot at each
 * level and acts as the traversal stack.  The callback's negative errno
 * or XFS_BTREE_QUERY_RANGE_ABORT value terminates the walk and is
 * returned to the caller.
 */
STATIC int
xfs_btree_overlapped_query_range(
	struct xfs_btree_cur	*cur,
	union xfs_btree_key	*low_key,
	union xfs_btree_key	*high_key,
	xfs_btree_query_range_fn fn,
	void			*priv)
{
	union xfs_btree_ptr	ptr;
	union xfs_btree_ptr	*pp;
	union xfs_btree_key	rec_key;	/* low key of current record */
	union xfs_btree_key	rec_hkey;	/* high key of current record */
	union xfs_btree_key	*lkp;
	union xfs_btree_key	*hkp;
	union xfs_btree_rec	*recp;
	struct xfs_btree_block	*block;
	__int64_t		ldiff;
	__int64_t		hdiff;
	int			level;
	struct xfs_buf		*bp;
	int			i;
	int			error;

	/* Load the root of the btree. */
	level = cur->bc_nlevels - 1;
	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
	error = xfs_btree_lookup_get_block(cur, level, &ptr, &block);
	if (error)
		return error;
	xfs_btree_get_block(cur, level, &bp);
	trace_xfs_btree_overlapped_query_range(cur, level, bp);
#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto out;
#endif
	cur->bc_ptrs[level] = 1;

	while (level < cur->bc_nlevels) {
		block = xfs_btree_get_block(cur, level, &bp);

		/* End of node, pop back towards the root. */
		if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
pop_up:
			if (level < cur->bc_nlevels - 1)
				cur->bc_ptrs[level + 1]++;
			level++;
			continue;
		}

		if (level == 0) {
			/* Handle a leaf node. */
			recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);

			cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp);
			ldiff = cur->bc_ops->diff_two_keys(cur, &rec_hkey,
					low_key);

			cur->bc_ops->init_key_from_rec(&rec_key, recp);
			hdiff = cur->bc_ops->diff_two_keys(cur, high_key,
					&rec_key);

			/*
			 * If (record's high key >= query's low key) and
			 * (query's high key >= record's low key), then
			 * this record overlaps the query range; callback.
			 */
			if (ldiff >= 0 && hdiff >= 0) {
				error = fn(cur, recp, priv);
				if (error < 0 ||
				    error == XFS_BTREE_QUERY_RANGE_ABORT)
					break;
			} else if (hdiff < 0) {
				/* Record is larger than high key; pop. */
				goto pop_up;
			}
			cur->bc_ptrs[level]++;
			continue;
		}

		/* Handle an internal node. */
		lkp = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block);
		hkp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block);
		pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);

		ldiff = cur->bc_ops->diff_two_keys(cur, hkp, low_key);
		hdiff = cur->bc_ops->diff_two_keys(cur, high_key, lkp);

		/*
		 * If (pointer's high key >= query's low key) and
		 * (query's high key >= pointer's low key), then
		 * this record overlaps the query range; follow pointer.
		 */
		if (ldiff >= 0 && hdiff >= 0) {
			/* descend one level into the child block */
			level--;
			error = xfs_btree_lookup_get_block(cur, level, pp,
					&block);
			if (error)
				goto out;
			xfs_btree_get_block(cur, level, &bp);
			trace_xfs_btree_overlapped_query_range(cur, level, bp);
#ifdef DEBUG
			error = xfs_btree_check_block(cur, block, level, bp);
			if (error)
				goto out;
#endif
			cur->bc_ptrs[level] = 1;
			continue;
		} else if (hdiff < 0) {
			/* The low key is larger than the upper range; pop. */
			goto pop_up;
		}
		cur->bc_ptrs[level]++;
	}

out:
	/*
	 * If we don't end this function with the cursor pointing at a record
	 * block, a subsequent non-error cursor deletion will not release
	 * node-level buffers, causing a buffer leak. This is quite possible
	 * with a zero-results range query, so release the buffers if we
	 * failed to return any results.
	 */
	if (cur->bc_bufs[0] == NULL) {
		for (i = 0; i < cur->bc_nlevels; i++) {
			if (cur->bc_bufs[i]) {
				xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
				cur->bc_bufs[i] = NULL;
				cur->bc_ptrs[i] = 0;
				cur->bc_ra[i] = 0;
			}
		}
	}

	return error;
}
4762
4763/*
4764 * Query a btree for all records overlapping a given interval of keys. The
4765 * supplied function will be called with each record found; return one of the
4766 * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error
4767 * code. This function returns XFS_BTREE_QUERY_RANGE_ABORT, zero, or a
4768 * negative error code.
4769 */
4770int
4771xfs_btree_query_range(
4772 struct xfs_btree_cur *cur,
4773 union xfs_btree_irec *low_rec,
4774 union xfs_btree_irec *high_rec,
4775 xfs_btree_query_range_fn fn,
4776 void *priv)
4777{
4778 union xfs_btree_rec rec;
4779 union xfs_btree_key low_key;
4780 union xfs_btree_key high_key;
4781
4782 /* Find the keys of both ends of the interval. */
4783 cur->bc_rec = *high_rec;
4784 cur->bc_ops->init_rec_from_cur(cur, &rec);
4785 cur->bc_ops->init_key_from_rec(&high_key, &rec);
4786
4787 cur->bc_rec = *low_rec;
4788 cur->bc_ops->init_rec_from_cur(cur, &rec);
4789 cur->bc_ops->init_key_from_rec(&low_key, &rec);
4790
4791 /* Enforce low key < high key. */
4792 if (cur->bc_ops->diff_two_keys(cur, &low_key, &high_key) > 0)
4793 return -EINVAL;
4794
4795 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
4796 return xfs_btree_simple_query_range(cur, &low_key,
4797 &high_key, fn, priv);
4798 return xfs_btree_overlapped_query_range(cur, &low_key, &high_key,
4799 fn, priv);
4800}
Darrick J. Wong4ed3f682016-09-19 10:25:03 +10004801
4802/*
4803 * Calculate the number of blocks needed to store a given number of records
4804 * in a short-format (per-AG metadata) btree.
4805 */
4806xfs_extlen_t
4807xfs_btree_calc_size(
4808 struct xfs_mount *mp,
4809 uint *limits,
4810 unsigned long long len)
4811{
4812 int level;
4813 int maxrecs;
4814 xfs_extlen_t rval;
4815
4816 maxrecs = limits[0];
4817 for (level = 0, rval = 0; len > 1; level++) {
4818 len += maxrecs - 1;
4819 do_div(len, maxrecs);
4820 maxrecs = limits[1];
4821 rval += len;
4822 }
4823 return rval;
4824}
Darrick J. Wongc611cc02016-09-19 10:25:20 +10004825
4826int
4827xfs_btree_count_blocks_helper(
4828 struct xfs_btree_cur *cur,
4829 int level,
4830 void *data)
4831{
4832 xfs_extlen_t *blocks = data;
4833 (*blocks)++;
4834
4835 return 0;
4836}
4837
4838/* Count the blocks in a btree and return the result in *blocks. */
4839int
4840xfs_btree_count_blocks(
4841 struct xfs_btree_cur *cur,
4842 xfs_extlen_t *blocks)
4843{
4844 *blocks = 0;
4845 return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper,
4846 blocks);
4847}