blob: 5c1b35a23a3dfdd97952cd582924d4263d2ec093 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
Nathan Scott7b718762005-11-02 14:58:39 +11002 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004 *
Nathan Scott7b718762005-11-02 14:58:39 +11005 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
Linus Torvalds1da177e2005-04-16 15:20:36 -07007 * published by the Free Software Foundation.
8 *
Nathan Scott7b718762005-11-02 14:58:39 +11009 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
Linus Torvalds1da177e2005-04-16 15:20:36 -070013 *
Nathan Scott7b718762005-11-02 14:58:39 +110014 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Linus Torvalds1da177e2005-04-16 15:20:36 -070017 */
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include "xfs.h"
Nathan Scotta844f452005-11-02 14:38:42 +110019#include "xfs_fs.h"
Dave Chinner70a98832013-10-23 10:36:05 +110020#include "xfs_shared.h"
Dave Chinnera4fbe6a2013-10-23 10:51:50 +110021#include "xfs_format.h"
Dave Chinner239880e2013-10-23 10:50:10 +110022#include "xfs_log_format.h"
23#include "xfs_trans_resv.h"
Nathan Scotta844f452005-11-02 14:38:42 +110024#include "xfs_bit.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070025#include "xfs_mount.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070026#include "xfs_inode.h"
Dave Chinner239880e2013-10-23 10:50:10 +110027#include "xfs_trans.h"
Christoph Hellwig38bb7422008-10-30 16:56:22 +110028#include "xfs_inode_item.h"
Christoph Hellwigee1a47a2013-04-21 14:53:46 -050029#include "xfs_buf_item.h"
Nathan Scotta844f452005-11-02 14:38:42 +110030#include "xfs_btree.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070031#include "xfs_error.h"
Christoph Hellwig0b1b2132009-12-14 23:14:59 +000032#include "xfs_trace.h"
Christoph Hellwigee1a47a2013-04-21 14:53:46 -050033#include "xfs_cksum.h"
Dave Chinnercf11da92014-07-15 07:08:24 +100034#include "xfs_alloc.h"
Brian Fostera45086e2015-10-12 15:59:25 +110035#include "xfs_log.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070036
37/*
38 * Cursor allocation zone.
39 */
40kmem_zone_t *xfs_btree_cur_zone;
41
/*
 * Btree magic numbers.
 *
 * Row 0 holds the pre-CRC (v4) magics, row 1 the CRC-enabled (v5)
 * magics; columns are indexed by btree type (bc_btnum).
 */
static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
	{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
	  XFS_FIBT_MAGIC },
	{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC,
	  XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC }
};
/* Expected on-disk magic for the block type this cursor operates on. */
#define xfs_btree_magic(cur) \
	xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
Linus Torvalds1da177e2005-04-16 15:20:36 -070053
/*
 * Check a long-form (64-bit pointer) btree block header for obvious
 * corruption: magic number, level, record count and sibling pointers,
 * plus the uuid and self-describing disk address on CRC filesystems.
 */
STATIC int				/* error (0 or EFSCORRUPTED) */
xfs_btree_check_lblock(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	struct xfs_btree_block	*block,	/* btree long form block pointer */
	int			level,	/* level of the btree block */
	struct xfs_buf		*bp)	/* buffer for block, if any */
{
	int			lblock_ok = 1; /* block passes checks */
	struct xfs_mount	*mp;	/* file system mount point */

	mp = cur->bc_mp;

	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		/*
		 * v5 blocks also stamp the filesystem meta uuid and their
		 * own disk address into the header; verify both.  With no
		 * buffer (bp == NULL) the stamped address is DADDR_NULL.
		 */
		lblock_ok = lblock_ok &&
			uuid_equal(&block->bb_u.l.bb_uuid,
				   &mp->m_sb.sb_meta_uuid) &&
			block->bb_u.l.bb_blkno == cpu_to_be64(
				bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
	}

	/*
	 * Each sibling pointer must be non-zero and either NULLFSBLOCK
	 * or an fsblock that passes the filesystem bounds check.
	 */
	lblock_ok = lblock_ok &&
		be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
		be16_to_cpu(block->bb_level) == level &&
		be16_to_cpu(block->bb_numrecs) <=
			cur->bc_ops->get_maxrecs(cur, level) &&
		block->bb_u.l.bb_leftsib &&
		(block->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK) ||
		 XFS_FSB_SANITY_CHECK(mp,
			be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
		block->bb_u.l.bb_rightsib &&
		(block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK) ||
		 XFS_FSB_SANITY_CHECK(mp,
			be64_to_cpu(block->bb_u.l.bb_rightsib)));

	/* XFS_TEST_ERROR can also inject a failure here for testing. */
	if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
			XFS_ERRTAG_BTREE_CHECK_LBLOCK,
			XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
		if (bp)
			trace_xfs_btree_corrupt(bp, _RET_IP_);
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}
	return 0;
}
98
/*
 * Check a short-form (32-bit pointer) btree block header for obvious
 * corruption: magic number, level, record count and sibling pointers,
 * plus the uuid and self-describing disk address on CRC filesystems.
 * Sibling pointers are bounded by the owning AG's length.
 */
STATIC int				/* error (0 or EFSCORRUPTED) */
xfs_btree_check_sblock(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	struct xfs_btree_block	*block,	/* btree short form block pointer */
	int			level,	/* level of the btree block */
	struct xfs_buf		*bp)	/* buffer containing block */
{
	struct xfs_mount	*mp;	/* file system mount point */
	struct xfs_buf		*agbp;	/* buffer for ag. freespace struct */
	struct xfs_agf		*agf;	/* ag. freespace structure */
	xfs_agblock_t		agflen;	/* native ag. freespace length */
	int			sblock_ok = 1; /* block passes checks */

	mp = cur->bc_mp;
	agbp = cur->bc_private.a.agbp;
	agf = XFS_BUF_TO_AGF(agbp);
	agflen = be32_to_cpu(agf->agf_length);

	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		/*
		 * v5 blocks also stamp the filesystem meta uuid and their
		 * own disk address into the header; verify both.  With no
		 * buffer (bp == NULL) the stamped address is DADDR_NULL.
		 */
		sblock_ok = sblock_ok &&
			uuid_equal(&block->bb_u.s.bb_uuid,
				   &mp->m_sb.sb_meta_uuid) &&
			block->bb_u.s.bb_blkno == cpu_to_be64(
				bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
	}

	/*
	 * Each sibling pointer must be non-zero and either NULLAGBLOCK
	 * or an AG block number below the AG's length.
	 */
	sblock_ok = sblock_ok &&
		be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
		be16_to_cpu(block->bb_level) == level &&
		be16_to_cpu(block->bb_numrecs) <=
			cur->bc_ops->get_maxrecs(cur, level) &&
		(block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
		 be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
		block->bb_u.s.bb_leftsib &&
		(block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
		 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
		block->bb_u.s.bb_rightsib;

	/* XFS_TEST_ERROR can also inject a failure here for testing. */
	if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp,
			XFS_ERRTAG_BTREE_CHECK_SBLOCK,
			XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
		if (bp)
			trace_xfs_btree_corrupt(bp, _RET_IP_);
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}
	return 0;
}
147
148/*
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100149 * Debug routine: check that block header is ok.
150 */
151int
152xfs_btree_check_block(
153 struct xfs_btree_cur *cur, /* btree cursor */
154 struct xfs_btree_block *block, /* generic btree block pointer */
155 int level, /* level of the btree block */
156 struct xfs_buf *bp) /* buffer containing block, if any */
157{
Christoph Hellwig7cc95a82008-10-30 17:14:34 +1100158 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
159 return xfs_btree_check_lblock(cur, block, level, bp);
160 else
161 return xfs_btree_check_sblock(cur, block, level, bp);
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100162}
163
164/*
165 * Check that (long) pointer is ok.
166 */
int					/* error (0 or EFSCORRUPTED) */
xfs_btree_check_lptr(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_fsblock_t		bno,	/* btree block disk address */
	int			level)	/* btree block level */
{
	/*
	 * XFS_WANT_CORRUPTED_RETURN returns -EFSCORRUPTED from here if
	 * the condition is false: a valid long pointer must come from a
	 * non-leaf level and be a sane, non-NULL fsblock number.
	 */
	XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
		level > 0 &&
		bno != NULLFSBLOCK &&
		XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
	return 0;
}
179
Lachlan McIlroy24ee0e42008-10-30 17:05:26 +1100180#ifdef DEBUG
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100181/*
182 * Check that (short) pointer is ok.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183 */
STATIC int				/* error (0 or EFSCORRUPTED) */
xfs_btree_check_sptr(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* btree block disk address */
	int			level)	/* btree block level */
{
	xfs_agblock_t		agblocks = cur->bc_mp->m_sb.sb_agblocks;

	/*
	 * XFS_WANT_CORRUPTED_RETURN returns -EFSCORRUPTED from here if
	 * the condition is false: a valid short pointer must come from a
	 * non-leaf level and be a non-zero, non-NULL block number inside
	 * the AG.
	 */
	XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
		level > 0 &&
		bno != NULLAGBLOCK &&
		bno != 0 &&
		bno < agblocks);
	return 0;
}
199
200/*
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100201 * Check that block ptr is ok.
202 */
STATIC int				/* error (0 or EFSCORRUPTED) */
xfs_btree_check_ptr(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	union xfs_btree_ptr	*ptr,	/* btree block disk address */
	int			index,	/* offset from ptr to check */
	int			level)	/* btree block level */
{
	/*
	 * "ptr" may point into an on-disk array of packed pointers;
	 * "index" selects which entry to validate.  Dispatch on the
	 * pointer width this btree uses.
	 */
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		return xfs_btree_check_lptr(cur,
				be64_to_cpu((&ptr->l)[index]), level);
	} else {
		return xfs_btree_check_sptr(cur,
				be32_to_cpu((&ptr->s)[index]), level);
	}
}
Lachlan McIlroy24ee0e42008-10-30 17:05:26 +1100218#endif
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100219
220/*
Christoph Hellwigee1a47a2013-04-21 14:53:46 -0500221 * Calculate CRC on the whole btree block and stuff it into the
222 * long-form btree header.
223 *
 * Prior to calculating the CRC, pull the LSN out of the buffer log item and put
Geliang Tangfef4ded2015-10-12 16:02:32 +1100225 * it into the buffer so recovery knows what the last modification was that made
Christoph Hellwigee1a47a2013-04-21 14:53:46 -0500226 * it to disk.
227 */
void
xfs_btree_lblock_calc_crc(
	struct xfs_buf		*bp)
{
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_buf_log_item	*bip = bp->b_fspriv;

	/* CRCs are only maintained on v5 (CRC-enabled) filesystems. */
	if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
		return;
	/* Stamp the last-modification LSN before checksumming the block. */
	if (bip)
		block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
	xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
}
241
242bool
243xfs_btree_lblock_verify_crc(
244 struct xfs_buf *bp)
245{
Brian Fostera45086e2015-10-12 15:59:25 +1100246 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
247 struct xfs_mount *mp = bp->b_target->bt_mount;
248
249 if (xfs_sb_version_hascrc(&mp->m_sb)) {
250 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
251 return false;
Eric Sandeen51582172014-02-27 15:17:27 +1100252 return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
Brian Fostera45086e2015-10-12 15:59:25 +1100253 }
Eric Sandeen51582172014-02-27 15:17:27 +1100254
Christoph Hellwigee1a47a2013-04-21 14:53:46 -0500255 return true;
256}
257
258/*
259 * Calculate CRC on the whole btree block and stuff it into the
260 * short-form btree header.
261 *
 * Prior to calculating the CRC, pull the LSN out of the buffer log item and put
Geliang Tangfef4ded2015-10-12 16:02:32 +1100263 * it into the buffer so recovery knows what the last modification was that made
Christoph Hellwigee1a47a2013-04-21 14:53:46 -0500264 * it to disk.
265 */
void
xfs_btree_sblock_calc_crc(
	struct xfs_buf		*bp)
{
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_buf_log_item	*bip = bp->b_fspriv;

	/* CRCs are only maintained on v5 (CRC-enabled) filesystems. */
	if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
		return;
	/* Stamp the last-modification LSN before checksumming the block. */
	if (bip)
		block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
	xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
}
279
280bool
281xfs_btree_sblock_verify_crc(
282 struct xfs_buf *bp)
283{
Brian Fostera45086e2015-10-12 15:59:25 +1100284 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
285 struct xfs_mount *mp = bp->b_target->bt_mount;
286
287 if (xfs_sb_version_hascrc(&mp->m_sb)) {
288 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
289 return false;
Eric Sandeen51582172014-02-27 15:17:27 +1100290 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
Brian Fostera45086e2015-10-12 15:59:25 +1100291 }
Eric Sandeen51582172014-02-27 15:17:27 +1100292
Christoph Hellwigee1a47a2013-04-21 14:53:46 -0500293 return true;
294}
295
Christoph Hellwigc46ee8a2016-02-08 14:58:07 +1100296static int
297xfs_btree_free_block(
298 struct xfs_btree_cur *cur,
299 struct xfs_buf *bp)
300{
301 int error;
302
303 error = cur->bc_ops->free_block(cur, bp);
Christoph Hellwigedfd9dd2016-02-08 14:58:07 +1100304 if (!error) {
305 xfs_trans_binval(cur->bc_tp, bp);
Christoph Hellwigc46ee8a2016-02-08 14:58:07 +1100306 XFS_BTREE_STATS_INC(cur, free);
Christoph Hellwigedfd9dd2016-02-08 14:58:07 +1100307 }
Christoph Hellwigc46ee8a2016-02-08 14:58:07 +1100308 return error;
309}
310
Christoph Hellwigee1a47a2013-04-21 14:53:46 -0500311/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700312 * Delete the btree cursor.
313 */
void
xfs_btree_del_cursor(
	xfs_btree_cur_t	*cur,		/* btree cursor */
	int		error)		/* del because of error */
{
	int		i;		/* btree level */

	/*
	 * Clear the buffer pointers, and release the buffers.
	 * If we're doing this in the face of an error, we
	 * need to make sure to inspect all of the entries
	 * in the bc_bufs array for buffers to be unlocked.
	 * This is because some of the btree code works from
	 * level n down to 0, and if we get an error along
	 * the way we won't have initialized all the entries
	 * down to 0.
	 */
	for (i = 0; i < cur->bc_nlevels; i++) {
		if (cur->bc_bufs[i])
			xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
		else if (!error)
			/* clean teardown: first empty slot ends the scan */
			break;
	}
	/*
	 * Can't free a bmap cursor without having dealt with the
	 * allocated indirect blocks' accounting.
	 */
	ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
	       cur->bc_private.b.allocated == 0);
	/*
	 * Free the cursor.
	 */
	kmem_zone_free(xfs_btree_cur_zone, cur);
}
348
349/*
350 * Duplicate the btree cursor.
351 * Allocate a new one, copy the record, re-get the buffers.
352 */
int					/* error */
xfs_btree_dup_cursor(
	xfs_btree_cur_t	*cur,		/* input cursor */
	xfs_btree_cur_t	**ncur)		/* output cursor */
{
	xfs_buf_t	*bp;		/* btree block's buffer pointer */
	int		error;		/* error return value */
	int		i;		/* level number of btree block */
	xfs_mount_t	*mp;		/* mount structure for filesystem */
	xfs_btree_cur_t	*new;		/* new cursor value */
	xfs_trans_t	*tp;		/* transaction pointer, can be NULL */

	tp = cur->bc_tp;
	mp = cur->bc_mp;

	/*
	 * Allocate a new cursor like the old one.
	 */
	new = cur->bc_ops->dup_cursor(cur);

	/*
	 * Copy the record currently in the cursor.
	 */
	new->bc_rec = cur->bc_rec;

	/*
	 * For each level current, re-get the buffer and copy the ptr value.
	 */
	for (i = 0; i < new->bc_nlevels; i++) {
		new->bc_ptrs[i] = cur->bc_ptrs[i];
		new->bc_ra[i] = cur->bc_ra[i];
		bp = cur->bc_bufs[i];
		if (bp) {
			/*
			 * Re-read through the transaction so the duplicate
			 * holds its own reference; the per-btree buf_ops
			 * run verification on the read.
			 */
			error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
						   XFS_BUF_ADDR(bp), mp->m_bsize,
						   0, &bp,
						   cur->bc_ops->buf_ops);
			if (error) {
				/* tear down the partial duplicate */
				xfs_btree_del_cursor(new, error);
				*ncur = NULL;
				return error;
			}
		}
		new->bc_bufs[i] = bp;
	}
	*ncur = new;
	return 0;
}
401
402/*
Christoph Hellwig65f1eae2008-10-30 16:55:34 +1100403 * XFS btree block layout and addressing:
404 *
405 * There are two types of blocks in the btree: leaf and non-leaf blocks.
406 *
407 * The leaf record start with a header then followed by records containing
408 * the values. A non-leaf block also starts with the same header, and
409 * then first contains lookup keys followed by an equal number of pointers
410 * to the btree blocks at the previous level.
411 *
412 * +--------+-------+-------+-------+-------+-------+-------+
413 * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
414 * +--------+-------+-------+-------+-------+-------+-------+
415 *
416 * +--------+-------+-------+-------+-------+-------+-------+
417 * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
418 * +--------+-------+-------+-------+-------+-------+-------+
419 *
420 * The header is called struct xfs_btree_block for reasons better left unknown
421 * and comes in different versions for short (32bit) and long (64bit) block
422 * pointers. The record and key structures are defined by the btree instances
423 * and opaque to the btree core. The block pointers are simple disk endian
424 * integers, available in a short (32bit) and long (64bit) variant.
425 *
426 * The helpers below calculate the offset of a given record, key or pointer
427 * into a btree block (xfs_btree_*_offset) or return a pointer to the given
428 * record, key or pointer (xfs_btree_*_addr). Note that all addressing
429 * inside the btree block is done using indices starting at one, not zero!
Darrick J. Wong2c813ad2016-08-03 11:08:36 +1000430 *
431 * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing
432 * overlapping intervals. In such a tree, records are still sorted lowest to
433 * highest and indexed by the smallest key value that refers to the record.
434 * However, nodes are different: each pointer has two associated keys -- one
435 * indexing the lowest key available in the block(s) below (the same behavior
436 * as the key in a regular btree) and another indexing the highest key
437 * available in the block(s) below. Because records are /not/ sorted by the
438 * highest key, all leaf block updates require us to compute the highest key
439 * that matches any record in the leaf and to recursively update the high keys
440 * in the nodes going further up in the tree, if necessary. Nodes look like
441 * this:
442 *
443 * +--------+-----+-----+-----+-----+-----+-------+-------+-----+
444 * Non-Leaf: | header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... |
445 * +--------+-----+-----+-----+-----+-----+-------+-------+-----+
446 *
447 * To perform an interval query on an overlapped tree, perform the usual
448 * depth-first search and use the low and high keys to decide if we can skip
449 * that particular node. If a leaf node is reached, return the records that
450 * intersect the interval. Note that an interval query may return numerous
451 * entries. For a non-overlapped tree, simply search for the record associated
452 * with the lowest key and iterate forward until a non-matching record is
453 * found. Section 14.3 ("Interval Trees") of _Introduction to Algorithms_ by
454 * Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only) discuss this in
455 * more detail.
456 *
457 * Why do we care about overlapping intervals? Let's say you have a bunch of
458 * reverse mapping records on a reflink filesystem:
459 *
460 * 1: +- file A startblock B offset C length D -----------+
461 * 2: +- file E startblock F offset G length H --------------+
462 * 3: +- file I startblock F offset J length K --+
463 * 4: +- file L... --+
464 *
465 * Now say we want to map block (B+D) into file A at offset (C+D). Ideally,
466 * we'd simply increment the length of record 1. But how do we find the record
467 * that ends at (B+D-1) (i.e. record 1)? A LE lookup of (B+D-1) would return
468 * record 3 because the keys are ordered first by startblock. An interval
469 * query would return records 1 and 2 because they both overlap (B+D-1), and
470 * from that we can pick out record 1 as the appropriate left neighbor.
471 *
472 * In the non-overlapped case you can do a LE lookup and decrement the cursor
473 * because a record's interval must end before the next record.
Christoph Hellwig65f1eae2008-10-30 16:55:34 +1100474 */
475
476/*
477 * Return size of the btree block header for this btree instance.
478 */
479static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
480{
Christoph Hellwigee1a47a2013-04-21 14:53:46 -0500481 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
482 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
483 return XFS_BTREE_LBLOCK_CRC_LEN;
484 return XFS_BTREE_LBLOCK_LEN;
485 }
486 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
487 return XFS_BTREE_SBLOCK_CRC_LEN;
488 return XFS_BTREE_SBLOCK_LEN;
Christoph Hellwig65f1eae2008-10-30 16:55:34 +1100489}
490
491/*
492 * Return size of btree block pointers for this btree instance.
493 */
494static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
495{
496 return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
497 sizeof(__be64) : sizeof(__be32);
498}
499
500/*
501 * Calculate offset of the n-th record in a btree block.
502 */
STATIC size_t
xfs_btree_rec_offset(
	struct xfs_btree_cur	*cur,
	int			n)
{
	/* Records follow the block header; indexing starts at 1. */
	return xfs_btree_block_len(cur) +
		(n - 1) * cur->bc_ops->rec_len;
}
511
512/*
513 * Calculate offset of the n-th key in a btree block.
514 */
STATIC size_t
xfs_btree_key_offset(
	struct xfs_btree_cur	*cur,
	int			n)
{
	/* Keys follow the block header; indexing starts at 1. */
	return xfs_btree_block_len(cur) +
		(n - 1) * cur->bc_ops->key_len;
}
523
524/*
Darrick J. Wong2c813ad2016-08-03 11:08:36 +1000525 * Calculate offset of the n-th high key in a btree block.
526 */
STATIC size_t
xfs_btree_high_key_offset(
	struct xfs_btree_cur	*cur,
	int			n)
{
	/*
	 * In an overlapping btree each key slot holds a (low, high) key
	 * pair; the high key occupies the second half of the slot.
	 */
	return xfs_btree_block_len(cur) +
		(n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2);
}
535
536/*
Christoph Hellwig65f1eae2008-10-30 16:55:34 +1100537 * Calculate offset of the n-th block pointer in a btree block.
538 */
STATIC size_t
xfs_btree_ptr_offset(
	struct xfs_btree_cur	*cur,
	int			n,
	int			level)
{
	/* Pointers sit after maxrecs(level) keys; indexing starts at 1. */
	return xfs_btree_block_len(cur) +
		cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
		(n - 1) * xfs_btree_ptr_len(cur);
}
549
550/*
551 * Return a pointer to the n-th record in the btree block.
552 */
STATIC union xfs_btree_rec *
xfs_btree_rec_addr(
	struct xfs_btree_cur	*cur,
	int			n,
	struct xfs_btree_block	*block)
{
	/* Byte-offset arithmetic from the block header; n is 1-based. */
	return (union xfs_btree_rec *)
		((char *)block + xfs_btree_rec_offset(cur, n));
}
562
563/*
564 * Return a pointer to the n-th key in the btree block.
565 */
STATIC union xfs_btree_key *
xfs_btree_key_addr(
	struct xfs_btree_cur	*cur,
	int			n,
	struct xfs_btree_block	*block)
{
	/* Byte-offset arithmetic from the block header; n is 1-based. */
	return (union xfs_btree_key *)
		((char *)block + xfs_btree_key_offset(cur, n));
}
575
576/*
Darrick J. Wong2c813ad2016-08-03 11:08:36 +1000577 * Return a pointer to the n-th high key in the btree block.
578 */
STATIC union xfs_btree_key *
xfs_btree_high_key_addr(
	struct xfs_btree_cur	*cur,
	int			n,
	struct xfs_btree_block	*block)
{
	/* High half of the n-th key slot in an overlapping btree node. */
	return (union xfs_btree_key *)
		((char *)block + xfs_btree_high_key_offset(cur, n));
}
588
589/*
Christoph Hellwig65f1eae2008-10-30 16:55:34 +1100590 * Return a pointer to the n-th block pointer in the btree block.
591 */
STATIC union xfs_btree_ptr *
xfs_btree_ptr_addr(
	struct xfs_btree_cur	*cur,
	int			n,
	struct xfs_btree_block	*block)
{
	int			level = xfs_btree_get_level(block);

	/* Leaf blocks (level 0) hold records, never pointers. */
	ASSERT(block->bb_level != 0);

	return (union xfs_btree_ptr *)
		((char *)block + xfs_btree_ptr_offset(cur, n, level));
}
605
606/*
Zhi Yong Wu1cb93862013-08-07 10:11:05 +0000607 * Get the root block which is stored in the inode.
Christoph Hellwig8186e512008-10-30 16:54:22 +1100608 *
609 * For now this btree implementation assumes the btree root is always
610 * stored in the if_broot field of an inode fork.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700611 */
Christoph Hellwig8186e512008-10-30 16:54:22 +1100612STATIC struct xfs_btree_block *
613xfs_btree_get_iroot(
Kaho Ngfbfb24b2016-07-20 10:37:50 +1000614 struct xfs_btree_cur *cur)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615{
Kaho Ngfbfb24b2016-07-20 10:37:50 +1000616 struct xfs_ifork *ifp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700617
Kaho Ngfbfb24b2016-07-20 10:37:50 +1000618 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
619 return (struct xfs_btree_block *)ifp->if_broot;
Christoph Hellwig8186e512008-10-30 16:54:22 +1100620}
621
622/*
623 * Retrieve the block pointer from the cursor at the given level.
624 * This may be an inode btree root or from a buffer.
625 */
STATIC struct xfs_btree_block *	/* generic btree block pointer */
xfs_btree_get_block(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			level,	/* level in btree */
	struct xfs_buf		**bpp)	/* buffer containing the block */
{
	/*
	 * The topmost level of a root-in-inode btree lives in the inode
	 * fork rather than in a buffer; return the iroot with *bpp NULL.
	 */
	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    (level == cur->bc_nlevels - 1)) {
		*bpp = NULL;
		return xfs_btree_get_iroot(cur);
	}

	*bpp = cur->bc_bufs[level];
	return XFS_BUF_TO_BLOCK(*bpp);
}
641
642/*
643 * Get a buffer for the block, return it with no data read.
644 * Long-form addressing.
645 */
xfs_buf_t *				/* buffer for fsbno */
xfs_btree_get_bufl(
	xfs_mount_t	*mp,		/* file system mount point */
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_fsblock_t	fsbno,		/* file system block number */
	uint		lock)		/* lock flags for get_buf */
{
	xfs_daddr_t	d;		/* real disk block address */

	ASSERT(fsbno != NULLFSBLOCK);
	/* Translate the fsblock to a disk address and get the buffer. */
	d = XFS_FSB_TO_DADDR(mp, fsbno);
	return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
}
659
660/*
661 * Get a buffer for the block, return it with no data read.
662 * Short-form addressing.
663 */
xfs_buf_t *				/* buffer for agno/agbno */
xfs_btree_get_bufs(
	xfs_mount_t	*mp,		/* file system mount point */
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_agnumber_t	agno,		/* allocation group number */
	xfs_agblock_t	agbno,		/* allocation group block number */
	uint		lock)		/* lock flags for get_buf */
{
	xfs_daddr_t	d;		/* real disk block address */

	ASSERT(agno != NULLAGNUMBER);
	ASSERT(agbno != NULLAGBLOCK);
	/* Translate the AG block to a disk address and get the buffer. */
	d = XFS_AGB_TO_DADDR(mp, agno, agbno);
	return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
}
679
680/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681 * Check for the cursor referring to the last block at the given level.
682 */
683int /* 1=is last block, 0=not last block */
684xfs_btree_islastblock(
685 xfs_btree_cur_t *cur, /* btree cursor */
686 int level) /* level to check */
687{
Christoph Hellwig7cc95a82008-10-30 17:14:34 +1100688 struct xfs_btree_block *block; /* generic btree block pointer */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689 xfs_buf_t *bp; /* buffer containing block */
690
691 block = xfs_btree_get_block(cur, level, &bp);
692 xfs_btree_check_block(cur, block, level, bp);
Christoph Hellwige99ab902008-10-30 16:54:33 +1100693 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +1000694 return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695 else
Christoph Hellwig69ef9212011-07-08 14:36:05 +0200696 return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697}
698
/*
 * Change the cursor to point to the first record at the given level.
 * Other levels are unaffected.
 */
STATIC int				/* success=1, failure=0 */
xfs_btree_firstrec(
	xfs_btree_cur_t		*cur,	/* btree cursor */
	int			level)	/* level to change */
{
	struct xfs_btree_block	*block;	/* generic btree block pointer */
	xfs_buf_t		*bp;	/* buffer containing block */

	/*
	 * Get the block pointer for this level.
	 */
	block = xfs_btree_get_block(cur, level, &bp);
	xfs_btree_check_block(cur, block, level, bp);
	/*
	 * It's empty, there is no such record.
	 * (bb_numrecs is big-endian, but a test for zero needs no swap.)
	 */
	if (!block->bb_numrecs)
		return 0;
	/*
	 * Set the ptr value to 1, that's the first record/key.
	 * (bc_ptrs values are 1-based.)
	 */
	cur->bc_ptrs[level] = 1;
	return 1;
}
727
/*
 * Change the cursor to point to the last record in the current block
 * at the given level.  Other levels are unaffected.
 */
STATIC int				/* success=1, failure=0 */
xfs_btree_lastrec(
	xfs_btree_cur_t		*cur,	/* btree cursor */
	int			level)	/* level to change */
{
	struct xfs_btree_block	*block;	/* generic btree block pointer */
	xfs_buf_t		*bp;	/* buffer containing block */

	/*
	 * Get the block pointer for this level.
	 */
	block = xfs_btree_get_block(cur, level, &bp);
	xfs_btree_check_block(cur, block, level, bp);
	/*
	 * It's empty, there is no such record.
	 */
	if (!block->bb_numrecs)
		return 0;
	/*
	 * Set the ptr value to numrecs, that's the last record/key
	 * (bc_ptrs values are 1-based).
	 */
	cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
	return 1;
}
756
/*
 * Compute first and last byte offsets for the fields given.
 * Interprets the offsets table, which contains struct field offsets.
 *
 * The caller's offsets[] table must contain nbits + 1 entries: one
 * offset per field bit plus a final end-of-region length, because the
 * last-byte computation below reads offsets[i + 1] (see the soffsets/
 * loffsets tables in xfs_btree_log_block).
 */
void
xfs_btree_offsets(
	__int64_t	fields,		/* bitmask of fields */
	const short	*offsets,	/* table of field offsets */
	int		nbits,		/* number of bits to inspect */
	int		*first,		/* output: first byte offset */
	int		*last)		/* output: last byte offset */
{
	int		i;	/* current bit number */
	__int64_t	imask;	/* mask for current bit number */

	/* fields == 0 would make both scans run off the table */
	ASSERT(fields != 0);
	/*
	 * Find the lowest bit, so the first byte offset.
	 */
	for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
		if (imask & fields) {
			*first = offsets[i];
			break;
		}
	}
	/*
	 * Find the highest bit, so the last byte offset.
	 */
	for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
		if (imask & fields) {
			*last = offsets[i + 1] - 1;
			break;
		}
	}
}
792
/*
 * Get a buffer for the block, return it read in.
 * Long-form addressing.
 *
 * Returns 0 on success with *bpp set (possibly NULL — see below), or a
 * negative error from xfs_trans_read_buf.
 */
int
xfs_btree_read_bufl(
	struct xfs_mount	*mp,		/* file system mount point */
	struct xfs_trans	*tp,		/* transaction pointer */
	xfs_fsblock_t		fsbno,		/* file system block number */
	uint			lock,		/* lock flags for read_buf */
	struct xfs_buf		**bpp,		/* buffer for fsbno */
	int			refval,		/* ref count value for buffer */
	const struct xfs_buf_ops *ops)
{
	struct xfs_buf		*bp;	/* return value */
	xfs_daddr_t		d;	/* real disk block address */
	int			error;

	ASSERT(fsbno != NULLFSBLOCK);
	d = XFS_FSB_TO_DADDR(mp, fsbno);
	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
				   mp->m_bsize, lock, &bp, ops);
	if (error)
		return error;
	/*
	 * bp can be NULL even on success (xfs_trans_read_buf contract);
	 * only set the reclaim reference hint when we actually got one.
	 */
	if (bp)
		xfs_buf_set_ref(bp, refval);
	*bpp = bp;
	return 0;
}
822
823/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700824 * Read-ahead the block, don't wait for it, don't return a buffer.
825 * Long-form addressing.
826 */
827/* ARGSUSED */
828void
829xfs_btree_reada_bufl(
Dave Chinner3d3e6f62012-11-12 22:54:08 +1100830 struct xfs_mount *mp, /* file system mount point */
831 xfs_fsblock_t fsbno, /* file system block number */
832 xfs_extlen_t count, /* count of filesystem blocks */
Dave Chinner1813dd62012-11-14 17:54:40 +1100833 const struct xfs_buf_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834{
835 xfs_daddr_t d;
836
837 ASSERT(fsbno != NULLFSBLOCK);
838 d = XFS_FSB_TO_DADDR(mp, fsbno);
Dave Chinner1813dd62012-11-14 17:54:40 +1100839 xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700840}
841
842/*
843 * Read-ahead the block, don't wait for it, don't return a buffer.
844 * Short-form addressing.
845 */
846/* ARGSUSED */
847void
848xfs_btree_reada_bufs(
Dave Chinner3d3e6f62012-11-12 22:54:08 +1100849 struct xfs_mount *mp, /* file system mount point */
850 xfs_agnumber_t agno, /* allocation group number */
851 xfs_agblock_t agbno, /* allocation group block number */
852 xfs_extlen_t count, /* count of filesystem blocks */
Dave Chinner1813dd62012-11-14 17:54:40 +1100853 const struct xfs_buf_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700854{
855 xfs_daddr_t d;
856
857 ASSERT(agno != NULLAGNUMBER);
858 ASSERT(agbno != NULLAGBLOCK);
859 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
Dave Chinner1813dd62012-11-14 17:54:40 +1100860 xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700861}
862
/*
 * Issue readahead for the left/right siblings of a long-form btree
 * block, as selected by the XFS_BTCUR_{LEFT,RIGHT}RA bits in @lr.
 * Returns the number of readaheads started.
 */
STATIC int
xfs_btree_readahead_lblock(
	struct xfs_btree_cur	*cur,
	int			lr,
	struct xfs_btree_block	*block)
{
	int			rval = 0;
	xfs_fsblock_t		left = be64_to_cpu(block->bb_u.l.bb_leftsib);
	xfs_fsblock_t		right = be64_to_cpu(block->bb_u.l.bb_rightsib);

	/* NULLFSBLOCK means no sibling in that direction */
	if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
		xfs_btree_reada_bufl(cur->bc_mp, left, 1,
				     cur->bc_ops->buf_ops);
		rval++;
	}

	if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
		xfs_btree_reada_bufl(cur->bc_mp, right, 1,
				     cur->bc_ops->buf_ops);
		rval++;
	}

	return rval;
}
887
/*
 * Issue readahead for the left/right siblings of a short-form btree
 * block, as selected by the XFS_BTCUR_{LEFT,RIGHT}RA bits in @lr.
 * Returns the number of readaheads started.
 */
STATIC int
xfs_btree_readahead_sblock(
	struct xfs_btree_cur	*cur,
	int			lr,
	struct xfs_btree_block	*block)
{
	int			rval = 0;
	xfs_agblock_t		left = be32_to_cpu(block->bb_u.s.bb_leftsib);
	xfs_agblock_t		right = be32_to_cpu(block->bb_u.s.bb_rightsib);


	/* NULLAGBLOCK means no sibling in that direction */
	if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
		xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
				     left, 1, cur->bc_ops->buf_ops);
		rval++;
	}

	if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
		xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
				     right, 1, cur->bc_ops->buf_ops);
		rval++;
	}

	return rval;
}
913
/*
 * Read-ahead btree blocks, at the given level.
 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
 * Returns the number of readaheads issued (0 if nothing new to do).
 */
STATIC int
xfs_btree_readahead(
	struct xfs_btree_cur	*cur,		/* btree cursor */
	int			lev,		/* level in btree */
	int			lr)		/* left/right bits */
{
	struct xfs_btree_block	*block;

	/*
	 * No readahead needed if we are at the root level and the
	 * btree root is stored in the inode.
	 */
	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    (lev == cur->bc_nlevels - 1))
		return 0;

	/* Skip if every requested direction was already read ahead. */
	if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
		return 0;

	/* Record the directions now so repeats become no-ops. */
	cur->bc_ra[lev] |= lr;
	block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);

	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		return xfs_btree_readahead_lblock(cur, lr, block);
	return xfs_btree_readahead_sblock(cur, lr, block);
}
944
/*
 * Convert a union btree pointer (big-endian, long or short form) into
 * a raw disk address.  The pointer must not be the NULL sentinel.
 */
STATIC xfs_daddr_t
xfs_btree_ptr_to_daddr(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr)
{
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		ASSERT(ptr->l != cpu_to_be64(NULLFSBLOCK));

		return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
	} else {
		/* short-form pointers are relative to the cursor's AG */
		ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
		ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));

		return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
					be32_to_cpu(ptr->s));
	}
}
962
/*
 * Readahead @count btree blocks at the given @ptr location.
 *
 * We don't need to care about long or short form btrees here as we have a
 * method of converting the ptr directly to a daddr available to us.
 */
STATIC void
xfs_btree_readahead_ptr(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	xfs_extlen_t		count)
{
	xfs_buf_readahead(cur->bc_mp->m_ddev_targp,
			  xfs_btree_ptr_to_daddr(cur, ptr),
			  cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
}
979
/*
 * Set the buffer for level "lev" in the cursor to bp, releasing
 * any previous buffer.
 */
STATIC void
xfs_btree_setbuf(
	xfs_btree_cur_t		*cur,	/* btree cursor */
	int			lev,	/* level in btree */
	xfs_buf_t		*bp)	/* new buffer to set */
{
	struct xfs_btree_block	*b;	/* btree block */

	if (cur->bc_bufs[lev])
		xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]);
	cur->bc_bufs[lev] = bp;
	cur->bc_ra[lev] = 0;

	/*
	 * Pre-set the readahead flags for directions that have no
	 * sibling, so xfs_btree_readahead() never tries to read past
	 * the edge of the tree at this level.
	 */
	b = XFS_BUF_TO_BLOCK(bp);
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK))
			cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
		if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK))
			cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
	} else {
		if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
			cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
		if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
			cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
	}
}
Christoph Hellwig637aa502008-10-30 16:55:45 +11001010
1011STATIC int
1012xfs_btree_ptr_is_null(
1013 struct xfs_btree_cur *cur,
1014 union xfs_btree_ptr *ptr)
1015{
1016 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +10001017 return ptr->l == cpu_to_be64(NULLFSBLOCK);
Christoph Hellwig637aa502008-10-30 16:55:45 +11001018 else
Christoph Hellwig69ef9212011-07-08 14:36:05 +02001019 return ptr->s == cpu_to_be32(NULLAGBLOCK);
Christoph Hellwig637aa502008-10-30 16:55:45 +11001020}
1021
Christoph Hellwig4b22a572008-10-30 16:57:40 +11001022STATIC void
1023xfs_btree_set_ptr_null(
1024 struct xfs_btree_cur *cur,
1025 union xfs_btree_ptr *ptr)
1026{
1027 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +10001028 ptr->l = cpu_to_be64(NULLFSBLOCK);
Christoph Hellwig4b22a572008-10-30 16:57:40 +11001029 else
1030 ptr->s = cpu_to_be32(NULLAGBLOCK);
1031}
1032
/*
 * Get/set/init sibling pointers
 */
/*
 * Copy the requested sibling pointer (XFS_BB_LEFTSIB or
 * XFS_BB_RIGHTSIB) of @block into @ptr, still in on-disk (big-endian)
 * form.
 */
STATIC void
xfs_btree_get_sibling(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	union xfs_btree_ptr	*ptr,
	int			lr)
{
	ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);

	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		if (lr == XFS_BB_RIGHTSIB)
			ptr->l = block->bb_u.l.bb_rightsib;
		else
			ptr->l = block->bb_u.l.bb_leftsib;
	} else {
		if (lr == XFS_BB_RIGHTSIB)
			ptr->s = block->bb_u.s.bb_rightsib;
		else
			ptr->s = block->bb_u.s.bb_leftsib;
	}
}
1057
/*
 * Store @ptr (already in on-disk, big-endian form) as the requested
 * sibling pointer (XFS_BB_LEFTSIB or XFS_BB_RIGHTSIB) of @block.
 * The caller is responsible for logging the change.
 */
STATIC void
xfs_btree_set_sibling(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	union xfs_btree_ptr	*ptr,
	int			lr)
{
	ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);

	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		if (lr == XFS_BB_RIGHTSIB)
			block->bb_u.l.bb_rightsib = ptr->l;
		else
			block->bb_u.l.bb_leftsib = ptr->l;
	} else {
		if (lr == XFS_BB_RIGHTSIB)
			block->bb_u.s.bb_rightsib = ptr->s;
		else
			block->bb_u.s.bb_leftsib = ptr->s;
	}
}
1079
/*
 * Initialize the header of an in-memory btree block image.
 *
 * Fills in magic/level/numrecs, NULLs both sibling pointers, and — for
 * CRC-enabled blocks only — the self-describing metadata (disk address,
 * owner, filesystem UUID, zeroed LSN).  The CRC itself is not computed
 * here; that is done at write time by the buffer ops.
 */
void
xfs_btree_init_block_int(
	struct xfs_mount	*mp,
	struct xfs_btree_block	*buf,
	xfs_daddr_t		blkno,
	__u32			magic,
	__u16			level,
	__u16			numrecs,
	__u64			owner,
	unsigned int		flags)
{
	buf->bb_magic = cpu_to_be32(magic);
	buf->bb_level = cpu_to_be16(level);
	buf->bb_numrecs = cpu_to_be16(numrecs);

	if (flags & XFS_BTREE_LONG_PTRS) {
		buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
		buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
		if (flags & XFS_BTREE_CRC_BLOCKS) {
			buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
			buf->bb_u.l.bb_owner = cpu_to_be64(owner);
			uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid);
			buf->bb_u.l.bb_pad = 0;
			buf->bb_u.l.bb_lsn = 0;
		}
	} else {
		/* owner is a 32 bit value on short blocks */
		__u32 __owner = (__u32)owner;

		buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
		buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
		if (flags & XFS_BTREE_CRC_BLOCKS) {
			buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
			buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
			uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid);
			buf->bb_u.s.bb_lsn = 0;
		}
	}
}
1119
/*
 * Initialize the btree block header held in buffer @bp, deriving the
 * self-describing disk address from the buffer itself.  Thin wrapper
 * around xfs_btree_init_block_int().
 */
void
xfs_btree_init_block(
	struct xfs_mount *mp,
	struct xfs_buf	*bp,
	__u32		magic,
	__u16		level,
	__u16		numrecs,
	__u64		owner,
	unsigned int	flags)
{
	xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
				 magic, level, numrecs, owner, flags);
}
1133
/*
 * Initialize a new btree block using the magic number, owner and flags
 * taken from the cursor's btree type.
 */
STATIC void
xfs_btree_init_block_cur(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp,
	int			level,
	int			numrecs)
{
	__u64 owner;

	/*
	 * we can pull the owner from the cursor right now as the different
	 * owners align directly with the pointer size of the btree. This may
	 * change in future, but is safe for current users of the generic btree
	 * code.
	 */
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		owner = cur->bc_private.b.ip->i_ino;	/* inode-owned (bmap) */
	else
		owner = cur->bc_private.a.agno;		/* AG-owned */

	xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
				 xfs_btree_magic(cur), level, numrecs,
				 owner, cur->bc_flags);
}
1158
/*
 * Return true if ptr is the last record in the btree and
 * we need to track updates to this record.  The decision
 * will be further refined in the update_lastrec method.
 *
 * Only meaningful for level-0 (leaf) blocks of btrees that set
 * XFS_BTREE_LASTREC_UPDATE; everything else returns 0 immediately.
 */
STATIC int
xfs_btree_is_lastrec(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	int			level)
{
	union xfs_btree_ptr	ptr;

	if (level > 0)
		return 0;
	if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
		return 0;

	/* the last record lives in the right-most leaf: no right sibling */
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
	if (!xfs_btree_ptr_is_null(cur, &ptr))
		return 0;
	return 1;
}
1182
/*
 * Convert a buffer's disk address into a btree pointer in the form
 * (long fsblock / short agblock, big-endian) used by this btree.
 * Inverse of xfs_btree_ptr_to_daddr().
 */
STATIC void
xfs_btree_buf_to_ptr(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp,
	union xfs_btree_ptr	*ptr)
{
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
					XFS_BUF_ADDR(bp)));
	else {
		ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
					XFS_BUF_ADDR(bp)));
	}
}
1197
/*
 * Set the buffer's LRU reference value according to which btree the
 * cursor belongs to.  Unknown btree types hit the ASSERT (debug builds
 * only; a no-op otherwise).
 */
STATIC void
xfs_btree_set_refs(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp)
{
	switch (cur->bc_btnum) {
	case XFS_BTNUM_BNO:
	case XFS_BTNUM_CNT:
		xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
		break;
	case XFS_BTNUM_INO:
	case XFS_BTNUM_FINO:
		xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
		break;
	case XFS_BTNUM_BMAP:
		xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
		break;
	default:
		ASSERT(0);
	}
}
1219
/*
 * Get (without reading from disk) the buffer at the given ptr, attach
 * the btree's verifier ops, and return both buffer and block pointer.
 * Returns -ENOMEM if no buffer could be obtained.
 */
STATIC int
xfs_btree_get_buf_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	int			flags,
	struct xfs_btree_block	**block,
	struct xfs_buf		**bpp)
{
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_daddr_t		d;

	/* need to sort out how callers deal with failures first */
	ASSERT(!(flags & XBF_TRYLOCK));

	d = xfs_btree_ptr_to_daddr(cur, ptr);
	*bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
				 mp->m_bsize, flags);

	if (!*bpp)
		return -ENOMEM;

	(*bpp)->b_ops = cur->bc_ops->buf_ops;
	*block = XFS_BUF_TO_BLOCK(*bpp);
	return 0;
}
1245
/*
 * Read in the buffer at the given ptr and return the buffer and
 * the block pointer within the buffer.
 *
 * The read goes through the cursor's buf_ops verifier; on success the
 * buffer's LRU reference is set for the btree type.
 */
STATIC int
xfs_btree_read_buf_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	int			flags,
	struct xfs_btree_block	**block,
	struct xfs_buf		**bpp)
{
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_daddr_t		d;
	int			error;

	/* need to sort out how callers deal with failures first */
	ASSERT(!(flags & XBF_TRYLOCK));

	d = xfs_btree_ptr_to_daddr(cur, ptr);
	error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
				   mp->m_bsize, flags, bpp,
				   cur->bc_ops->buf_ops);
	if (error)
		return error;

	xfs_btree_set_refs(cur, *bpp);
	*block = XFS_BUF_TO_BLOCK(*bpp);
	return 0;
}
1276
1277/*
Christoph Hellwig38bb7422008-10-30 16:56:22 +11001278 * Copy keys from one btree block to another.
1279 */
1280STATIC void
1281xfs_btree_copy_keys(
1282 struct xfs_btree_cur *cur,
1283 union xfs_btree_key *dst_key,
1284 union xfs_btree_key *src_key,
1285 int numkeys)
1286{
1287 ASSERT(numkeys >= 0);
1288 memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1289}
1290
1291/*
Christoph Hellwig278d0ca2008-10-30 16:56:32 +11001292 * Copy records from one btree block to another.
1293 */
1294STATIC void
1295xfs_btree_copy_recs(
1296 struct xfs_btree_cur *cur,
1297 union xfs_btree_rec *dst_rec,
1298 union xfs_btree_rec *src_rec,
1299 int numrecs)
1300{
1301 ASSERT(numrecs >= 0);
1302 memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
1303}
1304
1305/*
Christoph Hellwig9eaead52008-10-30 16:56:43 +11001306 * Copy block pointers from one btree block to another.
1307 */
1308STATIC void
1309xfs_btree_copy_ptrs(
1310 struct xfs_btree_cur *cur,
1311 union xfs_btree_ptr *dst_ptr,
1312 union xfs_btree_ptr *src_ptr,
1313 int numptrs)
1314{
1315 ASSERT(numptrs >= 0);
1316 memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
1317}
1318
1319/*
1320 * Shift keys one index left/right inside a single btree block.
1321 */
1322STATIC void
1323xfs_btree_shift_keys(
1324 struct xfs_btree_cur *cur,
1325 union xfs_btree_key *key,
1326 int dir,
1327 int numkeys)
1328{
1329 char *dst_key;
1330
1331 ASSERT(numkeys >= 0);
1332 ASSERT(dir == 1 || dir == -1);
1333
1334 dst_key = (char *)key + (dir * cur->bc_ops->key_len);
1335 memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
1336}
1337
1338/*
1339 * Shift records one index left/right inside a single btree block.
1340 */
1341STATIC void
1342xfs_btree_shift_recs(
1343 struct xfs_btree_cur *cur,
1344 union xfs_btree_rec *rec,
1345 int dir,
1346 int numrecs)
1347{
1348 char *dst_rec;
1349
1350 ASSERT(numrecs >= 0);
1351 ASSERT(dir == 1 || dir == -1);
1352
1353 dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
1354 memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
1355}
1356
1357/*
1358 * Shift block pointers one index left/right inside a single btree block.
1359 */
1360STATIC void
1361xfs_btree_shift_ptrs(
1362 struct xfs_btree_cur *cur,
1363 union xfs_btree_ptr *ptr,
1364 int dir,
1365 int numptrs)
1366{
1367 char *dst_ptr;
1368
1369 ASSERT(numptrs >= 0);
1370 ASSERT(dir == 1 || dir == -1);
1371
1372 dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
1373 memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
1374}
1375
/*
 * Log key values from the btree block.
 *
 * For buffer-based blocks the byte range covering keys [first, last]
 * is logged; for an inode-rooted btree (bp == NULL) the whole inode
 * fork root is logged instead.
 */
STATIC void
xfs_btree_log_keys(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp,
	int			first,
	int			last)
{
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);

	if (bp) {
		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
		xfs_trans_log_buf(cur->bc_tp, bp,
				  xfs_btree_key_offset(cur, first),
				  xfs_btree_key_offset(cur, last + 1) - 1);
	} else {
		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
				xfs_ilog_fbroot(cur->bc_private.b.whichfork));
	}

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
}
1401
/*
 * Log record values from the btree block.
 *
 * Records only exist in leaf blocks with a buffer, so unlike the key
 * and pointer variants there is no inode-root (bp == NULL) path here.
 */
void
xfs_btree_log_recs(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp,
	int			first,
	int			last)
{
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);

	xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
	xfs_trans_log_buf(cur->bc_tp, bp,
			  xfs_btree_rec_offset(cur, first),
			  xfs_btree_rec_offset(cur, last + 1) - 1);

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
}
1422
/*
 * Log block pointer fields from a btree block (nonleaf).
 *
 * Pointer offsets depend on the block's level (they sit after the
 * keys), hence the level lookup before computing the byte range.  For
 * an inode-rooted btree (bp == NULL) the inode fork root is logged.
 */
STATIC void
xfs_btree_log_ptrs(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	struct xfs_buf		*bp,	/* buffer containing btree block */
	int			first,	/* index of first pointer to log */
	int			last)	/* index of last pointer to log */
{
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);

	if (bp) {
		struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
		int			level = xfs_btree_get_level(block);

		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
		xfs_trans_log_buf(cur->bc_tp, bp,
				xfs_btree_ptr_offset(cur, first, level),
				xfs_btree_ptr_offset(cur, last + 1, level) - 1);
	} else {
		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
			xfs_ilog_fbroot(cur->bc_private.b.whichfork));
	}

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
}
1451
/*
 * Log fields from a btree block header.
 *
 * @fields is a mask of XFS_BB_* bits; the offset tables below map each
 * bit to a struct offset so xfs_btree_offsets() can turn the mask into
 * a [first, last] byte range.  Each table carries one extra trailing
 * entry (the header length) because xfs_btree_offsets() reads one
 * entry past the highest set bit.
 */
void
xfs_btree_log_block(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	struct xfs_buf		*bp,	/* buffer containing btree block */
	int			fields)	/* mask of fields: XFS_BB_... */
{
	int			first;	/* first byte offset logged */
	int			last;	/* last byte offset logged */
	static const short	soffsets[] = {	/* table of offsets (short) */
		offsetof(struct xfs_btree_block, bb_magic),
		offsetof(struct xfs_btree_block, bb_level),
		offsetof(struct xfs_btree_block, bb_numrecs),
		offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
		offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
		offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
		offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
		offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
		offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
		offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
		XFS_BTREE_SBLOCK_CRC_LEN
	};
	static const short	loffsets[] = {	/* table of offsets (long) */
		offsetof(struct xfs_btree_block, bb_magic),
		offsetof(struct xfs_btree_block, bb_level),
		offsetof(struct xfs_btree_block, bb_numrecs),
		offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
		offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
		offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
		offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
		offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
		offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
		offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
		offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
		XFS_BTREE_LBLOCK_CRC_LEN
	};

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGBI(cur, bp, fields);

	if (bp) {
		int nbits;

		if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
			/*
			 * We don't log the CRC when updating a btree
			 * block but instead recreate it during log
			 * recovery.  As the log buffers have checksums
			 * of their own this is safe and avoids logging a crc
			 * update in a lot of places.
			 */
			if (fields == XFS_BB_ALL_BITS)
				fields = XFS_BB_ALL_BITS_CRC;
			nbits = XFS_BB_NUM_BITS_CRC;
		} else {
			nbits = XFS_BB_NUM_BITS;
		}
		xfs_btree_offsets(fields,
				  (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
					loffsets : soffsets,
				  nbits, &first, &last);
		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
		xfs_trans_log_buf(cur->bc_tp, bp, first, last);
	} else {
		/* inode-rooted btree: log the inode fork root instead */
		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
			xfs_ilog_fbroot(cur->bc_private.b.whichfork));
	}

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
}
1524
/*
 * Increment cursor by one record at the level.
 * For nonzero levels the leaf-ward information is untouched.
 *
 * On return *stat is 1 if the cursor now points at the next record,
 * 0 if we ran off the right edge of the tree (no next record).
 */
int						/* error */
xfs_btree_increment(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	struct xfs_btree_block	*block;
	union xfs_btree_ptr	ptr;
	struct xfs_buf		*bp;
	int			error;		/* error return value */
	int			lev;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGI(cur, level);

	ASSERT(level < cur->bc_nlevels);

	/* Read-ahead to the right at this level. */
	xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);

	/* Get a pointer to the btree block. */
	block = xfs_btree_get_block(cur, level, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto error0;
#endif

	/* We're done if we remain in the block after the increment. */
	if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
		goto out1;

	/* Fail if we just went off the right edge of the tree. */
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
	if (xfs_btree_ptr_is_null(cur, &ptr))
		goto out0;

	XFS_BTREE_STATS_INC(cur, increment);

	/*
	 * March up the tree incrementing pointers.
	 * Stop when we don't go off the right edge of a block.
	 */
	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
		block = xfs_btree_get_block(cur, lev, &bp);

#ifdef DEBUG
		error = xfs_btree_check_block(cur, block, lev, bp);
		if (error)
			goto error0;
#endif

		if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
			break;

		/* Read-ahead the right block for the next loop. */
		xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
	}

	/*
	 * If we went off the root then we are either seriously
	 * confused or have the tree root in an inode.
	 */
	if (lev == cur->bc_nlevels) {
		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
			goto out0;
		/* Right sibling said there was more, but the root disagrees. */
		ASSERT(0);
		error = -EFSCORRUPTED;
		goto error0;
	}
	ASSERT(lev < cur->bc_nlevels);

	/*
	 * Now walk back down the tree, fixing up the cursor's buffer
	 * pointers and key numbers.
	 */
	for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
		union xfs_btree_ptr	*ptrp;

		/* Follow the child pointer the cursor now indexes ... */
		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
		--lev;
		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
		if (error)
			goto error0;

		/* ... and start at entry 1 (leftmost) of each child. */
		xfs_btree_setbuf(cur, lev, bp);
		cur->bc_ptrs[lev] = 1;
	}
out1:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;

out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
Christoph Hellwig8df4da42008-10-30 16:55:58 +11001632
/*
 * Decrement cursor by one record at the level.
 * For nonzero levels the leaf-ward information is untouched.
 *
 * On return *stat is 1 if the cursor now points at the previous record,
 * 0 if we ran off the left edge of the tree (no previous record).
 */
int						/* error */
xfs_btree_decrement(
	struct xfs_btree_cur	*cur,
	xfs_buf_t		*bp;
	int			error;		/* error return value */
	int			lev;
	union xfs_btree_ptr	ptr;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGI(cur, level);

	ASSERT(level < cur->bc_nlevels);

	/* Read-ahead to the left at this level. */
	xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);

	/* We're done if we remain in the block after the decrement. */
	if (--cur->bc_ptrs[level] > 0)
		goto out1;

	/* Get a pointer to the btree block. */
	block = xfs_btree_get_block(cur, level, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto error0;
#endif

	/* Fail if we just went off the left edge of the tree. */
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
	if (xfs_btree_ptr_is_null(cur, &ptr))
		goto out0;

	XFS_BTREE_STATS_INC(cur, decrement);

	/*
	 * March up the tree decrementing pointers.
	 * Stop when we don't go off the left edge of a block.
	 */
	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
		if (--cur->bc_ptrs[lev] > 0)
			break;
		/* Read-ahead the left block for the next loop. */
		xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
	}

	/*
	 * If we went off the root then we are seriously confused.
	 * or the root of the tree is in an inode.
	 */
	if (lev == cur->bc_nlevels) {
		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
			goto out0;
		/* Left sibling said there was more, but the root disagrees. */
		ASSERT(0);
		error = -EFSCORRUPTED;
		goto error0;
	}
	ASSERT(lev < cur->bc_nlevels);

	/*
	 * Now walk back down the tree, fixing up the cursor's buffer
	 * pointers and key numbers.
	 */
	for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
		union xfs_btree_ptr	*ptrp;

		/* Follow the child pointer the cursor now indexes ... */
		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
		--lev;
		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
		if (error)
			goto error0;
		/* ... and start at the rightmost entry of each child. */
		xfs_btree_setbuf(cur, lev, bp);
		cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
	}
out1:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;

out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
1730
Christoph Hellwigfe033cc2008-10-30 16:56:09 +11001731STATIC int
1732xfs_btree_lookup_get_block(
1733 struct xfs_btree_cur *cur, /* btree cursor */
1734 int level, /* level in the btree */
1735 union xfs_btree_ptr *pp, /* ptr to btree block */
1736 struct xfs_btree_block **blkp) /* return btree block */
1737{
1738 struct xfs_buf *bp; /* buffer pointer for btree block */
1739 int error = 0;
1740
1741 /* special case the root block if in an inode */
1742 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1743 (level == cur->bc_nlevels - 1)) {
1744 *blkp = xfs_btree_get_iroot(cur);
1745 return 0;
1746 }
1747
1748 /*
1749 * If the old buffer at this level for the disk address we are
1750 * looking for re-use it.
1751 *
1752 * Otherwise throw it away and get a new one.
1753 */
1754 bp = cur->bc_bufs[level];
1755 if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
1756 *blkp = XFS_BUF_TO_BLOCK(bp);
1757 return 0;
1758 }
1759
Eric Sandeen0d7409b2014-04-14 18:59:56 +10001760 error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
Christoph Hellwigfe033cc2008-10-30 16:56:09 +11001761 if (error)
1762 return error;
1763
1764 xfs_btree_setbuf(cur, level, bp);
1765 return 0;
1766}
1767
1768/*
1769 * Get current search key. For level 0 we don't actually have a key
1770 * structure so we make one up from the record. For all other levels
1771 * we just return the right key.
1772 */
1773STATIC union xfs_btree_key *
1774xfs_lookup_get_search_key(
1775 struct xfs_btree_cur *cur,
1776 int level,
1777 int keyno,
1778 struct xfs_btree_block *block,
1779 union xfs_btree_key *kp)
1780{
1781 if (level == 0) {
1782 cur->bc_ops->init_key_from_rec(kp,
1783 xfs_btree_rec_addr(cur, keyno, block));
1784 return kp;
1785 }
1786
1787 return xfs_btree_key_addr(cur, keyno, block);
1788}
1789
1790/*
1791 * Lookup the record. The cursor is made to point to it, based on dir.
Zhi Yong Wu49d3da12013-08-07 10:11:00 +00001792 * stat is set to 0 if can't find any such record, 1 for success.
Christoph Hellwigfe033cc2008-10-30 16:56:09 +11001793 */
1794int /* error */
1795xfs_btree_lookup(
1796 struct xfs_btree_cur *cur, /* btree cursor */
1797 xfs_lookup_t dir, /* <=, ==, or >= */
1798 int *stat) /* success/failure */
1799{
1800 struct xfs_btree_block *block; /* current btree block */
1801 __int64_t diff; /* difference for the current key */
1802 int error; /* error return value */
1803 int keyno; /* current key number */
1804 int level; /* level in the btree */
1805 union xfs_btree_ptr *pp; /* ptr to btree block */
1806 union xfs_btree_ptr ptr; /* ptr to btree block */
1807
1808 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1809 XFS_BTREE_TRACE_ARGI(cur, dir);
1810
1811 XFS_BTREE_STATS_INC(cur, lookup);
1812
1813 block = NULL;
1814 keyno = 0;
1815
1816 /* initialise start pointer from cursor */
1817 cur->bc_ops->init_ptr_from_cur(cur, &ptr);
1818 pp = &ptr;
1819
1820 /*
1821 * Iterate over each level in the btree, starting at the root.
1822 * For each level above the leaves, find the key we need, based
1823 * on the lookup record, then follow the corresponding block
1824 * pointer down to the next level.
1825 */
1826 for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
1827 /* Get the block we need to do the lookup on. */
1828 error = xfs_btree_lookup_get_block(cur, level, pp, &block);
1829 if (error)
1830 goto error0;
1831
1832 if (diff == 0) {
1833 /*
1834 * If we already had a key match at a higher level, we
1835 * know we need to use the first entry in this block.
1836 */
1837 keyno = 1;
1838 } else {
1839 /* Otherwise search this block. Do a binary search. */
1840
1841 int high; /* high entry number */
1842 int low; /* low entry number */
1843
1844 /* Set low and high entry numbers, 1-based. */
1845 low = 1;
1846 high = xfs_btree_get_numrecs(block);
1847 if (!high) {
1848 /* Block is empty, must be an empty leaf. */
1849 ASSERT(level == 0 && cur->bc_nlevels == 1);
1850
1851 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1852 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1853 *stat = 0;
1854 return 0;
1855 }
1856
1857 /* Binary search the block. */
1858 while (low <= high) {
1859 union xfs_btree_key key;
1860 union xfs_btree_key *kp;
1861
1862 XFS_BTREE_STATS_INC(cur, compare);
1863
1864 /* keyno is average of low and high. */
1865 keyno = (low + high) >> 1;
1866
1867 /* Get current search key */
1868 kp = xfs_lookup_get_search_key(cur, level,
1869 keyno, block, &key);
1870
1871 /*
1872 * Compute difference to get next direction:
1873 * - less than, move right
1874 * - greater than, move left
1875 * - equal, we're done
1876 */
1877 diff = cur->bc_ops->key_diff(cur, kp);
1878 if (diff < 0)
1879 low = keyno + 1;
1880 else if (diff > 0)
1881 high = keyno - 1;
1882 else
1883 break;
1884 }
1885 }
1886
1887 /*
1888 * If there are more levels, set up for the next level
1889 * by getting the block number and filling in the cursor.
1890 */
1891 if (level > 0) {
1892 /*
1893 * If we moved left, need the previous key number,
1894 * unless there isn't one.
1895 */
1896 if (diff > 0 && --keyno < 1)
1897 keyno = 1;
1898 pp = xfs_btree_ptr_addr(cur, keyno, block);
1899
1900#ifdef DEBUG
1901 error = xfs_btree_check_ptr(cur, pp, 0, level);
1902 if (error)
1903 goto error0;
1904#endif
1905 cur->bc_ptrs[level] = keyno;
1906 }
1907 }
1908
1909 /* Done with the search. See if we need to adjust the results. */
1910 if (dir != XFS_LOOKUP_LE && diff < 0) {
1911 keyno++;
1912 /*
1913 * If ge search and we went off the end of the block, but it's
1914 * not the last block, we're in the wrong block.
1915 */
1916 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1917 if (dir == XFS_LOOKUP_GE &&
1918 keyno > xfs_btree_get_numrecs(block) &&
1919 !xfs_btree_ptr_is_null(cur, &ptr)) {
1920 int i;
1921
1922 cur->bc_ptrs[0] = keyno;
1923 error = xfs_btree_increment(cur, 0, &i);
1924 if (error)
1925 goto error0;
Eric Sandeen5fb5aee2015-02-23 22:39:13 +11001926 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
Christoph Hellwigfe033cc2008-10-30 16:56:09 +11001927 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1928 *stat = 1;
1929 return 0;
1930 }
1931 } else if (dir == XFS_LOOKUP_LE && diff > 0)
1932 keyno--;
1933 cur->bc_ptrs[0] = keyno;
1934
1935 /* Return if we succeeded or not. */
1936 if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
1937 *stat = 0;
1938 else if (dir != XFS_LOOKUP_EQ || diff == 0)
1939 *stat = 1;
1940 else
1941 *stat = 0;
1942 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1943 return 0;
1944
1945error0:
1946 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1947 return error;
1948}
Christoph Hellwig38bb7422008-10-30 16:56:22 +11001949
Darrick J. Wong70b22652016-08-03 11:03:38 +10001950/* Determine the low key of a leaf block (simple) */
1951void
1952xfs_btree_get_leaf_keys(
Christoph Hellwig38bb7422008-10-30 16:56:22 +11001953 struct xfs_btree_cur *cur,
Darrick J. Wong70b22652016-08-03 11:03:38 +10001954 struct xfs_btree_block *block,
1955 union xfs_btree_key *key)
1956{
1957 union xfs_btree_rec *rec;
1958
1959 rec = xfs_btree_rec_addr(cur, 1, block);
1960 cur->bc_ops->init_key_from_rec(key, rec);
1961}
1962
1963/* Determine the low key of a node block (simple) */
1964void
1965xfs_btree_get_node_keys(
1966 struct xfs_btree_cur *cur,
1967 struct xfs_btree_block *block,
1968 union xfs_btree_key *key)
1969{
1970 memcpy(key, xfs_btree_key_addr(cur, 1, block), cur->bc_ops->key_len);
1971}
1972
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10001973/* Find the high key storage area from a regular key. */
1974STATIC union xfs_btree_key *
1975xfs_btree_high_key_from_key(
1976 struct xfs_btree_cur *cur,
1977 union xfs_btree_key *key)
1978{
1979 ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
1980 return (union xfs_btree_key *)((char *)key +
1981 (cur->bc_ops->key_len / 2));
1982}
1983
1984/* Determine the low and high keys of a leaf block (overlapped) */
1985void
1986xfs_btree_get_leaf_keys_overlapped(
1987 struct xfs_btree_cur *cur,
1988 struct xfs_btree_block *block,
1989 union xfs_btree_key *key)
1990{
1991 int n;
1992 union xfs_btree_rec *rec;
1993 union xfs_btree_key max_hkey;
1994 union xfs_btree_key hkey;
1995 union xfs_btree_key *high;
1996
1997 ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
1998 rec = xfs_btree_rec_addr(cur, 1, block);
1999 cur->bc_ops->init_key_from_rec(key, rec);
2000
2001 cur->bc_ops->init_high_key_from_rec(&max_hkey, rec);
2002 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
2003 rec = xfs_btree_rec_addr(cur, n, block);
2004 cur->bc_ops->init_high_key_from_rec(&hkey, rec);
2005 if (cur->bc_ops->diff_two_keys(cur, &hkey, &max_hkey) > 0)
2006 max_hkey = hkey;
2007 }
2008
2009 high = xfs_btree_high_key_from_key(cur, key);
2010 memcpy(high, &max_hkey, cur->bc_ops->key_len / 2);
2011}
2012
2013/* Determine the low and high keys of a node block (overlapped) */
2014void
2015xfs_btree_get_node_keys_overlapped(
2016 struct xfs_btree_cur *cur,
2017 struct xfs_btree_block *block,
2018 union xfs_btree_key *key)
2019{
2020 int n;
2021 union xfs_btree_key *hkey;
2022 union xfs_btree_key *max_hkey;
2023 union xfs_btree_key *high;
2024
2025 ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
2026 memcpy(key, xfs_btree_key_addr(cur, 1, block),
2027 cur->bc_ops->key_len / 2);
2028
2029 max_hkey = xfs_btree_high_key_addr(cur, 1, block);
2030 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
2031 hkey = xfs_btree_high_key_addr(cur, n, block);
2032 if (cur->bc_ops->diff_two_keys(cur, hkey, max_hkey) > 0)
2033 max_hkey = hkey;
2034 }
2035
2036 high = xfs_btree_high_key_from_key(cur, key);
2037 memcpy(high, max_hkey, cur->bc_ops->key_len / 2);
2038}
2039
Darrick J. Wong70b22652016-08-03 11:03:38 +10002040/* Derive the keys for any btree block. */
2041STATIC void
2042xfs_btree_get_keys(
2043 struct xfs_btree_cur *cur,
2044 struct xfs_btree_block *block,
2045 union xfs_btree_key *key)
2046{
2047 if (be16_to_cpu(block->bb_level) == 0)
2048 cur->bc_ops->get_leaf_keys(cur, block, key);
2049 else
2050 cur->bc_ops->get_node_keys(cur, block, key);
2051}
2052
2053/*
2054 * Decide if we need to update the parent keys of a btree block. For
2055 * a standard btree this is only necessary if we're updating the first
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002056 * record/key. For an overlapping btree, we must always update the
2057 * keys because the highest key can be in any of the records or keys
2058 * in the block.
Darrick J. Wong70b22652016-08-03 11:03:38 +10002059 */
2060static inline bool
2061xfs_btree_needs_key_update(
2062 struct xfs_btree_cur *cur,
2063 int ptr)
2064{
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002065 return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1;
2066}
2067
/*
 * Update the low and high parent keys of the given level, progressing
 * towards the root.  If force_all is false, stop if the keys for a given
 * level do not need updating.
 */
STATIC int
__xfs_btree_updkeys(
	struct xfs_btree_cur	*cur,
	int			level,
	struct xfs_btree_block	*block,
	struct xfs_buf		*bp0,
	bool			force_all)
{
	union xfs_btree_bigkey	key;	/* keys from current level */
	union xfs_btree_key	*lkey;	/* keys from the next level up */
	union xfs_btree_key	*hkey;
	union xfs_btree_key	*nlkey;	/* keys from the next level up */
	union xfs_btree_key	*nhkey;
	struct xfs_buf		*bp;
	int			ptr;

	ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);

	/* Exit if there aren't any parent levels to update. */
	if (level + 1 >= cur->bc_nlevels)
		return 0;

	trace_xfs_btree_updkeys(cur, level, bp0);

	/*
	 * lkey/hkey alias the low and high halves of the on-stack bigkey;
	 * xfs_btree_high_key_from_key computes hkey as lkey + key_len/2.
	 */
	lkey = (union xfs_btree_key *)&key;
	hkey = xfs_btree_high_key_from_key(cur, lkey);
	xfs_btree_get_keys(cur, block, lkey);
	for (level++; level < cur->bc_nlevels; level++) {
#ifdef DEBUG
		int		error;
#endif
		block = xfs_btree_get_block(cur, level, &bp);
		trace_xfs_btree_updkeys(cur, level, bp);
#ifdef DEBUG
		error = xfs_btree_check_block(cur, block, level, bp);
		if (error) {
			XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
			return error;
		}
#endif
		/* Parent entry the cursor points at for this level. */
		ptr = cur->bc_ptrs[level];
		nlkey = xfs_btree_key_addr(cur, ptr, block);
		nhkey = xfs_btree_high_key_addr(cur, ptr, block);
		/* Stop early if both parent keys already match. */
		if (!force_all &&
		    !(cur->bc_ops->diff_two_keys(cur, nlkey, lkey) != 0 ||
		      cur->bc_ops->diff_two_keys(cur, nhkey, hkey) != 0))
			break;
		/* copy_keys moves both halves since this is an overlapped btree. */
		xfs_btree_copy_keys(cur, nlkey, lkey, 1);
		xfs_btree_log_keys(cur, bp, ptr, ptr);
		if (level + 1 >= cur->bc_nlevels)
			break;
		/* Recompute this block's keys to propagate another level up. */
		cur->bc_ops->get_node_keys(cur, block, lkey);
	}

	return 0;
}
2129
2130/*
2131 * Update all the keys from some level in cursor back to the root, stopping
2132 * when we find a key pair that don't need updating.
2133 */
2134int
2135xfs_btree_update_keys_overlapped(
2136 struct xfs_btree_cur *cur,
2137 int level)
2138{
2139 struct xfs_buf *bp;
2140 struct xfs_btree_block *block;
2141
2142 block = xfs_btree_get_block(cur, level, &bp);
2143 return __xfs_btree_updkeys(cur, level, block, bp, false);
2144}
2145
2146/* Update all the keys from some level in cursor back to the root. */
2147STATIC int
2148xfs_btree_updkeys_force(
2149 struct xfs_btree_cur *cur,
2150 int level)
2151{
2152 struct xfs_buf *bp;
2153 struct xfs_btree_block *block;
2154
2155 block = xfs_btree_get_block(cur, level, &bp);
2156 return __xfs_btree_updkeys(cur, level, block, bp, true);
Darrick J. Wong70b22652016-08-03 11:03:38 +10002157}
2158
/*
 * Update the parent keys of the given level, progressing towards the root.
 *
 * Non-overlapped variant: only the low key exists, and propagation can
 * stop as soon as the cursor is not at the first entry of a block.
 */
int
xfs_btree_update_keys(
	struct xfs_btree_cur	*cur,
	int			level)
{
	struct xfs_btree_block	*block;
	struct xfs_buf		*bp;
	union xfs_btree_key	*kp;
	union xfs_btree_key	key;
	int			ptr;

	/* Overlapped btrees must use the _overlapped variant instead. */
	ASSERT(!(cur->bc_flags & XFS_BTREE_OVERLAPPING));

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGIK(cur, level, keyp);

	ASSERT(level >= 0);

	/*
	 * Go up the tree from this level toward the root.
	 * At each level, update the key value to the value input.
	 * Stop when we reach a level where the cursor isn't pointing
	 * at the first entry in the block.
	 */
	block = xfs_btree_get_block(cur, level, &bp);
	xfs_btree_get_keys(cur, block, &key);
	for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
#ifdef DEBUG
		int		error;
#endif
		block = xfs_btree_get_block(cur, level, &bp);
#ifdef DEBUG
		error = xfs_btree_check_block(cur, block, level, bp);
		if (error) {
			XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
			return error;
		}
#endif
		/* ptr from this level controls whether the loop continues. */
		ptr = cur->bc_ptrs[level];
		kp = xfs_btree_key_addr(cur, ptr, block);
		xfs_btree_copy_keys(cur, kp, &key, 1);
		xfs_btree_log_keys(cur, bp, ptr, ptr);
	}

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	return 0;
}
Christoph Hellwig278d0ca2008-10-30 16:56:32 +11002209
/*
 * Update the record referred to by cur to the value in the
 * given record. This either works (return 0) or gets an
 * EFSCORRUPTED error.
 */
int
xfs_btree_update(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec)
{
	struct xfs_btree_block	*block;
	struct xfs_buf		*bp;
	int			error;
	int			ptr;
	union xfs_btree_rec	*rp;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGR(cur, rec);

	/* Pick up the current block. */
	block = xfs_btree_get_block(cur, 0, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, 0, bp);
	if (error)
		goto error0;
#endif
	/* Get the address of the rec to be updated. */
	ptr = cur->bc_ptrs[0];
	rp = xfs_btree_rec_addr(cur, ptr, block);

	/* Fill in the new contents and log them. */
	xfs_btree_copy_recs(cur, rp, rec, 1);
	xfs_btree_log_recs(cur, bp, ptr, ptr);

	/*
	 * If we are tracking the last record in the tree and
	 * we are at the far right edge of the tree, update it.
	 */
	if (xfs_btree_is_lastrec(cur, block, 0)) {
		cur->bc_ops->update_lastrec(cur, block, rec,
					    ptr, LASTREC_UPDATE);
	}

	/* Pass new key value up to our parent. */
	if (xfs_btree_needs_key_update(cur, ptr)) {
		/* update_keys walks toward the root as far as necessary. */
		error = cur->bc_ops->update_keys(cur, 0);
		if (error)
			goto error0;
	}

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
2268
/*
 * Move 1 record left from cur/level if possible.
 * Update cur to reflect the new path.
 *
 * On return *stat is 1 if a record was shifted, 0 if the shift was not
 * possible (no left sibling, left block full, or cursor in the way).
 */
STATIC int					/* error */
xfs_btree_lshift(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	/*
	 * NOTE(review): key is only written (leaf branch below) and never
	 * read since the update_keys conversion — looks like dead code;
	 * confirm against later upstream cleanups before removing.
	 */
	union xfs_btree_key	key;		/* btree key */
	struct xfs_buf		*lbp;		/* left buffer pointer */
	struct xfs_btree_block	*left;		/* left btree block */
	int			lrecs;		/* left record count */
	struct xfs_buf		*rbp;		/* right buffer pointer */
	struct xfs_btree_block	*right;		/* right btree block */
	struct xfs_btree_cur	*tcur;		/* temporary btree cursor */
	int			rrecs;		/* right record count */
	union xfs_btree_ptr	lptr;		/* left btree pointer */
	union xfs_btree_key	*rkp = NULL;	/* right btree key */
	union xfs_btree_ptr	*rpp = NULL;	/* right address pointer */
	union xfs_btree_rec	*rrp = NULL;	/* right record pointer */
	int			error;		/* error return value */
	int			i;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGI(cur, level);

	/* The inode root has no siblings to shift into. */
	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    level == cur->bc_nlevels - 1)
		goto out0;

	/* Set up variables for this block as "right". */
	right = xfs_btree_get_block(cur, level, &rbp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, right, level, rbp);
	if (error)
		goto error0;
#endif

	/* If we've got no left sibling then we can't shift an entry left. */
	xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
	if (xfs_btree_ptr_is_null(cur, &lptr))
		goto out0;

	/*
	 * If the cursor entry is the one that would be moved, don't
	 * do it... it's too complicated.
	 */
	if (cur->bc_ptrs[level] <= 1)
		goto out0;

	/* Set up the left neighbor as "left". */
	error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
	if (error)
		goto error0;

	/* If it's full, it can't take another entry. */
	lrecs = xfs_btree_get_numrecs(left);
	if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
		goto out0;

	rrecs = xfs_btree_get_numrecs(right);

	/*
	 * We add one entry to the left side and remove one for the right side.
	 * Account for it here, the changes will be updated on disk and logged
	 * later.
	 */
	lrecs++;
	rrecs--;

	XFS_BTREE_STATS_INC(cur, lshift);
	XFS_BTREE_STATS_ADD(cur, moves, 1);

	/*
	 * If non-leaf, copy a key and a ptr to the left block.
	 * Log the changes to the left block.
	 */
	if (level > 0) {
		/* It's a non-leaf. Move keys and pointers. */
		union xfs_btree_key	*lkp;	/* left btree key */
		union xfs_btree_ptr	*lpp;	/* left address pointer */

		/* lrecs was already incremented: this is the new last slot. */
		lkp = xfs_btree_key_addr(cur, lrecs, left);
		rkp = xfs_btree_key_addr(cur, 1, right);

		lpp = xfs_btree_ptr_addr(cur, lrecs, left);
		rpp = xfs_btree_ptr_addr(cur, 1, right);
#ifdef DEBUG
		error = xfs_btree_check_ptr(cur, rpp, 0, level);
		if (error)
			goto error0;
#endif
		xfs_btree_copy_keys(cur, lkp, rkp, 1);
		xfs_btree_copy_ptrs(cur, lpp, rpp, 1);

		xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
		xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);

		ASSERT(cur->bc_ops->keys_inorder(cur,
			xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
	} else {
		/* It's a leaf. Move records. */
		union xfs_btree_rec	*lrp;	/* left record pointer */

		lrp = xfs_btree_rec_addr(cur, lrecs, left);
		rrp = xfs_btree_rec_addr(cur, 1, right);

		xfs_btree_copy_recs(cur, lrp, rrp, 1);
		xfs_btree_log_recs(cur, lbp, lrecs, lrecs);

		ASSERT(cur->bc_ops->recs_inorder(cur,
			xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
	}

	/* Commit the new counts and log the header changes. */
	xfs_btree_set_numrecs(left, lrecs);
	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);

	xfs_btree_set_numrecs(right, rrecs);
	xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);

	/*
	 * Slide the contents of right down one entry.
	 */
	XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
	if (level > 0) {
		/* It's a nonleaf. operate on keys and ptrs */
#ifdef DEBUG
		int			i;	/* loop index */

		for (i = 0; i < rrecs; i++) {
			error = xfs_btree_check_ptr(cur, rpp, i + 1, level);
			if (error)
				goto error0;
		}
#endif
		xfs_btree_shift_keys(cur,
				xfs_btree_key_addr(cur, 2, right),
				-1, rrecs);
		xfs_btree_shift_ptrs(cur,
				xfs_btree_ptr_addr(cur, 2, right),
				-1, rrecs);

		xfs_btree_log_keys(cur, rbp, 1, rrecs);
		xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
	} else {
		/* It's a leaf. operate on records */
		xfs_btree_shift_recs(cur,
			xfs_btree_rec_addr(cur, 2, right),
			-1, rrecs);
		xfs_btree_log_recs(cur, rbp, 1, rrecs);

		/*
		 * If it's the first record in the block, we'll need a key
		 * structure to pass up to the next level (updkey).
		 */
		cur->bc_ops->init_key_from_rec(&key,
			xfs_btree_rec_addr(cur, 1, right));
	}

	/*
	 * Using a temporary cursor, update the parent key values of the
	 * block on the left.
	 */
	error = xfs_btree_dup_cursor(cur, &tcur);
	if (error)
		goto error0;
	i = xfs_btree_firstrec(tcur, level);
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);

	/* tcur now points into the left block. */
	error = xfs_btree_decrement(tcur, level, &i);
	if (error)
		goto error1;

	/* Update the parent keys of the right block. */
	error = cur->bc_ops->update_keys(cur, level);
	if (error)
		goto error1;

	/* Update the parent high keys of the left block, if needed. */
	if (tcur->bc_flags & XFS_BTREE_OVERLAPPING) {
		error = tcur->bc_ops->update_keys(tcur, level);
		if (error)
			goto error1;
	}

	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);

	/* Slide the cursor value left one. */
	cur->bc_ptrs[level]--;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;

out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;

error1:
	/* error0 must not run here: tcur exists and must be torn down. */
	XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
	return error;
}
2480
2481/*
Christoph Hellwig9eaead52008-10-30 16:56:43 +11002482 * Move 1 record right from cur/level if possible.
2483 * Update cur to reflect the new path.
2484 */
Christoph Hellwig3cc75242008-10-30 16:58:41 +11002485STATIC int /* error */
xfs_btree_rshift(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	union xfs_btree_key	key;		/* btree key */
	struct xfs_buf		*lbp;		/* left buffer pointer */
	struct xfs_btree_block	*left;		/* left btree block */
	struct xfs_buf		*rbp;		/* right buffer pointer */
	struct xfs_btree_block	*right;		/* right btree block */
	struct xfs_btree_cur	*tcur;		/* temporary btree cursor */
	union xfs_btree_ptr	rptr;		/* right block pointer */
	union xfs_btree_key	*rkp;		/* right btree key */
	int			rrecs;		/* right record count */
	int			lrecs;		/* left record count */
	int			error;		/* error return value */
	int			i;		/* loop counter */

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGI(cur, level);

	/*
	 * An inode-rooted btree has no sibling at the root level, so there
	 * is nowhere to shift to.  Bail out with *stat = 0 (not an error).
	 */
	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    (level == cur->bc_nlevels - 1))
		goto out0;

	/* Set up variables for this block as "left". */
	left = xfs_btree_get_block(cur, level, &lbp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, left, level, lbp);
	if (error)
		goto error0;
#endif

	/* If we've got no right sibling then we can't shift an entry right. */
	xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
	if (xfs_btree_ptr_is_null(cur, &rptr))
		goto out0;

	/*
	 * If the cursor entry is the one that would be moved, don't
	 * do it... it's too complicated.
	 */
	lrecs = xfs_btree_get_numrecs(left);
	if (cur->bc_ptrs[level] >= lrecs)
		goto out0;

	/* Set up the right neighbor as "right". */
	error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
	if (error)
		goto error0;

	/* If it's full, it can't take another entry. */
	rrecs = xfs_btree_get_numrecs(right);
	if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
		goto out0;

	XFS_BTREE_STATS_INC(cur, rshift);
	XFS_BTREE_STATS_ADD(cur, moves, rrecs);

	/*
	 * Make a hole at the start of the right neighbor block, then
	 * copy the last left block entry to the hole.
	 */
	if (level > 0) {
		/* It's a nonleaf. make a hole in the keys and ptrs */
		union xfs_btree_key	*lkp;
		union xfs_btree_ptr	*lpp;
		union xfs_btree_ptr	*rpp;

		lkp = xfs_btree_key_addr(cur, lrecs, left);
		lpp = xfs_btree_ptr_addr(cur, lrecs, left);
		rkp = xfs_btree_key_addr(cur, 1, right);
		rpp = xfs_btree_ptr_addr(cur, 1, right);

#ifdef DEBUG
		/* Validate every child pointer we are about to move. */
		for (i = rrecs - 1; i >= 0; i--) {
			error = xfs_btree_check_ptr(cur, rpp, i, level);
			if (error)
				goto error0;
		}
#endif

		xfs_btree_shift_keys(cur, rkp, 1, rrecs);
		xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);

#ifdef DEBUG
		error = xfs_btree_check_ptr(cur, lpp, 0, level);
		if (error)
			goto error0;
#endif

		/* Now put the new data in, and log it. */
		xfs_btree_copy_keys(cur, rkp, lkp, 1);
		xfs_btree_copy_ptrs(cur, rpp, lpp, 1);

		xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
		xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);

		ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
			xfs_btree_key_addr(cur, 2, right)));
	} else {
		/* It's a leaf. make a hole in the records */
		union xfs_btree_rec	*lrp;
		union xfs_btree_rec	*rrp;

		lrp = xfs_btree_rec_addr(cur, lrecs, left);
		rrp = xfs_btree_rec_addr(cur, 1, right);

		xfs_btree_shift_recs(cur, rrp, 1, rrecs);

		/* Now put the new data in, and log it. */
		xfs_btree_copy_recs(cur, rrp, lrp, 1);
		xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);

		cur->bc_ops->init_key_from_rec(&key, rrp);

		ASSERT(cur->bc_ops->recs_inorder(cur, rrp,
			xfs_btree_rec_addr(cur, 2, right)));
	}

	/*
	 * Decrement and log left's numrecs, bump and log right's numrecs.
	 */
	xfs_btree_set_numrecs(left, --lrecs);
	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);

	xfs_btree_set_numrecs(right, ++rrecs);
	xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);

	/*
	 * Using a temporary cursor, update the parent key values of the
	 * block on the right.
	 */
	error = xfs_btree_dup_cursor(cur, &tcur);
	if (error)
		goto error0;
	i = xfs_btree_lastrec(tcur, level);
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);

	error = xfs_btree_increment(tcur, level, &i);
	if (error)
		goto error1;

	/* Update the parent high keys of the left block, if needed. */
	if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
		error = cur->bc_ops->update_keys(cur, level);
		if (error)
			goto error1;
	}

	/* Update the parent keys of the right block. */
	error = cur->bc_ops->update_keys(tcur, level);
	if (error)
		goto error1;

	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;

out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;

	/* error1: failures after tcur was duplicated; it must be torn down. */
error1:
	XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
	return error;
}
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002662
2663/*
2664 * Split cur/level block in half.
2665 * Return new block number and the key to its first
2666 * record (to be inserted into parent).
2667 */
STATIC int					/* error */
__xfs_btree_split(
	struct xfs_btree_cur	*cur,
	int			level,
	union xfs_btree_ptr	*ptrp,
	union xfs_btree_key	*key,
	struct xfs_btree_cur	**curp,
	int			*stat)		/* success/failure */
{
	union xfs_btree_ptr	lptr;		/* left sibling block ptr */
	struct xfs_buf		*lbp;		/* left buffer pointer */
	struct xfs_btree_block	*left;		/* left btree block */
	union xfs_btree_ptr	rptr;		/* right sibling block ptr */
	struct xfs_buf		*rbp;		/* right buffer pointer */
	struct xfs_btree_block	*right;		/* right btree block */
	union xfs_btree_ptr	rrptr;		/* right-right sibling ptr */
	struct xfs_buf		*rrbp;		/* right-right buffer pointer */
	struct xfs_btree_block	*rrblock;	/* right-right btree block */
	int			lrecs;
	int			rrecs;
	int			src_index;
	int			error;		/* error return value */
#ifdef DEBUG
	int			i;
#endif

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);

	XFS_BTREE_STATS_INC(cur, split);

	/* Set up left block (current one). */
	left = xfs_btree_get_block(cur, level, &lbp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, left, level, lbp);
	if (error)
		goto error0;
#endif

	xfs_btree_buf_to_ptr(cur, lbp, &lptr);

	/* Allocate the new block. If we can't do it, we're toast. Give up. */
	error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat);
	if (error)
		goto error0;
	if (*stat == 0)
		goto out0;
	XFS_BTREE_STATS_INC(cur, alloc);

	/* Set up the new block as "right". */
	error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
	if (error)
		goto error0;

	/* Fill in the btree header for the new right block. */
	xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);

	/*
	 * Split the entries between the old and the new block evenly.
	 * Make sure that if there's an odd number of entries now, that
	 * each new block will have the same number of entries.
	 */
	lrecs = xfs_btree_get_numrecs(left);
	rrecs = lrecs / 2;
	if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
		rrecs++;
	src_index = (lrecs - rrecs + 1);

	XFS_BTREE_STATS_ADD(cur, moves, rrecs);

	/* Adjust numrecs for the later get_*_keys() calls. */
	lrecs -= rrecs;
	xfs_btree_set_numrecs(left, lrecs);
	xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);

	/*
	 * Copy btree block entries from the left block over to the
	 * new block, the right. Update the right block and log the
	 * changes.
	 */
	if (level > 0) {
		/* It's a non-leaf.  Move keys and pointers. */
		union xfs_btree_key	*lkp;	/* left btree key */
		union xfs_btree_ptr	*lpp;	/* left address pointer */
		union xfs_btree_key	*rkp;	/* right btree key */
		union xfs_btree_ptr	*rpp;	/* right address pointer */

		lkp = xfs_btree_key_addr(cur, src_index, left);
		lpp = xfs_btree_ptr_addr(cur, src_index, left);
		rkp = xfs_btree_key_addr(cur, 1, right);
		rpp = xfs_btree_ptr_addr(cur, 1, right);

#ifdef DEBUG
		for (i = src_index; i < rrecs; i++) {
			error = xfs_btree_check_ptr(cur, lpp, i, level);
			if (error)
				goto error0;
		}
#endif

		/* Copy the keys & pointers to the new block. */
		xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
		xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);

		xfs_btree_log_keys(cur, rbp, 1, rrecs);
		xfs_btree_log_ptrs(cur, rbp, 1, rrecs);

		/* Stash the keys of the new block for later insertion. */
		cur->bc_ops->get_node_keys(cur, right, key);
	} else {
		/* It's a leaf.  Move records. */
		union xfs_btree_rec	*lrp;	/* left record pointer */
		union xfs_btree_rec	*rrp;	/* right record pointer */

		lrp = xfs_btree_rec_addr(cur, src_index, left);
		rrp = xfs_btree_rec_addr(cur, 1, right);

		/* Copy records to the new block. */
		xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
		xfs_btree_log_recs(cur, rbp, 1, rrecs);

		/* Stash the keys of the new block for later insertion. */
		cur->bc_ops->get_leaf_keys(cur, right, key);
	}

	/*
	 * Find the left block number by looking in the buffer.
	 * Adjust sibling pointers.
	 */
	xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
	xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
	xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
	xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);

	xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);

	/*
	 * If there's a block to the new block's right, make that block
	 * point back to right instead of to left.
	 */
	if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
		error = xfs_btree_read_buf_block(cur, &rrptr,
							0, &rrblock, &rrbp);
		if (error)
			goto error0;
		xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
		xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
	}

	/* Update the parent high keys of the left block, if needed. */
	if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
		error = cur->bc_ops->update_keys(cur, level);
		if (error)
			goto error0;
	}

	/*
	 * If the cursor is really in the right block, move it there.
	 * If it's just pointing past the last entry in left, then we'll
	 * insert there, so don't change anything in that case.
	 */
	if (cur->bc_ptrs[level] > lrecs + 1) {
		xfs_btree_setbuf(cur, level, rbp);
		cur->bc_ptrs[level] -= lrecs;
	}
	/*
	 * If there are more levels, we'll need another cursor which refers
	 * the right block, no matter where this cursor was.
	 */
	if (level + 1 < cur->bc_nlevels) {
		error = xfs_btree_dup_cursor(cur, curp);
		if (error)
			goto error0;
		(*curp)->bc_ptrs[level + 1]++;
	}
	*ptrp = rptr;
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;
out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
Christoph Hellwig344207c2008-10-30 16:57:16 +11002858
struct xfs_btree_split_args {
	struct xfs_btree_cur	*cur;		/* cursor for the split */
	int			level;		/* level to split */
	union xfs_btree_ptr	*ptrp;		/* in/out: new block ptr */
	union xfs_btree_key	*key;		/* out: key for new block */
	struct xfs_btree_cur	**curp;		/* out: new cursor, if any */
	int			*stat;		/* success/failure */
	int			result;		/* error return of the split */
	bool			kswapd;	/* allocation in kswapd context */
	struct completion	*done;		/* signalled when split is done */
	struct work_struct	work;		/* worker thread handoff */
};
2871
2872/*
2873 * Stack switching interfaces for allocation
2874 */
/*
 * Run __xfs_btree_split() on a worker thread's (deep) stack and signal the
 * waiting submitter via args->done when finished.
 */
static void
xfs_btree_split_worker(
	struct work_struct	*work)
{
	struct xfs_btree_split_args	*args = container_of(work,
						struct xfs_btree_split_args, work);
	unsigned long		pflags;
	unsigned long		new_pflags = PF_FSTRANS;

	/*
	 * we are in a transaction context here, but may also be doing work
	 * in kswapd context, and hence we may need to inherit that state
	 * temporarily to ensure that we don't block waiting for memory reclaim
	 * in any way.
	 */
	if (args->kswapd)
		new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;

	current_set_flags_nested(&pflags, new_pflags);

	args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
					 args->key, args->curp, args->stat);
	complete(args->done);

	current_restore_flags_nested(&pflags, new_pflags);
}
2901
2902/*
2903 * BMBT split requests often come in with little stack to work on. Push
2904 * them off to a worker thread so there is lots of stack to use. For the other
2905 * btree types, just call directly to avoid the context switch overhead here.
2906 */
2907STATIC int /* error */
2908xfs_btree_split(
2909 struct xfs_btree_cur *cur,
2910 int level,
2911 union xfs_btree_ptr *ptrp,
2912 union xfs_btree_key *key,
2913 struct xfs_btree_cur **curp,
2914 int *stat) /* success/failure */
2915{
2916 struct xfs_btree_split_args args;
2917 DECLARE_COMPLETION_ONSTACK(done);
2918
2919 if (cur->bc_btnum != XFS_BTNUM_BMAP)
2920 return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
2921
2922 args.cur = cur;
2923 args.level = level;
2924 args.ptrp = ptrp;
2925 args.key = key;
2926 args.curp = curp;
2927 args.stat = stat;
2928 args.done = &done;
2929 args.kswapd = current_is_kswapd();
2930 INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
2931 queue_work(xfs_alloc_wq, &args.work);
2932 wait_for_completion(&done);
2933 destroy_work_on_stack(&args.work);
2934 return args.result;
2935}
2936
2937
Christoph Hellwig344207c2008-10-30 16:57:16 +11002938/*
Christoph Hellwigea77b0a2008-10-30 16:57:28 +11002939 * Copy the old inode root contents into a real block and make the
2940 * broot point to it.
2941 */
int						/* error */
xfs_btree_new_iroot(
	struct xfs_btree_cur	*cur,		/* btree cursor */
	int			*logflags,	/* logging flags for inode */
	int			*stat)		/* return status - 0 fail */
{
	struct xfs_buf		*cbp;		/* buffer for cblock */
	struct xfs_btree_block	*block;		/* btree block */
	struct xfs_btree_block	*cblock;	/* child btree block */
	union xfs_btree_key	*ckp;		/* child key pointer */
	union xfs_btree_ptr	*cpp;		/* child ptr pointer */
	union xfs_btree_key	*kp;		/* pointer to btree key */
	union xfs_btree_ptr	*pp;		/* pointer to block addr */
	union xfs_btree_ptr	nptr;		/* new block addr */
	int			level;		/* btree level */
	int			error;		/* error return code */
#ifdef DEBUG
	int			i;		/* loop counter */
#endif

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_STATS_INC(cur, newroot);

	/* Only inode-rooted btrees (e.g. bmbt) can grow out of the inode. */
	ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);

	level = cur->bc_nlevels - 1;

	block = xfs_btree_get_iroot(cur);
	pp = xfs_btree_ptr_addr(cur, 1, block);

	/* Allocate the new block. If we can't do it, we're toast. Give up. */
	error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);
	if (error)
		goto error0;
	if (*stat == 0) {
		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
		return 0;
	}
	XFS_BTREE_STATS_INC(cur, alloc);

	/* Copy the root into a real block. */
	error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
	if (error)
		goto error0;

	/*
	 * we can't just memcpy() the root in for CRC enabled btree blocks.
	 * In that case have to also ensure the blkno remains correct
	 */
	memcpy(cblock, block, xfs_btree_block_len(cur));
	if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
		if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
			cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn);
		else
			cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn);
	}

	/* The root grows one level; it now holds a single key/ptr pair. */
	be16_add_cpu(&block->bb_level, 1);
	xfs_btree_set_numrecs(block, 1);
	cur->bc_nlevels++;
	cur->bc_ptrs[level + 1] = 1;

	kp = xfs_btree_key_addr(cur, 1, block);
	ckp = xfs_btree_key_addr(cur, 1, cblock);
	xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));

	cpp = xfs_btree_ptr_addr(cur, 1, cblock);
#ifdef DEBUG
	for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
		error = xfs_btree_check_ptr(cur, pp, i, level);
		if (error)
			goto error0;
	}
#endif
	xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));

#ifdef DEBUG
	error = xfs_btree_check_ptr(cur, &nptr, 0, level);
	if (error)
		goto error0;
#endif
	/* Point the in-inode root at the new child block. */
	xfs_btree_copy_ptrs(cur, pp, &nptr, 1);

	/* Shrink the inode fork broot: it now holds one key/ptr pair. */
	xfs_iroot_realloc(cur->bc_private.b.ip,
			  1 - xfs_btree_get_numrecs(cblock),
			  cur->bc_private.b.whichfork);

	xfs_btree_setbuf(cur, level, cbp);

	/*
	 * Do all this logging at the end so that
	 * the root is at the right level.
	 */
	xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
	xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
	xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));

	*logflags |=
		XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork);
	*stat = 1;
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	return 0;
error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
3048
3049/*
Christoph Hellwig344207c2008-10-30 16:57:16 +11003050 * Allocate a new root block, fill it in.
3051 */
STATIC int				/* error */
xfs_btree_new_root(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			*stat)	/* success/failure */
{
	struct xfs_btree_block	*block;	/* one half of the old root block */
	struct xfs_buf		*bp;	/* buffer containing block */
	int			error;	/* error return value */
	struct xfs_buf		*lbp;	/* left buffer pointer */
	struct xfs_btree_block	*left;	/* left btree block */
	struct xfs_buf		*nbp;	/* new (root) buffer */
	struct xfs_btree_block	*new;	/* new (root) btree block */
	int			nptr;	/* new value for key index, 1 or 2 */
	struct xfs_buf		*rbp;	/* right buffer pointer */
	struct xfs_btree_block	*right;	/* right btree block */
	union xfs_btree_ptr	rptr;
	union xfs_btree_ptr	lptr;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_STATS_INC(cur, newroot);

	/* initialise our start point from the cursor */
	cur->bc_ops->init_ptr_from_cur(cur, &rptr);

	/* Allocate the new block. If we can't do it, we're toast. Give up. */
	error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat);
	if (error)
		goto error0;
	if (*stat == 0)
		goto out0;
	XFS_BTREE_STATS_INC(cur, alloc);

	/* Set up the new block. */
	error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
	if (error)
		goto error0;

	/* Set the root in the holding structure  increasing the level by 1. */
	cur->bc_ops->set_root(cur, &lptr, 1);

	/*
	 * At the previous root level there are now two blocks: the old root,
	 * and the new block generated when it was split.  We don't know which
	 * one the cursor is pointing at, so we set up variables "left" and
	 * "right" for each case.
	 */
	block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
	if (error)
		goto error0;
#endif

	xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
	if (!xfs_btree_ptr_is_null(cur, &rptr)) {
		/* Our block is left, pick up the right block. */
		lbp = bp;
		xfs_btree_buf_to_ptr(cur, lbp, &lptr);
		left = block;
		error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
		if (error)
			goto error0;
		bp = rbp;
		nptr = 1;
	} else {
		/* Our block is right, pick up the left block. */
		rbp = bp;
		xfs_btree_buf_to_ptr(cur, rbp, &rptr);
		right = block;
		xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
		error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
		if (error)
			goto error0;
		bp = lbp;
		nptr = 2;
	}

	/* Fill in the new block's btree header and log it. */
	xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
	xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
	ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
			!xfs_btree_ptr_is_null(cur, &rptr));

	/* Fill in the key data in the new root. */
	if (xfs_btree_get_level(left) > 0) {
		/*
		 * Get the keys for the left block's keys and put them directly
		 * in the parent block.  Do the same for the right block.
		 */
		cur->bc_ops->get_node_keys(cur, left,
				xfs_btree_key_addr(cur, 1, new));
		cur->bc_ops->get_node_keys(cur, right,
				xfs_btree_key_addr(cur, 2, new));
	} else {
		/*
		 * Get the keys for the left block's records and put them
		 * directly in the parent block.  Do the same for the right
		 * block.
		 */
		cur->bc_ops->get_leaf_keys(cur, left,
			xfs_btree_key_addr(cur, 1, new));
		cur->bc_ops->get_leaf_keys(cur, right,
			xfs_btree_key_addr(cur, 2, new));
	}
	xfs_btree_log_keys(cur, nbp, 1, 2);

	/* Fill in the pointer data in the new root. */
	xfs_btree_copy_ptrs(cur,
		xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
	xfs_btree_copy_ptrs(cur,
		xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
	xfs_btree_log_ptrs(cur, nbp, 1, 2);

	/* Fix up the cursor. */
	xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
	cur->bc_ptrs[cur->bc_nlevels] = nptr;
	cur->bc_nlevels++;
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;
error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;
}
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003181
/*
 * Make room in the block at cur/level for one more entry, trying in order:
 * growing/replacing an in-inode root, shifting right, shifting left, and
 * finally splitting the block.  On success *stat is 1 and *index (and, for
 * the lshift case, *oindex) reflect the possibly-changed cursor position.
 */
STATIC int
xfs_btree_make_block_unfull(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			level,	/* btree level */
	int			numrecs,/* # of recs in block */
	int			*oindex,/* old tree index */
	int			*index,	/* new tree index */
	union xfs_btree_ptr	*nptr,	/* new btree ptr */
	struct xfs_btree_cur	**ncur,	/* new btree cursor */
	union xfs_btree_key	*key,	/* key of new block */
	int			*stat)
{
	int			error = 0;

	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    level == cur->bc_nlevels - 1) {
		struct xfs_inode *ip = cur->bc_private.b.ip;

		if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
			/* A root block that can be made bigger. */
			xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
			*stat = 1;
		} else {
			/* A root block that needs replacing */
			int	logflags = 0;

			error = xfs_btree_new_iroot(cur, &logflags, stat);
			if (error || *stat == 0)
				return error;

			xfs_trans_log_inode(cur->bc_tp, ip, logflags);
		}

		return 0;
	}

	/* First, try shifting an entry to the right neighbor. */
	error = xfs_btree_rshift(cur, level, stat);
	if (error || *stat)
		return error;

	/* Next, try shifting an entry to the left neighbor. */
	error = xfs_btree_lshift(cur, level, stat);
	if (error)
		return error;

	if (*stat) {
		/* lshift moved our slot; re-read the cursor position. */
		*oindex = *index = cur->bc_ptrs[level];
		return 0;
	}

	/*
	 * Next, try splitting the current block in half.
	 *
	 * If this works we have to re-set our variables because we
	 * could be in a different block now.
	 */
	error = xfs_btree_split(cur, level, nptr, key, ncur, stat);
	if (error || *stat == 0)
		return error;


	*index = cur->bc_ptrs[level];
	return 0;
}
3247
3248/*
3249 * Insert one record/level. Return information to the caller
3250 * allowing the next level up to proceed if necessary.
3251 */
3252STATIC int
3253xfs_btree_insrec(
3254 struct xfs_btree_cur *cur, /* btree cursor */
3255 int level, /* level to insert record at */
3256 union xfs_btree_ptr *ptrp, /* i/o: block number inserted */
Darrick J. Wonge5821e52016-08-03 11:02:39 +10003257 union xfs_btree_rec *rec, /* record to insert */
3258 union xfs_btree_key *key, /* i/o: block key for ptrp */
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003259 struct xfs_btree_cur **curp, /* output: new cursor replacing cur */
3260 int *stat) /* success/failure */
3261{
3262 struct xfs_btree_block *block; /* btree block */
3263 struct xfs_buf *bp; /* buffer for block */
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003264 union xfs_btree_ptr nptr; /* new block ptr */
3265 struct xfs_btree_cur *ncur; /* new btree cursor */
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10003266 union xfs_btree_bigkey nkey; /* new block key */
3267 union xfs_btree_key *lkey;
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003268 int optr; /* old key/record index */
3269 int ptr; /* key/record index */
3270 int numrecs;/* number of records */
3271 int error; /* error return value */
3272#ifdef DEBUG
3273 int i;
3274#endif
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10003275 xfs_daddr_t old_bn;
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003276
3277 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
Darrick J. Wonge5821e52016-08-03 11:02:39 +10003278 XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, &rec);
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003279
3280 ncur = NULL;
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10003281 lkey = (union xfs_btree_key *)&nkey;
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003282
3283 /*
3284 * If we have an external root pointer, and we've made it to the
3285 * root level, allocate a new root block and we're done.
3286 */
3287 if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
3288 (level >= cur->bc_nlevels)) {
3289 error = xfs_btree_new_root(cur, stat);
3290 xfs_btree_set_ptr_null(cur, ptrp);
3291
3292 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3293 return error;
3294 }
3295
3296 /* If we're off the left edge, return failure. */
3297 ptr = cur->bc_ptrs[level];
3298 if (ptr == 0) {
3299 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3300 *stat = 0;
3301 return 0;
3302 }
3303
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003304 optr = ptr;
3305
3306 XFS_BTREE_STATS_INC(cur, insrec);
3307
3308 /* Get pointers to the btree buffer and block. */
3309 block = xfs_btree_get_block(cur, level, &bp);
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10003310 old_bn = bp ? bp->b_bn : XFS_BUF_DADDR_NULL;
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003311 numrecs = xfs_btree_get_numrecs(block);
3312
3313#ifdef DEBUG
3314 error = xfs_btree_check_block(cur, block, level, bp);
3315 if (error)
3316 goto error0;
3317
3318 /* Check that the new entry is being inserted in the right place. */
3319 if (ptr <= numrecs) {
3320 if (level == 0) {
Darrick J. Wonge5821e52016-08-03 11:02:39 +10003321 ASSERT(cur->bc_ops->recs_inorder(cur, rec,
Christoph Hellwig4a26e662008-10-30 16:58:32 +11003322 xfs_btree_rec_addr(cur, ptr, block)));
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003323 } else {
Darrick J. Wonge5821e52016-08-03 11:02:39 +10003324 ASSERT(cur->bc_ops->keys_inorder(cur, key,
Christoph Hellwig4a26e662008-10-30 16:58:32 +11003325 xfs_btree_key_addr(cur, ptr, block)));
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003326 }
3327 }
3328#endif
3329
3330 /*
3331 * If the block is full, we can't insert the new entry until we
3332 * make the block un-full.
3333 */
3334 xfs_btree_set_ptr_null(cur, &nptr);
3335 if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
3336 error = xfs_btree_make_block_unfull(cur, level, numrecs,
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10003337 &optr, &ptr, &nptr, &ncur, lkey, stat);
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003338 if (error || *stat == 0)
3339 goto error0;
3340 }
3341
3342 /*
3343 * The current block may have changed if the block was
3344 * previously full and we have just made space in it.
3345 */
3346 block = xfs_btree_get_block(cur, level, &bp);
3347 numrecs = xfs_btree_get_numrecs(block);
3348
3349#ifdef DEBUG
3350 error = xfs_btree_check_block(cur, block, level, bp);
3351 if (error)
3352 return error;
3353#endif
3354
3355 /*
3356 * At this point we know there's room for our new entry in the block
3357 * we're pointing at.
3358 */
3359 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
3360
3361 if (level > 0) {
3362 /* It's a nonleaf. make a hole in the keys and ptrs */
3363 union xfs_btree_key *kp;
3364 union xfs_btree_ptr *pp;
3365
3366 kp = xfs_btree_key_addr(cur, ptr, block);
3367 pp = xfs_btree_ptr_addr(cur, ptr, block);
3368
3369#ifdef DEBUG
3370 for (i = numrecs - ptr; i >= 0; i--) {
3371 error = xfs_btree_check_ptr(cur, pp, i, level);
3372 if (error)
3373 return error;
3374 }
3375#endif
3376
3377 xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
3378 xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
3379
3380#ifdef DEBUG
3381 error = xfs_btree_check_ptr(cur, ptrp, 0, level);
3382 if (error)
3383 goto error0;
3384#endif
3385
3386 /* Now put the new data in, bump numrecs and log it. */
Darrick J. Wonge5821e52016-08-03 11:02:39 +10003387 xfs_btree_copy_keys(cur, kp, key, 1);
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003388 xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
3389 numrecs++;
3390 xfs_btree_set_numrecs(block, numrecs);
3391 xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
3392 xfs_btree_log_keys(cur, bp, ptr, numrecs);
3393#ifdef DEBUG
3394 if (ptr < numrecs) {
Christoph Hellwig4a26e662008-10-30 16:58:32 +11003395 ASSERT(cur->bc_ops->keys_inorder(cur, kp,
3396 xfs_btree_key_addr(cur, ptr + 1, block)));
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003397 }
3398#endif
3399 } else {
3400 /* It's a leaf. make a hole in the records */
3401 union xfs_btree_rec *rp;
3402
3403 rp = xfs_btree_rec_addr(cur, ptr, block);
3404
3405 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
3406
3407 /* Now put the new data in, bump numrecs and log it. */
Darrick J. Wonge5821e52016-08-03 11:02:39 +10003408 xfs_btree_copy_recs(cur, rp, rec, 1);
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003409 xfs_btree_set_numrecs(block, ++numrecs);
3410 xfs_btree_log_recs(cur, bp, ptr, numrecs);
3411#ifdef DEBUG
3412 if (ptr < numrecs) {
Christoph Hellwig4a26e662008-10-30 16:58:32 +11003413 ASSERT(cur->bc_ops->recs_inorder(cur, rp,
3414 xfs_btree_rec_addr(cur, ptr + 1, block)));
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003415 }
3416#endif
3417 }
3418
3419 /* Log the new number of records in the btree header. */
3420 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3421
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10003422 /*
3423 * If we just inserted into a new tree block, we have to
3424 * recalculate nkey here because nkey is out of date.
3425 *
3426 * Otherwise we're just updating an existing block (having shoved
3427 * some records into the new tree block), so use the regular key
3428 * update mechanism.
3429 */
3430 if (bp && bp->b_bn != old_bn) {
3431 xfs_btree_get_keys(cur, block, lkey);
3432 } else if (xfs_btree_needs_key_update(cur, optr)) {
Darrick J. Wong70b22652016-08-03 11:03:38 +10003433 error = cur->bc_ops->update_keys(cur, level);
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003434 if (error)
3435 goto error0;
3436 }
3437
3438 /*
3439 * If we are tracking the last record in the tree and
3440 * we are at the far right edge of the tree, update it.
3441 */
3442 if (xfs_btree_is_lastrec(cur, block, level)) {
Darrick J. Wonge5821e52016-08-03 11:02:39 +10003443 cur->bc_ops->update_lastrec(cur, block, rec,
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003444 ptr, LASTREC_INSREC);
3445 }
3446
3447 /*
3448 * Return the new block number, if any.
3449 * If there is one, give back a record value and a cursor too.
3450 */
3451 *ptrp = nptr;
3452 if (!xfs_btree_ptr_is_null(cur, &nptr)) {
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10003453 xfs_btree_copy_keys(cur, key, lkey, 1);
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003454 *curp = ncur;
3455 }
3456
3457 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3458 *stat = 1;
3459 return 0;
3460
3461error0:
3462 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3463 return error;
3464}
3465
/*
 * Insert the record at the point referenced by cur.
 *
 * A multi-level split of the tree on insert will invalidate the original
 * cursor.  All callers of this function should assume that the cursor is
 * no longer valid and revalidate it.
 *
 * On success *stat is 1; *stat is 0 if the record could not be inserted.
 */
int
xfs_btree_insert(
	struct xfs_btree_cur	*cur,
	int			*stat)
{
	int			error;	/* error return value */
	int			i;	/* result value, 0 for failure */
	int			level;	/* current level number in btree */
	union xfs_btree_ptr	nptr;	/* new block number (split result) */
	struct xfs_btree_cur	*ncur;	/* new cursor (split result) */
	struct xfs_btree_cur	*pcur;	/* previous level's cursor */
	/*
	 * NOTE(review): bigkey apparently reserves room for a low/high key
	 * pair as used by overlapping btrees — confirm against the
	 * union xfs_btree_bigkey definition before relying on this.
	 */
	union xfs_btree_bigkey	bkey;	/* key of block to insert */
	union xfs_btree_key	*key;
	union xfs_btree_rec	rec;	/* record to insert */

	level = 0;
	ncur = NULL;
	pcur = cur;
	key = (union xfs_btree_key *)&bkey;

	xfs_btree_set_ptr_null(cur, &nptr);

	/* Make a key out of the record data to be inserted, and save it. */
	cur->bc_ops->init_rec_from_cur(cur, &rec);
	cur->bc_ops->init_key_from_rec(key, &rec);

	/*
	 * Loop going up the tree, starting at the leaf level.
	 * Stop when we don't get a split block, that must mean that
	 * the insert is finished with this level.
	 */
	do {
		/*
		 * Insert nrec/nptr into this level of the tree.
		 * Note if we fail, nptr will be null.
		 */
		error = xfs_btree_insrec(pcur, level, &nptr, &rec, key,
				&ncur, &i);
		if (error) {
			/* Never leak a cursor we duplicated on a split. */
			if (pcur != cur)
				xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
			goto error0;
		}

		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
		level++;

		/*
		 * See if the cursor we just used is trash.
		 * Can't trash the caller's cursor, but otherwise we should
		 * if ncur is a new cursor or we're about to be done.
		 */
		if (pcur != cur &&
		    (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
			/* Save the state from the cursor before we trash it */
			if (cur->bc_ops->update_cursor)
				cur->bc_ops->update_cursor(pcur, cur);
			cur->bc_nlevels = pcur->bc_nlevels;
			xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
		}
		/* If we got a new cursor, switch to it. */
		if (ncur) {
			pcur = ncur;
			ncur = NULL;
		}
	} while (!xfs_btree_ptr_is_null(cur, &nptr));

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = i;
	return 0;
error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}
Christoph Hellwigd4b3a4b2008-10-30 16:57:51 +11003547
/*
 * Try to merge a non-leaf block back into the inode root.
 *
 * Note: the killroot name comes from the fact that we're effectively
 * killing the old root block.  But because we can't just delete the
 * inode we have to copy the single block it was pointing to into the
 * inode.
 *
 * Returns 0 (having done nothing) if the merge is not possible; only a
 * failed pointer check or block free returns an error.
 */
STATIC int
xfs_btree_kill_iroot(
	struct xfs_btree_cur	*cur)
{
	int			whichfork = cur->bc_private.b.whichfork;
	struct xfs_inode	*ip = cur->bc_private.b.ip;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_btree_block	*block;		/* root block (in inode fork) */
	struct xfs_btree_block	*cblock;	/* the root's only child */
	union xfs_btree_key	*kp;
	union xfs_btree_key	*ckp;
	union xfs_btree_ptr	*pp;
	union xfs_btree_ptr	*cpp;
	struct xfs_buf		*cbp;
	int			level;
	int			index;
	int			numrecs;
	int			error;
#ifdef DEBUG
	union xfs_btree_ptr	ptr;
	int			i;
#endif

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);

	ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
	ASSERT(cur->bc_nlevels > 1);

	/*
	 * Don't deal with the root block needs to be a leaf case.
	 * We're just going to turn the thing back into extents anyway.
	 */
	level = cur->bc_nlevels - 1;
	if (level == 1)
		goto out0;

	/*
	 * Give up if the root has multiple children.
	 */
	block = xfs_btree_get_iroot(cur);
	if (xfs_btree_get_numrecs(block) != 1)
		goto out0;

	cblock = xfs_btree_get_block(cur, level - 1, &cbp);
	numrecs = xfs_btree_get_numrecs(cblock);

	/*
	 * Only do this if the next level will fit.
	 * Then the data must be copied up to the inode,
	 * instead of freeing the root you free the next level.
	 */
	if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
		goto out0;

	XFS_BTREE_STATS_INC(cur, killroot);

#ifdef DEBUG
	/* The root must have no siblings by definition. */
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
	ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
	ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
#endif

	/*
	 * Grow the in-inode root to the child's record count if needed.
	 * xfs_iroot_realloc can move the broot, so reload block afterwards.
	 */
	index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
	if (index) {
		xfs_iroot_realloc(cur->bc_private.b.ip, index,
				  cur->bc_private.b.whichfork);
		block = ifp->if_broot;
	}

	be16_add_cpu(&block->bb_numrecs, index);
	ASSERT(block->bb_numrecs == cblock->bb_numrecs);

	/* Copy the child's keys into the root... */
	kp = xfs_btree_key_addr(cur, 1, block);
	ckp = xfs_btree_key_addr(cur, 1, cblock);
	xfs_btree_copy_keys(cur, kp, ckp, numrecs);

	/* ...and its child pointers, after checking each one. */
	pp = xfs_btree_ptr_addr(cur, 1, block);
	cpp = xfs_btree_ptr_addr(cur, 1, cblock);
#ifdef DEBUG
	for (i = 0; i < numrecs; i++) {
		error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
		if (error) {
			XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
			return error;
		}
	}
#endif
	xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);

	/* The child's contents now live in the inode; free its block. */
	error = xfs_btree_free_block(cur, cbp);
	if (error) {
		XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
		return error;
	}

	cur->bc_bufs[level - 1] = NULL;
	be16_add_cpu(&block->bb_level, -1);
	xfs_trans_log_inode(cur->bc_tp, ip,
		XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork));
	cur->bc_nlevels--;
out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	return 0;
}
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003661
Christoph Hellwigc0e59e12010-09-07 23:34:07 +00003662/*
3663 * Kill the current root node, and replace it with it's only child node.
3664 */
3665STATIC int
3666xfs_btree_kill_root(
3667 struct xfs_btree_cur *cur,
3668 struct xfs_buf *bp,
3669 int level,
3670 union xfs_btree_ptr *newroot)
3671{
3672 int error;
3673
3674 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3675 XFS_BTREE_STATS_INC(cur, killroot);
3676
3677 /*
3678 * Update the root pointer, decreasing the level by 1 and then
3679 * free the old root.
3680 */
3681 cur->bc_ops->set_root(cur, newroot, -1);
3682
Christoph Hellwigc46ee8a2016-02-08 14:58:07 +11003683 error = xfs_btree_free_block(cur, bp);
Christoph Hellwigc0e59e12010-09-07 23:34:07 +00003684 if (error) {
3685 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3686 return error;
3687 }
3688
Christoph Hellwigc0e59e12010-09-07 23:34:07 +00003689 cur->bc_bufs[level] = NULL;
3690 cur->bc_ra[level] = 0;
3691 cur->bc_nlevels--;
3692
3693 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3694 return 0;
3695}
3696
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003697STATIC int
3698xfs_btree_dec_cursor(
3699 struct xfs_btree_cur *cur,
3700 int level,
3701 int *stat)
3702{
3703 int error;
3704 int i;
3705
3706 if (level > 0) {
3707 error = xfs_btree_decrement(cur, level, &i);
3708 if (error)
3709 return error;
3710 }
3711
3712 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3713 *stat = 1;
3714 return 0;
3715}
3716
/*
 * Single level of the btree record deletion routine.
 * Delete record pointed to by cur/level.
 * Remove the record from its block then rebalance the tree.
 *
 * Returns 0 or a negative errno; the outcome is reported via *stat:
 * 0 for nothing-to-delete, 1 for done, 2 to go on to the next level
 * (two blocks were joined, so the caller must delete at level + 1).
 */
STATIC int					/* error */
xfs_btree_delrec(
	struct xfs_btree_cur	*cur,		/* btree cursor */
	int			level,		/* level removing record from */
	int			*stat)		/* fail/done/go-on */
{
	struct xfs_btree_block	*block;		/* btree block */
	union xfs_btree_ptr	cptr;		/* current block ptr */
	struct xfs_buf		*bp;		/* buffer for block */
	int			error;		/* error return value */
	int			i;		/* loop counter */
	union xfs_btree_ptr	lptr;		/* left sibling block ptr */
	struct xfs_buf		*lbp;		/* left buffer pointer */
	struct xfs_btree_block	*left;		/* left btree block */
	int			lrecs = 0;	/* left record count */
	int			ptr;		/* key/record index */
	union xfs_btree_ptr	rptr;		/* right sibling block ptr */
	struct xfs_buf		*rbp;		/* right buffer pointer */
	struct xfs_btree_block	*right;		/* right btree block */
	struct xfs_btree_block	*rrblock;	/* right-right btree block */
	struct xfs_buf		*rrbp;		/* right-right buffer pointer */
	int			rrecs = 0;	/* right record count */
	struct xfs_btree_cur	*tcur;		/* temporary btree cursor */
	int			numrecs;	/* temporary numrec count */

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGI(cur, level);

	tcur = NULL;

	/* Get the index of the entry being deleted, check for nothing there. */
	ptr = cur->bc_ptrs[level];
	if (ptr == 0) {
		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
		*stat = 0;
		return 0;
	}

	/* Get the buffer & block containing the record or key/ptr. */
	block = xfs_btree_get_block(cur, level, &bp);
	numrecs = xfs_btree_get_numrecs(block);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto error0;
#endif

	/* Fail if we're off the end of the block. */
	if (ptr > numrecs) {
		XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
		*stat = 0;
		return 0;
	}

	XFS_BTREE_STATS_INC(cur, delrec);
	XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);

	/* Excise the entries being deleted. */
	if (level > 0) {
		/* It's a nonleaf. operate on keys and ptrs */
		union xfs_btree_key	*lkp;
		union xfs_btree_ptr	*lpp;

		/* Addresses of the first entry after the one to delete. */
		lkp = xfs_btree_key_addr(cur, ptr + 1, block);
		lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);

#ifdef DEBUG
		for (i = 0; i < numrecs - ptr; i++) {
			error = xfs_btree_check_ptr(cur, lpp, i, level);
			if (error)
				goto error0;
		}
#endif

		/* Shift the trailing entries left over the deleted one. */
		if (ptr < numrecs) {
			xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
			xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
			xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
			xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
		}
	} else {
		/* It's a leaf. operate on records */
		if (ptr < numrecs) {
			xfs_btree_shift_recs(cur,
				xfs_btree_rec_addr(cur, ptr + 1, block),
				-1, numrecs - ptr);
			xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
		}
	}

	/*
	 * Decrement and log the number of entries in the block.
	 */
	xfs_btree_set_numrecs(block, --numrecs);
	xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);

	/*
	 * If we are tracking the last record in the tree and
	 * we are at the far right edge of the tree, update it.
	 */
	if (xfs_btree_is_lastrec(cur, block, level)) {
		cur->bc_ops->update_lastrec(cur, block, NULL,
					    ptr, LASTREC_DELREC);
	}

	/*
	 * We're at the root level.  First, shrink the root block in-memory.
	 * Try to get rid of the next level down.  If we can't then there's
	 * nothing left to do.
	 */
	if (level == cur->bc_nlevels - 1) {
		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
			xfs_iroot_realloc(cur->bc_private.b.ip, -1,
					  cur->bc_private.b.whichfork);

			error = xfs_btree_kill_iroot(cur);
			if (error)
				goto error0;

			error = xfs_btree_dec_cursor(cur, level, stat);
			if (error)
				goto error0;
			*stat = 1;
			return 0;
		}

		/*
		 * If this is the root level, and there's only one entry left,
		 * and it's NOT the leaf level, then we can get rid of this
		 * level.
		 */
		if (numrecs == 1 && level > 0) {
			union xfs_btree_ptr	*pp;
			/*
			 * pp is still set to the first pointer in the block.
			 * Make it the new root of the btree.
			 */
			pp = xfs_btree_ptr_addr(cur, 1, block);
			error = xfs_btree_kill_root(cur, bp, level, pp);
			if (error)
				goto error0;
		} else if (level > 0) {
			error = xfs_btree_dec_cursor(cur, level, stat);
			if (error)
				goto error0;
		}
		*stat = 1;
		return 0;
	}

	/*
	 * If we deleted the leftmost entry in the block, update the
	 * key values above us in the tree.
	 */
	if (xfs_btree_needs_key_update(cur, ptr)) {
		error = cur->bc_ops->update_keys(cur, level);
		if (error)
			goto error0;
	}

	/*
	 * If the number of records remaining in the block is at least
	 * the minimum, we're done.
	 */
	if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
		error = xfs_btree_dec_cursor(cur, level, stat);
		if (error)
			goto error0;
		return 0;
	}

	/*
	 * Otherwise, we have to move some records around to keep the
	 * tree balanced.  Look at the left and right sibling blocks to
	 * see if we can re-balance by moving only one record.
	 */
	xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
	xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);

	if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
		/*
		 * One child of root, need to get a chance to copy its contents
		 * into the root and delete it. Can't go up to next level,
		 * there's nothing to delete there.
		 */
		if (xfs_btree_ptr_is_null(cur, &rptr) &&
		    xfs_btree_ptr_is_null(cur, &lptr) &&
		    level == cur->bc_nlevels - 2) {
			error = xfs_btree_kill_iroot(cur);
			if (!error)
				error = xfs_btree_dec_cursor(cur, level, stat);
			if (error)
				goto error0;
			return 0;
		}
	}

	ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
	       !xfs_btree_ptr_is_null(cur, &lptr));

	/*
	 * Duplicate the cursor so our btree manipulations here won't
	 * disrupt the next level up.
	 */
	error = xfs_btree_dup_cursor(cur, &tcur);
	if (error)
		goto error0;

	/*
	 * If there's a right sibling, see if it's ok to shift an entry
	 * out of it.
	 */
	if (!xfs_btree_ptr_is_null(cur, &rptr)) {
		/*
		 * Move the temp cursor to the last entry in the next block.
		 * Actually any entry but the first would suffice.
		 */
		i = xfs_btree_lastrec(tcur, level);
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);

		error = xfs_btree_increment(tcur, level, &i);
		if (error)
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);

		i = xfs_btree_lastrec(tcur, level);
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);

		/* Grab a pointer to the block. */
		right = xfs_btree_get_block(tcur, level, &rbp);
#ifdef DEBUG
		error = xfs_btree_check_block(tcur, right, level, rbp);
		if (error)
			goto error0;
#endif
		/* Grab the current block number, for future use. */
		xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);

		/*
		 * If right block is full enough so that removing one entry
		 * won't make it too empty, and left-shifting an entry out
		 * of right to us works, we're done.
		 */
		if (xfs_btree_get_numrecs(right) - 1 >=
		    cur->bc_ops->get_minrecs(tcur, level)) {
			error = xfs_btree_lshift(tcur, level, &i);
			if (error)
				goto error0;
			if (i) {
				ASSERT(xfs_btree_get_numrecs(block) >=
				       cur->bc_ops->get_minrecs(tcur, level));

				xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
				tcur = NULL;

				error = xfs_btree_dec_cursor(cur, level, stat);
				if (error)
					goto error0;
				return 0;
			}
		}

		/*
		 * Otherwise, grab the number of records in right for
		 * future reference, and fix up the temp cursor to point
		 * to our block again (last record).
		 */
		rrecs = xfs_btree_get_numrecs(right);
		if (!xfs_btree_ptr_is_null(cur, &lptr)) {
			i = xfs_btree_firstrec(tcur, level);
			XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);

			error = xfs_btree_decrement(tcur, level, &i);
			if (error)
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
		}
	}

	/*
	 * If there's a left sibling, see if it's ok to shift an entry
	 * out of it.
	 */
	if (!xfs_btree_ptr_is_null(cur, &lptr)) {
		/*
		 * Move the temp cursor to the first entry in the
		 * previous block.
		 */
		i = xfs_btree_firstrec(tcur, level);
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);

		error = xfs_btree_decrement(tcur, level, &i);
		if (error)
			goto error0;
		i = xfs_btree_firstrec(tcur, level);
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);

		/* Grab a pointer to the block. */
		left = xfs_btree_get_block(tcur, level, &lbp);
#ifdef DEBUG
		/* NOTE(review): checks with cur, not tcur — confirm intent. */
		error = xfs_btree_check_block(cur, left, level, lbp);
		if (error)
			goto error0;
#endif
		/* Grab the current block number, for future use. */
		xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);

		/*
		 * If left block is full enough so that removing one entry
		 * won't make it too empty, and right-shifting an entry out
		 * of left to us works, we're done.
		 */
		if (xfs_btree_get_numrecs(left) - 1 >=
		    cur->bc_ops->get_minrecs(tcur, level)) {
			error = xfs_btree_rshift(tcur, level, &i);
			if (error)
				goto error0;
			if (i) {
				ASSERT(xfs_btree_get_numrecs(block) >=
				       cur->bc_ops->get_minrecs(tcur, level));
				xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
				tcur = NULL;
				if (level == 0)
					cur->bc_ptrs[0]++;
				XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
				*stat = 1;
				return 0;
			}
		}

		/*
		 * Otherwise, grab the number of records in right for
		 * future reference.
		 */
		lrecs = xfs_btree_get_numrecs(left);
	}

	/* Delete the temp cursor, we're done with it. */
	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
	tcur = NULL;

	/* If here, we need to do a join to keep the tree balanced. */
	/* cptr was filled in by whichever sibling probe ran above. */
	ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));

	if (!xfs_btree_ptr_is_null(cur, &lptr) &&
	    lrecs + xfs_btree_get_numrecs(block) <=
			cur->bc_ops->get_maxrecs(cur, level)) {
		/*
		 * Set "right" to be the starting block,
		 * "left" to be the left neighbor.
		 */
		rptr = cptr;
		right = block;
		rbp = bp;
		error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
		if (error)
			goto error0;

	/*
	 * If that won't work, see if we can join with the right neighbor block.
	 */
	} else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
		   rrecs + xfs_btree_get_numrecs(block) <=
			cur->bc_ops->get_maxrecs(cur, level)) {
		/*
		 * Set "left" to be the starting block,
		 * "right" to be the right neighbor.
		 */
		lptr = cptr;
		left = block;
		lbp = bp;
		error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
		if (error)
			goto error0;

	/*
	 * Otherwise, we can't fix the imbalance.
	 * Just return.  This is probably a logic error, but it's not fatal.
	 */
	} else {
		error = xfs_btree_dec_cursor(cur, level, stat);
		if (error)
			goto error0;
		return 0;
	}

	rrecs = xfs_btree_get_numrecs(right);
	lrecs = xfs_btree_get_numrecs(left);

	/*
	 * We're now going to join "left" and "right" by moving all the stuff
	 * in "right" to "left" and deleting "right".
	 */
	XFS_BTREE_STATS_ADD(cur, moves, rrecs);
	if (level > 0) {
		/* It's a non-leaf.  Move keys and pointers. */
		union xfs_btree_key	*lkp;	/* left btree key */
		union xfs_btree_ptr	*lpp;	/* left address pointer */
		union xfs_btree_key	*rkp;	/* right btree key */
		union xfs_btree_ptr	*rpp;	/* right address pointer */

		lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
		lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
		rkp = xfs_btree_key_addr(cur, 1, right);
		rpp = xfs_btree_ptr_addr(cur, 1, right);
#ifdef DEBUG
		/* NOTE(review): starts at 1, so rpp[0] is unchecked here. */
		for (i = 1; i < rrecs; i++) {
			error = xfs_btree_check_ptr(cur, rpp, i, level);
			if (error)
				goto error0;
		}
#endif
		xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
		xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);

		xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
		xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
	} else {
		/* It's a leaf.  Move records. */
		union xfs_btree_rec	*lrp;	/* left record pointer */
		union xfs_btree_rec	*rrp;	/* right record pointer */

		lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
		rrp = xfs_btree_rec_addr(cur, 1, right);

		xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
		xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
	}

	XFS_BTREE_STATS_INC(cur, join);

	/*
	 * Fix up the number of records and right block pointer in the
	 * surviving block, and log it.
	 */
	xfs_btree_set_numrecs(left, lrecs + rrecs);
	/* NOTE(review): trailing comma is the comma operator — acts as ';'. */
	xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB),
	xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);

	/* If there is a right sibling, point it to the remaining block. */
	xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
	if (!xfs_btree_ptr_is_null(cur, &cptr)) {
		error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);
		if (error)
			goto error0;
		xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
		xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
	}

	/* Free the deleted block. */
	error = xfs_btree_free_block(cur, rbp);
	if (error)
		goto error0;

	/*
	 * If we joined with the left neighbor, set the buffer in the
	 * cursor to the left block, and fix up the index.
	 */
	if (bp != lbp) {
		cur->bc_bufs[level] = lbp;
		cur->bc_ptrs[level] += lrecs;
		cur->bc_ra[level] = 0;
	}
	/*
	 * If we joined with the right neighbor and there's a level above
	 * us, increment the cursor at that level.
	 */
	else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
		   (level + 1 < cur->bc_nlevels)) {
		error = xfs_btree_increment(cur, level + 1, &i);
		if (error)
			goto error0;
	}

	/*
	 * Readjust the ptr at this level if it's not a leaf, since it's
	 * still pointing at the deletion point, which makes the cursor
	 * inconsistent.  If this makes the ptr 0, the caller fixes it up.
	 * We can't use decrement because it would change the next level up.
	 */
	if (level > 0)
		cur->bc_ptrs[level]--;

	/*
	 * We combined blocks, so we have to update the parent keys if the
	 * btree supports overlapped intervals.  However, bc_ptrs[level + 1]
	 * points to the old block so that the caller knows which record to
	 * delete.  Therefore, the caller must be savvy enough to call updkeys
	 * for us if we return stat == 2.  The other exit points from this
	 * function don't require deletions further up the tree, so they can
	 * call updkeys directly.
	 */

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	/* Return value means the next level up has something to do. */
	*stat = 2;
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	if (tcur)
		xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
	return error;
}
4229
4230/*
4231 * Delete the record pointed to by cur.
4232 * The cursor refers to the place where the record was (could be inserted)
4233 * when the operation returns.
4234 */
4235int /* error */
4236xfs_btree_delete(
4237 struct xfs_btree_cur *cur,
4238 int *stat) /* success/failure */
4239{
4240 int error; /* error return value */
4241 int level;
4242 int i;
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10004243 bool joined = false;
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004244
4245 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
4246
4247 /*
4248 * Go up the tree, starting at leaf level.
4249 *
4250 * If 2 is returned then a join was done; go to the next level.
4251 * Otherwise we are done.
4252 */
4253 for (level = 0, i = 2; i == 2; level++) {
4254 error = xfs_btree_delrec(cur, level, &i);
4255 if (error)
4256 goto error0;
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10004257 if (i == 2)
4258 joined = true;
4259 }
4260
4261 /*
4262 * If we combined blocks as part of deleting the record, delrec won't
4263 * have updated the parent high keys so we have to do that here.
4264 */
4265 if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) {
4266 error = xfs_btree_updkeys_force(cur, 0);
4267 if (error)
4268 goto error0;
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004269 }
4270
4271 if (i == 0) {
4272 for (level = 1; level < cur->bc_nlevels; level++) {
4273 if (cur->bc_ptrs[level] == 0) {
4274 error = xfs_btree_decrement(cur, level, &i);
4275 if (error)
4276 goto error0;
4277 break;
4278 }
4279 }
4280 }
4281
4282 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
4283 *stat = i;
4284 return 0;
4285error0:
4286 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
4287 return error;
4288}
Christoph Hellwig8cc938f2008-10-30 16:58:11 +11004289
4290/*
4291 * Get the data from the pointed-to record.
4292 */
4293int /* error */
4294xfs_btree_get_rec(
4295 struct xfs_btree_cur *cur, /* btree cursor */
4296 union xfs_btree_rec **recp, /* output: btree record */
4297 int *stat) /* output: success/failure */
4298{
4299 struct xfs_btree_block *block; /* btree block */
4300 struct xfs_buf *bp; /* buffer pointer */
4301 int ptr; /* record number */
4302#ifdef DEBUG
4303 int error; /* error return value */
4304#endif
4305
4306 ptr = cur->bc_ptrs[0];
4307 block = xfs_btree_get_block(cur, 0, &bp);
4308
4309#ifdef DEBUG
4310 error = xfs_btree_check_block(cur, block, 0, bp);
4311 if (error)
4312 return error;
4313#endif
4314
4315 /*
4316 * Off the right end or left end, return failure.
4317 */
4318 if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
4319 *stat = 0;
4320 return 0;
4321 }
4322
4323 /*
4324 * Point to the record and extract its data.
4325 */
4326 *recp = xfs_btree_rec_addr(cur, ptr, block);
4327 *stat = 1;
4328 return 0;
4329}
Dave Chinner21b5c972013-08-30 10:23:44 +10004330
4331/*
4332 * Change the owner of a btree.
4333 *
4334 * The mechanism we use here is ordered buffer logging. Because we don't know
 * how many buffers we are going to need to modify, we don't really want to
 * have to make transaction reservations for the worst case of every buffer in a
 * full size btree as that may be more space than we can fit in the log....
4338 *
4339 * We do the btree walk in the most optimal manner possible - we have sibling
4340 * pointers so we can just walk all the blocks on each level from left to right
4341 * in a single pass, and then move to the next level and do the same. We can
4342 * also do readahead on the sibling pointers to get IO moving more quickly,
4343 * though for slow disks this is unlikely to make much difference to performance
4344 * as the amount of CPU work we have to do before moving to the next block is
4345 * relatively small.
4346 *
4347 * For each btree block that we load, modify the owner appropriately, set the
4348 * buffer as an ordered buffer and log it appropriately. We need to ensure that
4349 * we mark the region we change dirty so that if the buffer is relogged in
4350 * a subsequent transaction the changes we make here as an ordered buffer are
Dave Chinner638f44162013-08-30 10:23:45 +10004351 * correctly relogged in that transaction. If we are in recovery context, then
4352 * just queue the modified buffer as delayed write buffer so the transaction
4353 * recovery completion writes the changes to disk.
Dave Chinner21b5c972013-08-30 10:23:44 +10004354 */
4355static int
4356xfs_btree_block_change_owner(
4357 struct xfs_btree_cur *cur,
4358 int level,
Dave Chinner638f44162013-08-30 10:23:45 +10004359 __uint64_t new_owner,
4360 struct list_head *buffer_list)
Dave Chinner21b5c972013-08-30 10:23:44 +10004361{
4362 struct xfs_btree_block *block;
4363 struct xfs_buf *bp;
4364 union xfs_btree_ptr rptr;
4365
4366 /* do right sibling readahead */
4367 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
4368
4369 /* modify the owner */
4370 block = xfs_btree_get_block(cur, level, &bp);
4371 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
4372 block->bb_u.l.bb_owner = cpu_to_be64(new_owner);
4373 else
4374 block->bb_u.s.bb_owner = cpu_to_be32(new_owner);
4375
4376 /*
Dave Chinner638f44162013-08-30 10:23:45 +10004377 * If the block is a root block hosted in an inode, we might not have a
4378 * buffer pointer here and we shouldn't attempt to log the change as the
4379 * information is already held in the inode and discarded when the root
4380 * block is formatted into the on-disk inode fork. We still change it,
4381 * though, so everything is consistent in memory.
Dave Chinner21b5c972013-08-30 10:23:44 +10004382 */
4383 if (bp) {
Dave Chinner638f44162013-08-30 10:23:45 +10004384 if (cur->bc_tp) {
4385 xfs_trans_ordered_buf(cur->bc_tp, bp);
4386 xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
4387 } else {
4388 xfs_buf_delwri_queue(bp, buffer_list);
4389 }
Dave Chinner21b5c972013-08-30 10:23:44 +10004390 } else {
4391 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
4392 ASSERT(level == cur->bc_nlevels - 1);
4393 }
4394
4395 /* now read rh sibling block for next iteration */
4396 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
4397 if (xfs_btree_ptr_is_null(cur, &rptr))
Dave Chinner24513372014-06-25 14:58:08 +10004398 return -ENOENT;
Dave Chinner21b5c972013-08-30 10:23:44 +10004399
4400 return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
4401}
4402
4403int
4404xfs_btree_change_owner(
4405 struct xfs_btree_cur *cur,
Dave Chinner638f44162013-08-30 10:23:45 +10004406 __uint64_t new_owner,
4407 struct list_head *buffer_list)
Dave Chinner21b5c972013-08-30 10:23:44 +10004408{
4409 union xfs_btree_ptr lptr;
4410 int level;
4411 struct xfs_btree_block *block = NULL;
4412 int error = 0;
4413
4414 cur->bc_ops->init_ptr_from_cur(cur, &lptr);
4415
4416 /* for each level */
4417 for (level = cur->bc_nlevels - 1; level >= 0; level--) {
4418 /* grab the left hand block */
4419 error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
4420 if (error)
4421 return error;
4422
4423 /* readahead the left most block for the next level down */
4424 if (level > 0) {
4425 union xfs_btree_ptr *ptr;
4426
4427 ptr = xfs_btree_ptr_addr(cur, 1, block);
4428 xfs_btree_readahead_ptr(cur, ptr, 1);
4429
4430 /* save for the next iteration of the loop */
4431 lptr = *ptr;
4432 }
4433
4434 /* for each buffer in the level */
4435 do {
4436 error = xfs_btree_block_change_owner(cur, level,
Dave Chinner638f44162013-08-30 10:23:45 +10004437 new_owner,
4438 buffer_list);
Dave Chinner21b5c972013-08-30 10:23:44 +10004439 } while (!error);
4440
Dave Chinner24513372014-06-25 14:58:08 +10004441 if (error != -ENOENT)
Dave Chinner21b5c972013-08-30 10:23:44 +10004442 return error;
4443 }
4444
4445 return 0;
4446}
Darrick J. Wongc5ab1312016-01-04 16:13:21 +11004447
/**
 * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
 *				      btree block
 *
 * @bp: buffer containing the btree block
 *
 * Returns true only on a v5 (CRC-enabled) filesystem where the block's
 * metadata UUID, self-describing block number, and owning AG all match
 * this buffer's location.
 */
bool
xfs_btree_sblock_v5hdr_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_perag	*pag = bp->b_pag;

	/* v5 header fields only exist on CRC-enabled filesystems */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return false;
	if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
		return false;
	if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
		return false;
	/* owner check is skipped when no per-ag structure is attached */
	if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
		return false;
	return true;
}
4474
4475/**
4476 * xfs_btree_sblock_verify() -- verify a short-format btree block
4477 *
4478 * @bp: buffer containing the btree block
4479 * @max_recs: maximum records allowed in this btree node
4480 */
4481bool
4482xfs_btree_sblock_verify(
4483 struct xfs_buf *bp,
4484 unsigned int max_recs)
4485{
4486 struct xfs_mount *mp = bp->b_target->bt_mount;
4487 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4488
4489 /* numrecs verification */
4490 if (be16_to_cpu(block->bb_numrecs) > max_recs)
4491 return false;
4492
4493 /* sibling pointer verification */
4494 if (!block->bb_u.s.bb_leftsib ||
4495 (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
4496 block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
4497 return false;
4498 if (!block->bb_u.s.bb_rightsib ||
4499 (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
4500 block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
4501 return false;
4502
4503 return true;
4504}
Darrick J. Wong19b54ee2016-06-21 11:53:28 +10004505
4506/*
4507 * Calculate the number of btree levels needed to store a given number of
4508 * records in a short-format btree.
4509 */
4510uint
4511xfs_btree_compute_maxlevels(
4512 struct xfs_mount *mp,
4513 uint *limits,
4514 unsigned long len)
4515{
4516 uint level;
4517 unsigned long maxblocks;
4518
4519 maxblocks = (len + limits[0] - 1) / limits[0];
4520 for (level = 1; maxblocks > 1; level++)
4521 maxblocks = (maxblocks + limits[1] - 1) / limits[1];
4522 return level;
4523}