blob: 81cad433df85d6a744c898bb2b43f45588c3fbd5 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
Nathan Scott7b718762005-11-02 14:58:39 +11002 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004 *
Nathan Scott7b718762005-11-02 14:58:39 +11005 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
Linus Torvalds1da177e2005-04-16 15:20:36 -07007 * published by the Free Software Foundation.
8 *
Nathan Scott7b718762005-11-02 14:58:39 +11009 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
Linus Torvalds1da177e2005-04-16 15:20:36 -070013 *
Nathan Scott7b718762005-11-02 14:58:39 +110014 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Linus Torvalds1da177e2005-04-16 15:20:36 -070017 */
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include "xfs.h"
Nathan Scotta844f452005-11-02 14:38:42 +110019#include "xfs_fs.h"
Dave Chinner70a9883c2013-10-23 10:36:05 +110020#include "xfs_shared.h"
Dave Chinnera4fbe6a2013-10-23 10:51:50 +110021#include "xfs_format.h"
Dave Chinner239880e2013-10-23 10:50:10 +110022#include "xfs_log_format.h"
23#include "xfs_trans_resv.h"
Nathan Scotta844f452005-11-02 14:38:42 +110024#include "xfs_bit.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070025#include "xfs_mount.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070026#include "xfs_inode.h"
Dave Chinner239880e2013-10-23 10:50:10 +110027#include "xfs_trans.h"
Christoph Hellwig38bb7422008-10-30 16:56:22 +110028#include "xfs_inode_item.h"
Christoph Hellwigee1a47a2013-04-21 14:53:46 -050029#include "xfs_buf_item.h"
Nathan Scotta844f452005-11-02 14:38:42 +110030#include "xfs_btree.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070031#include "xfs_error.h"
Christoph Hellwig0b1b2132009-12-14 23:14:59 +000032#include "xfs_trace.h"
Christoph Hellwigee1a47a2013-04-21 14:53:46 -050033#include "xfs_cksum.h"
Dave Chinnercf11da92014-07-15 07:08:24 +100034#include "xfs_alloc.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
36/*
37 * Cursor allocation zone.
38 */
39kmem_zone_t *xfs_btree_cur_zone;
40
41/*
42 * Btree magic numbers.
43 */
Christoph Hellwigee1a47a2013-04-21 14:53:46 -050044static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
Brian Fosteraafc3c22014-04-24 16:00:52 +100045 { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
46 XFS_FIBT_MAGIC },
Christoph Hellwigee1a47a2013-04-21 14:53:46 -050047 { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC,
Brian Fosteraafc3c22014-04-24 16:00:52 +100048 XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC }
Linus Torvalds1da177e2005-04-16 15:20:36 -070049};
Christoph Hellwigee1a47a2013-04-21 14:53:46 -050050#define xfs_btree_magic(cur) \
51 xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
Linus Torvalds1da177e2005-04-16 15:20:36 -070052
Linus Torvalds1da177e2005-04-16 15:20:36 -070053
Christoph Hellwig7cc95a82008-10-30 17:14:34 +110054STATIC int /* error (0 or EFSCORRUPTED) */
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +110055xfs_btree_check_lblock(
56 struct xfs_btree_cur *cur, /* btree cursor */
Christoph Hellwig7cc95a82008-10-30 17:14:34 +110057 struct xfs_btree_block *block, /* btree long form block pointer */
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +110058 int level, /* level of the btree block */
59 struct xfs_buf *bp) /* buffer for block, if any */
60{
Christoph Hellwigee1a47a2013-04-21 14:53:46 -050061 int lblock_ok = 1; /* block passes checks */
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +110062 struct xfs_mount *mp; /* file system mount point */
63
64 mp = cur->bc_mp;
Christoph Hellwigee1a47a2013-04-21 14:53:46 -050065
66 if (xfs_sb_version_hascrc(&mp->m_sb)) {
67 lblock_ok = lblock_ok &&
68 uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid) &&
69 block->bb_u.l.bb_blkno == cpu_to_be64(
70 bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
71 }
72
73 lblock_ok = lblock_ok &&
74 be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +110075 be16_to_cpu(block->bb_level) == level &&
76 be16_to_cpu(block->bb_numrecs) <=
Christoph Hellwigce5e42d2008-10-30 16:55:23 +110077 cur->bc_ops->get_maxrecs(cur, level) &&
Christoph Hellwig7cc95a82008-10-30 17:14:34 +110078 block->bb_u.l.bb_leftsib &&
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +100079 (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK) ||
Christoph Hellwig7cc95a82008-10-30 17:14:34 +110080 XFS_FSB_SANITY_CHECK(mp,
Christoph Hellwigee1a47a2013-04-21 14:53:46 -050081 be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
Christoph Hellwig7cc95a82008-10-30 17:14:34 +110082 block->bb_u.l.bb_rightsib &&
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +100083 (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK) ||
Christoph Hellwig7cc95a82008-10-30 17:14:34 +110084 XFS_FSB_SANITY_CHECK(mp,
Christoph Hellwigee1a47a2013-04-21 14:53:46 -050085 be64_to_cpu(block->bb_u.l.bb_rightsib)));
86
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +110087 if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
88 XFS_ERRTAG_BTREE_CHECK_LBLOCK,
89 XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
90 if (bp)
Christoph Hellwig0b1b2132009-12-14 23:14:59 +000091 trace_xfs_btree_corrupt(bp, _RET_IP_);
Christoph Hellwigee1a47a2013-04-21 14:53:46 -050092 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
Dave Chinner24513372014-06-25 14:58:08 +100093 return -EFSCORRUPTED;
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +110094 }
95 return 0;
96}
97
Christoph Hellwig3cc75242008-10-30 16:58:41 +110098STATIC int /* error (0 or EFSCORRUPTED) */
Linus Torvalds1da177e2005-04-16 15:20:36 -070099xfs_btree_check_sblock(
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100100 struct xfs_btree_cur *cur, /* btree cursor */
Christoph Hellwig7cc95a82008-10-30 17:14:34 +1100101 struct xfs_btree_block *block, /* btree short form block pointer */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700102 int level, /* level of the btree block */
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100103 struct xfs_buf *bp) /* buffer containing block */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104{
Christoph Hellwigee1a47a2013-04-21 14:53:46 -0500105 struct xfs_mount *mp; /* file system mount point */
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100106 struct xfs_buf *agbp; /* buffer for ag. freespace struct */
107 struct xfs_agf *agf; /* ag. freespace structure */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700108 xfs_agblock_t agflen; /* native ag. freespace length */
Christoph Hellwigee1a47a2013-04-21 14:53:46 -0500109 int sblock_ok = 1; /* block passes checks */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700110
Christoph Hellwigee1a47a2013-04-21 14:53:46 -0500111 mp = cur->bc_mp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112 agbp = cur->bc_private.a.agbp;
113 agf = XFS_BUF_TO_AGF(agbp);
Christoph Hellwig16259e72005-11-02 15:11:25 +1100114 agflen = be32_to_cpu(agf->agf_length);
Christoph Hellwigee1a47a2013-04-21 14:53:46 -0500115
116 if (xfs_sb_version_hascrc(&mp->m_sb)) {
117 sblock_ok = sblock_ok &&
118 uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid) &&
119 block->bb_u.s.bb_blkno == cpu_to_be64(
120 bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
121 }
122
123 sblock_ok = sblock_ok &&
124 be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
Christoph Hellwig16259e72005-11-02 15:11:25 +1100125 be16_to_cpu(block->bb_level) == level &&
126 be16_to_cpu(block->bb_numrecs) <=
Christoph Hellwigce5e42d2008-10-30 16:55:23 +1100127 cur->bc_ops->get_maxrecs(cur, level) &&
Christoph Hellwig69ef9212011-07-08 14:36:05 +0200128 (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
Christoph Hellwig7cc95a82008-10-30 17:14:34 +1100129 be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
130 block->bb_u.s.bb_leftsib &&
Christoph Hellwig69ef9212011-07-08 14:36:05 +0200131 (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
Christoph Hellwig7cc95a82008-10-30 17:14:34 +1100132 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
133 block->bb_u.s.bb_rightsib;
Christoph Hellwigee1a47a2013-04-21 14:53:46 -0500134
135 if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700136 XFS_ERRTAG_BTREE_CHECK_SBLOCK,
137 XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
138 if (bp)
Christoph Hellwig0b1b2132009-12-14 23:14:59 +0000139 trace_xfs_btree_corrupt(bp, _RET_IP_);
Christoph Hellwigee1a47a2013-04-21 14:53:46 -0500140 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
Dave Chinner24513372014-06-25 14:58:08 +1000141 return -EFSCORRUPTED;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142 }
143 return 0;
144}
145
146/*
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100147 * Debug routine: check that block header is ok.
148 */
149int
150xfs_btree_check_block(
151 struct xfs_btree_cur *cur, /* btree cursor */
152 struct xfs_btree_block *block, /* generic btree block pointer */
153 int level, /* level of the btree block */
154 struct xfs_buf *bp) /* buffer containing block, if any */
155{
Christoph Hellwig7cc95a82008-10-30 17:14:34 +1100156 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
157 return xfs_btree_check_lblock(cur, block, level, bp);
158 else
159 return xfs_btree_check_sblock(cur, block, level, bp);
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100160}
161
162/*
163 * Check that (long) pointer is ok.
164 */
165int /* error (0 or EFSCORRUPTED) */
166xfs_btree_check_lptr(
167 struct xfs_btree_cur *cur, /* btree cursor */
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +1000168 xfs_fsblock_t bno, /* btree block disk address */
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100169 int level) /* btree block level */
170{
171 XFS_WANT_CORRUPTED_RETURN(
172 level > 0 &&
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +1000173 bno != NULLFSBLOCK &&
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100174 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
175 return 0;
176}
177
Lachlan McIlroy24ee0e42008-10-30 17:05:26 +1100178#ifdef DEBUG
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100179/*
180 * Check that (short) pointer is ok.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181 */
Christoph Hellwig3cc75242008-10-30 16:58:41 +1100182STATIC int /* error (0 or EFSCORRUPTED) */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183xfs_btree_check_sptr(
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100184 struct xfs_btree_cur *cur, /* btree cursor */
185 xfs_agblock_t bno, /* btree block disk address */
186 int level) /* btree block level */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187{
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100188 xfs_agblock_t agblocks = cur->bc_mp->m_sb.sb_agblocks;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700189
Linus Torvalds1da177e2005-04-16 15:20:36 -0700190 XFS_WANT_CORRUPTED_RETURN(
191 level > 0 &&
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100192 bno != NULLAGBLOCK &&
193 bno != 0 &&
194 bno < agblocks);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700195 return 0;
196}
197
198/*
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100199 * Check that block ptr is ok.
200 */
Christoph Hellwig3cc75242008-10-30 16:58:41 +1100201STATIC int /* error (0 or EFSCORRUPTED) */
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100202xfs_btree_check_ptr(
203 struct xfs_btree_cur *cur, /* btree cursor */
204 union xfs_btree_ptr *ptr, /* btree block disk address */
205 int index, /* offset from ptr to check */
206 int level) /* btree block level */
207{
208 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
209 return xfs_btree_check_lptr(cur,
210 be64_to_cpu((&ptr->l)[index]), level);
211 } else {
212 return xfs_btree_check_sptr(cur,
213 be32_to_cpu((&ptr->s)[index]), level);
214 }
215}
Lachlan McIlroy24ee0e42008-10-30 17:05:26 +1100216#endif
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100217
218/*
Christoph Hellwigee1a47a2013-04-21 14:53:46 -0500219 * Calculate CRC on the whole btree block and stuff it into the
220 * long-form btree header.
221 *
222 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
223 * it into the buffer so recovery knows what the last modifcation was that made
224 * it to disk.
225 */
226void
227xfs_btree_lblock_calc_crc(
228 struct xfs_buf *bp)
229{
230 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
231 struct xfs_buf_log_item *bip = bp->b_fspriv;
232
233 if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
234 return;
235 if (bip)
236 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
Eric Sandeenf1dbcd72014-02-27 15:18:23 +1100237 xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
Christoph Hellwigee1a47a2013-04-21 14:53:46 -0500238}
239
240bool
241xfs_btree_lblock_verify_crc(
242 struct xfs_buf *bp)
243{
244 if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
Eric Sandeen51582172014-02-27 15:17:27 +1100245 return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
246
Christoph Hellwigee1a47a2013-04-21 14:53:46 -0500247 return true;
248}
249
250/*
251 * Calculate CRC on the whole btree block and stuff it into the
252 * short-form btree header.
253 *
254 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
255 * it into the buffer so recovery knows what the last modifcation was that made
256 * it to disk.
257 */
258void
259xfs_btree_sblock_calc_crc(
260 struct xfs_buf *bp)
261{
262 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
263 struct xfs_buf_log_item *bip = bp->b_fspriv;
264
265 if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
266 return;
267 if (bip)
268 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
Eric Sandeenf1dbcd72014-02-27 15:18:23 +1100269 xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
Christoph Hellwigee1a47a2013-04-21 14:53:46 -0500270}
271
272bool
273xfs_btree_sblock_verify_crc(
274 struct xfs_buf *bp)
275{
276 if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
Eric Sandeen51582172014-02-27 15:17:27 +1100277 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
278
Christoph Hellwigee1a47a2013-04-21 14:53:46 -0500279 return true;
280}
281
282/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283 * Delete the btree cursor.
284 */
285void
286xfs_btree_del_cursor(
287 xfs_btree_cur_t *cur, /* btree cursor */
288 int error) /* del because of error */
289{
290 int i; /* btree level */
291
292 /*
293 * Clear the buffer pointers, and release the buffers.
294 * If we're doing this in the face of an error, we
295 * need to make sure to inspect all of the entries
296 * in the bc_bufs array for buffers to be unlocked.
297 * This is because some of the btree code works from
298 * level n down to 0, and if we get an error along
299 * the way we won't have initialized all the entries
300 * down to 0.
301 */
302 for (i = 0; i < cur->bc_nlevels; i++) {
303 if (cur->bc_bufs[i])
Christoph Hellwigc0e59e12010-09-07 23:34:07 +0000304 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305 else if (!error)
306 break;
307 }
308 /*
309 * Can't free a bmap cursor without having dealt with the
310 * allocated indirect blocks' accounting.
311 */
312 ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
313 cur->bc_private.b.allocated == 0);
314 /*
315 * Free the cursor.
316 */
317 kmem_zone_free(xfs_btree_cur_zone, cur);
318}
319
320/*
321 * Duplicate the btree cursor.
322 * Allocate a new one, copy the record, re-get the buffers.
323 */
324int /* error */
325xfs_btree_dup_cursor(
326 xfs_btree_cur_t *cur, /* input cursor */
327 xfs_btree_cur_t **ncur) /* output cursor */
328{
329 xfs_buf_t *bp; /* btree block's buffer pointer */
330 int error; /* error return value */
331 int i; /* level number of btree block */
332 xfs_mount_t *mp; /* mount structure for filesystem */
333 xfs_btree_cur_t *new; /* new cursor value */
334 xfs_trans_t *tp; /* transaction pointer, can be NULL */
335
336 tp = cur->bc_tp;
337 mp = cur->bc_mp;
Christoph Hellwig561f7d12008-10-30 16:53:59 +1100338
Linus Torvalds1da177e2005-04-16 15:20:36 -0700339 /*
340 * Allocate a new cursor like the old one.
341 */
Christoph Hellwig561f7d12008-10-30 16:53:59 +1100342 new = cur->bc_ops->dup_cursor(cur);
343
Linus Torvalds1da177e2005-04-16 15:20:36 -0700344 /*
345 * Copy the record currently in the cursor.
346 */
347 new->bc_rec = cur->bc_rec;
Christoph Hellwig561f7d12008-10-30 16:53:59 +1100348
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349 /*
350 * For each level current, re-get the buffer and copy the ptr value.
351 */
352 for (i = 0; i < new->bc_nlevels; i++) {
353 new->bc_ptrs[i] = cur->bc_ptrs[i];
354 new->bc_ra[i] = cur->bc_ra[i];
Dave Chinnerc3f8fc72012-11-12 22:54:01 +1100355 bp = cur->bc_bufs[i];
356 if (bp) {
357 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
358 XFS_BUF_ADDR(bp), mp->m_bsize,
Dave Chinner3d3e6f62012-11-12 22:54:08 +1100359 0, &bp,
Dave Chinner1813dd62012-11-14 17:54:40 +1100360 cur->bc_ops->buf_ops);
Dave Chinnerc3f8fc72012-11-12 22:54:01 +1100361 if (error) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700362 xfs_btree_del_cursor(new, error);
363 *ncur = NULL;
364 return error;
365 }
Christoph Hellwigee1a47a2013-04-21 14:53:46 -0500366 }
367 new->bc_bufs[i] = bp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700368 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369 *ncur = new;
370 return 0;
371}
372
373/*
Christoph Hellwig65f1eae2008-10-30 16:55:34 +1100374 * XFS btree block layout and addressing:
375 *
376 * There are two types of blocks in the btree: leaf and non-leaf blocks.
377 *
378 * The leaf record start with a header then followed by records containing
379 * the values. A non-leaf block also starts with the same header, and
380 * then first contains lookup keys followed by an equal number of pointers
381 * to the btree blocks at the previous level.
382 *
383 * +--------+-------+-------+-------+-------+-------+-------+
384 * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
385 * +--------+-------+-------+-------+-------+-------+-------+
386 *
387 * +--------+-------+-------+-------+-------+-------+-------+
388 * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
389 * +--------+-------+-------+-------+-------+-------+-------+
390 *
391 * The header is called struct xfs_btree_block for reasons better left unknown
392 * and comes in different versions for short (32bit) and long (64bit) block
393 * pointers. The record and key structures are defined by the btree instances
394 * and opaque to the btree core. The block pointers are simple disk endian
395 * integers, available in a short (32bit) and long (64bit) variant.
396 *
397 * The helpers below calculate the offset of a given record, key or pointer
398 * into a btree block (xfs_btree_*_offset) or return a pointer to the given
399 * record, key or pointer (xfs_btree_*_addr). Note that all addressing
400 * inside the btree block is done using indices starting at one, not zero!
401 */
402
403/*
404 * Return size of the btree block header for this btree instance.
405 */
406static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
407{
Christoph Hellwigee1a47a2013-04-21 14:53:46 -0500408 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
409 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
410 return XFS_BTREE_LBLOCK_CRC_LEN;
411 return XFS_BTREE_LBLOCK_LEN;
412 }
413 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
414 return XFS_BTREE_SBLOCK_CRC_LEN;
415 return XFS_BTREE_SBLOCK_LEN;
Christoph Hellwig65f1eae2008-10-30 16:55:34 +1100416}
417
418/*
419 * Return size of btree block pointers for this btree instance.
420 */
421static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
422{
423 return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
424 sizeof(__be64) : sizeof(__be32);
425}
426
427/*
428 * Calculate offset of the n-th record in a btree block.
429 */
430STATIC size_t
431xfs_btree_rec_offset(
432 struct xfs_btree_cur *cur,
433 int n)
434{
435 return xfs_btree_block_len(cur) +
436 (n - 1) * cur->bc_ops->rec_len;
437}
438
439/*
440 * Calculate offset of the n-th key in a btree block.
441 */
442STATIC size_t
443xfs_btree_key_offset(
444 struct xfs_btree_cur *cur,
445 int n)
446{
447 return xfs_btree_block_len(cur) +
448 (n - 1) * cur->bc_ops->key_len;
449}
450
451/*
452 * Calculate offset of the n-th block pointer in a btree block.
453 */
454STATIC size_t
455xfs_btree_ptr_offset(
456 struct xfs_btree_cur *cur,
457 int n,
458 int level)
459{
460 return xfs_btree_block_len(cur) +
461 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
462 (n - 1) * xfs_btree_ptr_len(cur);
463}
464
465/*
466 * Return a pointer to the n-th record in the btree block.
467 */
468STATIC union xfs_btree_rec *
469xfs_btree_rec_addr(
470 struct xfs_btree_cur *cur,
471 int n,
472 struct xfs_btree_block *block)
473{
474 return (union xfs_btree_rec *)
475 ((char *)block + xfs_btree_rec_offset(cur, n));
476}
477
478/*
479 * Return a pointer to the n-th key in the btree block.
480 */
481STATIC union xfs_btree_key *
482xfs_btree_key_addr(
483 struct xfs_btree_cur *cur,
484 int n,
485 struct xfs_btree_block *block)
486{
487 return (union xfs_btree_key *)
488 ((char *)block + xfs_btree_key_offset(cur, n));
489}
490
491/*
492 * Return a pointer to the n-th block pointer in the btree block.
493 */
494STATIC union xfs_btree_ptr *
495xfs_btree_ptr_addr(
496 struct xfs_btree_cur *cur,
497 int n,
498 struct xfs_btree_block *block)
499{
500 int level = xfs_btree_get_level(block);
501
502 ASSERT(block->bb_level != 0);
503
504 return (union xfs_btree_ptr *)
505 ((char *)block + xfs_btree_ptr_offset(cur, n, level));
506}
507
508/*
Zhi Yong Wu1cb93862013-08-07 10:11:05 +0000509 * Get the root block which is stored in the inode.
Christoph Hellwig8186e512008-10-30 16:54:22 +1100510 *
511 * For now this btree implementation assumes the btree root is always
512 * stored in the if_broot field of an inode fork.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700513 */
Christoph Hellwig8186e512008-10-30 16:54:22 +1100514STATIC struct xfs_btree_block *
515xfs_btree_get_iroot(
516 struct xfs_btree_cur *cur)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700517{
Christoph Hellwig8186e512008-10-30 16:54:22 +1100518 struct xfs_ifork *ifp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519
Christoph Hellwig8186e512008-10-30 16:54:22 +1100520 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
521 return (struct xfs_btree_block *)ifp->if_broot;
522}
523
524/*
525 * Retrieve the block pointer from the cursor at the given level.
526 * This may be an inode btree root or from a buffer.
527 */
528STATIC struct xfs_btree_block * /* generic btree block pointer */
529xfs_btree_get_block(
530 struct xfs_btree_cur *cur, /* btree cursor */
531 int level, /* level in btree */
532 struct xfs_buf **bpp) /* buffer containing the block */
533{
534 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
535 (level == cur->bc_nlevels - 1)) {
536 *bpp = NULL;
537 return xfs_btree_get_iroot(cur);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700538 }
Christoph Hellwig8186e512008-10-30 16:54:22 +1100539
540 *bpp = cur->bc_bufs[level];
541 return XFS_BUF_TO_BLOCK(*bpp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700542}
543
544/*
545 * Get a buffer for the block, return it with no data read.
546 * Long-form addressing.
547 */
548xfs_buf_t * /* buffer for fsbno */
549xfs_btree_get_bufl(
550 xfs_mount_t *mp, /* file system mount point */
551 xfs_trans_t *tp, /* transaction pointer */
552 xfs_fsblock_t fsbno, /* file system block number */
553 uint lock) /* lock flags for get_buf */
554{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555 xfs_daddr_t d; /* real disk block address */
556
557 ASSERT(fsbno != NULLFSBLOCK);
558 d = XFS_FSB_TO_DADDR(mp, fsbno);
Dave Chinner36de9552014-06-06 16:02:12 +1000559 return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700560}
561
562/*
563 * Get a buffer for the block, return it with no data read.
564 * Short-form addressing.
565 */
566xfs_buf_t * /* buffer for agno/agbno */
567xfs_btree_get_bufs(
568 xfs_mount_t *mp, /* file system mount point */
569 xfs_trans_t *tp, /* transaction pointer */
570 xfs_agnumber_t agno, /* allocation group number */
571 xfs_agblock_t agbno, /* allocation group block number */
572 uint lock) /* lock flags for get_buf */
573{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700574 xfs_daddr_t d; /* real disk block address */
575
576 ASSERT(agno != NULLAGNUMBER);
577 ASSERT(agbno != NULLAGBLOCK);
578 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
Dave Chinner36de9552014-06-06 16:02:12 +1000579 return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700580}
581
582/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583 * Check for the cursor referring to the last block at the given level.
584 */
585int /* 1=is last block, 0=not last block */
586xfs_btree_islastblock(
587 xfs_btree_cur_t *cur, /* btree cursor */
588 int level) /* level to check */
589{
Christoph Hellwig7cc95a82008-10-30 17:14:34 +1100590 struct xfs_btree_block *block; /* generic btree block pointer */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700591 xfs_buf_t *bp; /* buffer containing block */
592
593 block = xfs_btree_get_block(cur, level, &bp);
594 xfs_btree_check_block(cur, block, level, bp);
Christoph Hellwige99ab902008-10-30 16:54:33 +1100595 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +1000596 return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597 else
Christoph Hellwig69ef9212011-07-08 14:36:05 +0200598 return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700599}
600
601/*
Christoph Hellwigcdcf43332008-08-13 16:23:50 +1000602 * Change the cursor to point to the first record at the given level.
603 * Other levels are unaffected.
604 */
Christoph Hellwig3cc75242008-10-30 16:58:41 +1100605STATIC int /* success=1, failure=0 */
Christoph Hellwigcdcf43332008-08-13 16:23:50 +1000606xfs_btree_firstrec(
607 xfs_btree_cur_t *cur, /* btree cursor */
608 int level) /* level to change */
609{
Christoph Hellwig7cc95a82008-10-30 17:14:34 +1100610 struct xfs_btree_block *block; /* generic btree block pointer */
Christoph Hellwigcdcf43332008-08-13 16:23:50 +1000611 xfs_buf_t *bp; /* buffer containing block */
612
613 /*
614 * Get the block pointer for this level.
615 */
616 block = xfs_btree_get_block(cur, level, &bp);
617 xfs_btree_check_block(cur, block, level, bp);
618 /*
619 * It's empty, there is no such record.
620 */
Christoph Hellwigf2277f02008-10-30 16:53:47 +1100621 if (!block->bb_numrecs)
Christoph Hellwigcdcf43332008-08-13 16:23:50 +1000622 return 0;
623 /*
624 * Set the ptr value to 1, that's the first record/key.
625 */
626 cur->bc_ptrs[level] = 1;
627 return 1;
628}
629
630/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631 * Change the cursor to point to the last record in the current block
632 * at the given level. Other levels are unaffected.
633 */
Christoph Hellwig3cc75242008-10-30 16:58:41 +1100634STATIC int /* success=1, failure=0 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700635xfs_btree_lastrec(
636 xfs_btree_cur_t *cur, /* btree cursor */
637 int level) /* level to change */
638{
Christoph Hellwig7cc95a82008-10-30 17:14:34 +1100639 struct xfs_btree_block *block; /* generic btree block pointer */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640 xfs_buf_t *bp; /* buffer containing block */
641
642 /*
643 * Get the block pointer for this level.
644 */
645 block = xfs_btree_get_block(cur, level, &bp);
646 xfs_btree_check_block(cur, block, level, bp);
647 /*
648 * It's empty, there is no such record.
649 */
Christoph Hellwigf2277f02008-10-30 16:53:47 +1100650 if (!block->bb_numrecs)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700651 return 0;
652 /*
653 * Set the ptr value to numrecs, that's the last record/key.
654 */
Christoph Hellwigf2277f02008-10-30 16:53:47 +1100655 cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700656 return 1;
657}
658
659/*
660 * Compute first and last byte offsets for the fields given.
661 * Interprets the offsets table, which contains struct field offsets.
662 */
663void
664xfs_btree_offsets(
665 __int64_t fields, /* bitmask of fields */
666 const short *offsets, /* table of field offsets */
667 int nbits, /* number of bits to inspect */
668 int *first, /* output: first byte offset */
669 int *last) /* output: last byte offset */
670{
671 int i; /* current bit number */
672 __int64_t imask; /* mask for current bit number */
673
674 ASSERT(fields != 0);
675 /*
676 * Find the lowest bit, so the first byte offset.
677 */
678 for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
679 if (imask & fields) {
680 *first = offsets[i];
681 break;
682 }
683 }
684 /*
685 * Find the highest bit, so the last byte offset.
686 */
687 for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
688 if (imask & fields) {
689 *last = offsets[i + 1] - 1;
690 break;
691 }
692 }
693}
694
695/*
696 * Get a buffer for the block, return it read in.
697 * Long-form addressing.
698 */
Dave Chinner3d3e6f62012-11-12 22:54:08 +1100699int
Linus Torvalds1da177e2005-04-16 15:20:36 -0700700xfs_btree_read_bufl(
Dave Chinner3d3e6f62012-11-12 22:54:08 +1100701 struct xfs_mount *mp, /* file system mount point */
702 struct xfs_trans *tp, /* transaction pointer */
703 xfs_fsblock_t fsbno, /* file system block number */
704 uint lock, /* lock flags for read_buf */
705 struct xfs_buf **bpp, /* buffer for fsbno */
706 int refval, /* ref count value for buffer */
Dave Chinner1813dd62012-11-14 17:54:40 +1100707 const struct xfs_buf_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700708{
Dave Chinner3d3e6f62012-11-12 22:54:08 +1100709 struct xfs_buf *bp; /* return value */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700710 xfs_daddr_t d; /* real disk block address */
Dave Chinner3d3e6f62012-11-12 22:54:08 +1100711 int error;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700712
713 ASSERT(fsbno != NULLFSBLOCK);
714 d = XFS_FSB_TO_DADDR(mp, fsbno);
Dave Chinnerc3f8fc72012-11-12 22:54:01 +1100715 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
Dave Chinner1813dd62012-11-14 17:54:40 +1100716 mp->m_bsize, lock, &bp, ops);
Dave Chinnerc3f8fc72012-11-12 22:54:01 +1100717 if (error)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700718 return error;
Dave Chinner821eb212010-12-02 16:31:13 +1100719 if (bp)
Christoph Hellwig38f23232011-10-10 16:52:45 +0000720 xfs_buf_set_ref(bp, refval);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700721 *bpp = bp;
722 return 0;
723}
724
725/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700726 * Read-ahead the block, don't wait for it, don't return a buffer.
727 * Long-form addressing.
728 */
729/* ARGSUSED */
730void
731xfs_btree_reada_bufl(
Dave Chinner3d3e6f62012-11-12 22:54:08 +1100732 struct xfs_mount *mp, /* file system mount point */
733 xfs_fsblock_t fsbno, /* file system block number */
734 xfs_extlen_t count, /* count of filesystem blocks */
Dave Chinner1813dd62012-11-14 17:54:40 +1100735 const struct xfs_buf_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700736{
737 xfs_daddr_t d;
738
739 ASSERT(fsbno != NULLFSBLOCK);
740 d = XFS_FSB_TO_DADDR(mp, fsbno);
Dave Chinner1813dd62012-11-14 17:54:40 +1100741 xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700742}
743
744/*
745 * Read-ahead the block, don't wait for it, don't return a buffer.
746 * Short-form addressing.
747 */
748/* ARGSUSED */
749void
750xfs_btree_reada_bufs(
Dave Chinner3d3e6f62012-11-12 22:54:08 +1100751 struct xfs_mount *mp, /* file system mount point */
752 xfs_agnumber_t agno, /* allocation group number */
753 xfs_agblock_t agbno, /* allocation group block number */
754 xfs_extlen_t count, /* count of filesystem blocks */
Dave Chinner1813dd62012-11-14 17:54:40 +1100755 const struct xfs_buf_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756{
757 xfs_daddr_t d;
758
759 ASSERT(agno != NULLAGNUMBER);
760 ASSERT(agbno != NULLAGBLOCK);
761 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
Dave Chinner1813dd62012-11-14 17:54:40 +1100762 xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700763}
764
Christoph Hellwigb524bfe2008-10-30 16:54:43 +1100765STATIC int
766xfs_btree_readahead_lblock(
767 struct xfs_btree_cur *cur,
768 int lr,
769 struct xfs_btree_block *block)
770{
771 int rval = 0;
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +1000772 xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
773 xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
Christoph Hellwigb524bfe2008-10-30 16:54:43 +1100774
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +1000775 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
Dave Chinner3d3e6f62012-11-12 22:54:08 +1100776 xfs_btree_reada_bufl(cur->bc_mp, left, 1,
Dave Chinner1813dd62012-11-14 17:54:40 +1100777 cur->bc_ops->buf_ops);
Christoph Hellwigb524bfe2008-10-30 16:54:43 +1100778 rval++;
779 }
780
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +1000781 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
Dave Chinner3d3e6f62012-11-12 22:54:08 +1100782 xfs_btree_reada_bufl(cur->bc_mp, right, 1,
Dave Chinner1813dd62012-11-14 17:54:40 +1100783 cur->bc_ops->buf_ops);
Christoph Hellwigb524bfe2008-10-30 16:54:43 +1100784 rval++;
785 }
786
787 return rval;
788}
789
790STATIC int
791xfs_btree_readahead_sblock(
792 struct xfs_btree_cur *cur,
793 int lr,
794 struct xfs_btree_block *block)
795{
796 int rval = 0;
797 xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib);
798 xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib);
799
800
801 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
802 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
Dave Chinner1813dd62012-11-14 17:54:40 +1100803 left, 1, cur->bc_ops->buf_ops);
Christoph Hellwigb524bfe2008-10-30 16:54:43 +1100804 rval++;
805 }
806
807 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
808 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
Dave Chinner1813dd62012-11-14 17:54:40 +1100809 right, 1, cur->bc_ops->buf_ops);
Christoph Hellwigb524bfe2008-10-30 16:54:43 +1100810 rval++;
811 }
812
813 return rval;
814}
815
Linus Torvalds1da177e2005-04-16 15:20:36 -0700816/*
817 * Read-ahead btree blocks, at the given level.
818 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
819 */
Christoph Hellwig3cc75242008-10-30 16:58:41 +1100820STATIC int
Christoph Hellwigb524bfe2008-10-30 16:54:43 +1100821xfs_btree_readahead(
822 struct xfs_btree_cur *cur, /* btree cursor */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700823 int lev, /* level in btree */
824 int lr) /* left/right bits */
825{
Christoph Hellwigb524bfe2008-10-30 16:54:43 +1100826 struct xfs_btree_block *block;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700827
Christoph Hellwigb524bfe2008-10-30 16:54:43 +1100828 /*
829 * No readahead needed if we are at the root level and the
830 * btree root is stored in the inode.
831 */
832 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
833 (lev == cur->bc_nlevels - 1))
834 return 0;
835
836 if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
837 return 0;
838
Linus Torvalds1da177e2005-04-16 15:20:36 -0700839 cur->bc_ra[lev] |= lr;
Christoph Hellwigb524bfe2008-10-30 16:54:43 +1100840 block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
841
842 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
843 return xfs_btree_readahead_lblock(cur, lr, block);
844 return xfs_btree_readahead_sblock(cur, lr, block);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700845}
846
Dave Chinner21b5c972013-08-30 10:23:44 +1000847STATIC xfs_daddr_t
848xfs_btree_ptr_to_daddr(
849 struct xfs_btree_cur *cur,
850 union xfs_btree_ptr *ptr)
851{
852 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +1000853 ASSERT(ptr->l != cpu_to_be64(NULLFSBLOCK));
Dave Chinner21b5c972013-08-30 10:23:44 +1000854
855 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
856 } else {
857 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
858 ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
859
860 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
861 be32_to_cpu(ptr->s));
862 }
863}
864
865/*
866 * Readahead @count btree blocks at the given @ptr location.
867 *
868 * We don't need to care about long or short form btrees here as we have a
869 * method of converting the ptr directly to a daddr available to us.
870 */
871STATIC void
872xfs_btree_readahead_ptr(
873 struct xfs_btree_cur *cur,
874 union xfs_btree_ptr *ptr,
875 xfs_extlen_t count)
876{
877 xfs_buf_readahead(cur->bc_mp->m_ddev_targp,
878 xfs_btree_ptr_to_daddr(cur, ptr),
879 cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
880}
881
Linus Torvalds1da177e2005-04-16 15:20:36 -0700882/*
883 * Set the buffer for level "lev" in the cursor to bp, releasing
884 * any previous buffer.
885 */
Christoph Hellwigc0e59e12010-09-07 23:34:07 +0000886STATIC void
Linus Torvalds1da177e2005-04-16 15:20:36 -0700887xfs_btree_setbuf(
888 xfs_btree_cur_t *cur, /* btree cursor */
889 int lev, /* level in btree */
890 xfs_buf_t *bp) /* new buffer to set */
891{
Christoph Hellwig7cc95a82008-10-30 17:14:34 +1100892 struct xfs_btree_block *b; /* btree block */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700893
Christoph Hellwigc0e59e12010-09-07 23:34:07 +0000894 if (cur->bc_bufs[lev])
895 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700896 cur->bc_bufs[lev] = bp;
897 cur->bc_ra[lev] = 0;
Christoph Hellwigc0e59e12010-09-07 23:34:07 +0000898
Linus Torvalds1da177e2005-04-16 15:20:36 -0700899 b = XFS_BUF_TO_BLOCK(bp);
Christoph Hellwige99ab902008-10-30 16:54:33 +1100900 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +1000901 if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700902 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +1000903 if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700904 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
905 } else {
Christoph Hellwig69ef9212011-07-08 14:36:05 +0200906 if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700907 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
Christoph Hellwig69ef9212011-07-08 14:36:05 +0200908 if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700909 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
910 }
911}
Christoph Hellwig637aa502008-10-30 16:55:45 +1100912
913STATIC int
914xfs_btree_ptr_is_null(
915 struct xfs_btree_cur *cur,
916 union xfs_btree_ptr *ptr)
917{
918 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +1000919 return ptr->l == cpu_to_be64(NULLFSBLOCK);
Christoph Hellwig637aa502008-10-30 16:55:45 +1100920 else
Christoph Hellwig69ef9212011-07-08 14:36:05 +0200921 return ptr->s == cpu_to_be32(NULLAGBLOCK);
Christoph Hellwig637aa502008-10-30 16:55:45 +1100922}
923
Christoph Hellwig4b22a572008-10-30 16:57:40 +1100924STATIC void
925xfs_btree_set_ptr_null(
926 struct xfs_btree_cur *cur,
927 union xfs_btree_ptr *ptr)
928{
929 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +1000930 ptr->l = cpu_to_be64(NULLFSBLOCK);
Christoph Hellwig4b22a572008-10-30 16:57:40 +1100931 else
932 ptr->s = cpu_to_be32(NULLAGBLOCK);
933}
934
Christoph Hellwig637aa502008-10-30 16:55:45 +1100935/*
936 * Get/set/init sibling pointers
937 */
938STATIC void
939xfs_btree_get_sibling(
940 struct xfs_btree_cur *cur,
941 struct xfs_btree_block *block,
942 union xfs_btree_ptr *ptr,
943 int lr)
944{
945 ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
946
947 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
948 if (lr == XFS_BB_RIGHTSIB)
949 ptr->l = block->bb_u.l.bb_rightsib;
950 else
951 ptr->l = block->bb_u.l.bb_leftsib;
952 } else {
953 if (lr == XFS_BB_RIGHTSIB)
954 ptr->s = block->bb_u.s.bb_rightsib;
955 else
956 ptr->s = block->bb_u.s.bb_leftsib;
957 }
958}
959
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +1100960STATIC void
961xfs_btree_set_sibling(
962 struct xfs_btree_cur *cur,
963 struct xfs_btree_block *block,
964 union xfs_btree_ptr *ptr,
965 int lr)
966{
967 ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
968
969 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
970 if (lr == XFS_BB_RIGHTSIB)
971 block->bb_u.l.bb_rightsib = ptr->l;
972 else
973 block->bb_u.l.bb_leftsib = ptr->l;
974 } else {
975 if (lr == XFS_BB_RIGHTSIB)
976 block->bb_u.s.bb_rightsib = ptr->s;
977 else
978 block->bb_u.s.bb_leftsib = ptr->s;
979 }
980}
981
Dave Chinnerb64f3a32012-11-13 16:40:27 -0600982void
Christoph Hellwigee1a47a2013-04-21 14:53:46 -0500983xfs_btree_init_block_int(
984 struct xfs_mount *mp,
985 struct xfs_btree_block *buf,
986 xfs_daddr_t blkno,
987 __u32 magic,
988 __u16 level,
989 __u16 numrecs,
990 __u64 owner,
991 unsigned int flags)
992{
993 buf->bb_magic = cpu_to_be32(magic);
994 buf->bb_level = cpu_to_be16(level);
995 buf->bb_numrecs = cpu_to_be16(numrecs);
996
997 if (flags & XFS_BTREE_LONG_PTRS) {
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +1000998 buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
999 buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001000 if (flags & XFS_BTREE_CRC_BLOCKS) {
1001 buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
1002 buf->bb_u.l.bb_owner = cpu_to_be64(owner);
1003 uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid);
1004 buf->bb_u.l.bb_pad = 0;
Dave Chinnerb58fa552013-08-28 21:22:46 +10001005 buf->bb_u.l.bb_lsn = 0;
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001006 }
1007 } else {
1008 /* owner is a 32 bit value on short blocks */
1009 __u32 __owner = (__u32)owner;
1010
1011 buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
1012 buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
1013 if (flags & XFS_BTREE_CRC_BLOCKS) {
1014 buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
1015 buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
1016 uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid);
Dave Chinnerb58fa552013-08-28 21:22:46 +10001017 buf->bb_u.s.bb_lsn = 0;
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001018 }
1019 }
1020}
1021
1022void
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11001023xfs_btree_init_block(
Dave Chinnerb64f3a32012-11-13 16:40:27 -06001024 struct xfs_mount *mp,
1025 struct xfs_buf *bp,
1026 __u32 magic,
1027 __u16 level,
1028 __u16 numrecs,
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001029 __u64 owner,
Dave Chinnerb64f3a32012-11-13 16:40:27 -06001030 unsigned int flags)
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11001031{
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001032 xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
1033 magic, level, numrecs, owner, flags);
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11001034}
1035
Dave Chinnerb64f3a32012-11-13 16:40:27 -06001036STATIC void
1037xfs_btree_init_block_cur(
1038 struct xfs_btree_cur *cur,
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001039 struct xfs_buf *bp,
Dave Chinnerb64f3a32012-11-13 16:40:27 -06001040 int level,
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001041 int numrecs)
Dave Chinnerb64f3a32012-11-13 16:40:27 -06001042{
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001043 __u64 owner;
1044
1045 /*
1046 * we can pull the owner from the cursor right now as the different
1047 * owners align directly with the pointer size of the btree. This may
1048 * change in future, but is safe for current users of the generic btree
1049 * code.
1050 */
1051 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1052 owner = cur->bc_private.b.ip->i_ino;
1053 else
1054 owner = cur->bc_private.a.agno;
1055
1056 xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
1057 xfs_btree_magic(cur), level, numrecs,
1058 owner, cur->bc_flags);
Dave Chinnerb64f3a32012-11-13 16:40:27 -06001059}
1060
Christoph Hellwig278d0ca2008-10-30 16:56:32 +11001061/*
1062 * Return true if ptr is the last record in the btree and
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001063 * we need to track updates to this record. The decision
Christoph Hellwig278d0ca2008-10-30 16:56:32 +11001064 * will be further refined in the update_lastrec method.
1065 */
1066STATIC int
1067xfs_btree_is_lastrec(
1068 struct xfs_btree_cur *cur,
1069 struct xfs_btree_block *block,
1070 int level)
1071{
1072 union xfs_btree_ptr ptr;
1073
1074 if (level > 0)
1075 return 0;
1076 if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
1077 return 0;
1078
1079 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1080 if (!xfs_btree_ptr_is_null(cur, &ptr))
1081 return 0;
1082 return 1;
1083}
1084
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11001085STATIC void
1086xfs_btree_buf_to_ptr(
1087 struct xfs_btree_cur *cur,
1088 struct xfs_buf *bp,
1089 union xfs_btree_ptr *ptr)
1090{
1091 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1092 ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
1093 XFS_BUF_ADDR(bp)));
1094 else {
Eric Sandeen9d87c312009-01-14 23:22:07 -06001095 ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11001096 XFS_BUF_ADDR(bp)));
1097 }
1098}
1099
Christoph Hellwig637aa502008-10-30 16:55:45 +11001100STATIC void
1101xfs_btree_set_refs(
1102 struct xfs_btree_cur *cur,
1103 struct xfs_buf *bp)
1104{
1105 switch (cur->bc_btnum) {
1106 case XFS_BTNUM_BNO:
1107 case XFS_BTNUM_CNT:
Christoph Hellwig38f23232011-10-10 16:52:45 +00001108 xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
Christoph Hellwig637aa502008-10-30 16:55:45 +11001109 break;
1110 case XFS_BTNUM_INO:
Brian Fosteraafc3c22014-04-24 16:00:52 +10001111 case XFS_BTNUM_FINO:
Christoph Hellwig38f23232011-10-10 16:52:45 +00001112 xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
Christoph Hellwig637aa502008-10-30 16:55:45 +11001113 break;
1114 case XFS_BTNUM_BMAP:
Christoph Hellwig38f23232011-10-10 16:52:45 +00001115 xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
Christoph Hellwig637aa502008-10-30 16:55:45 +11001116 break;
1117 default:
1118 ASSERT(0);
1119 }
1120}
1121
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11001122STATIC int
1123xfs_btree_get_buf_block(
1124 struct xfs_btree_cur *cur,
1125 union xfs_btree_ptr *ptr,
1126 int flags,
1127 struct xfs_btree_block **block,
1128 struct xfs_buf **bpp)
1129{
1130 struct xfs_mount *mp = cur->bc_mp;
1131 xfs_daddr_t d;
1132
1133 /* need to sort out how callers deal with failures first */
Christoph Hellwig0cadda12010-01-19 09:56:44 +00001134 ASSERT(!(flags & XBF_TRYLOCK));
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11001135
1136 d = xfs_btree_ptr_to_daddr(cur, ptr);
1137 *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
1138 mp->m_bsize, flags);
1139
Chandra Seetharaman2a30f36d2011-09-20 13:56:55 +00001140 if (!*bpp)
Dave Chinner24513372014-06-25 14:58:08 +10001141 return -ENOMEM;
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11001142
Dave Chinner1813dd62012-11-14 17:54:40 +11001143 (*bpp)->b_ops = cur->bc_ops->buf_ops;
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11001144 *block = XFS_BUF_TO_BLOCK(*bpp);
1145 return 0;
1146}
1147
Christoph Hellwig637aa502008-10-30 16:55:45 +11001148/*
1149 * Read in the buffer at the given ptr and return the buffer and
1150 * the block pointer within the buffer.
1151 */
1152STATIC int
1153xfs_btree_read_buf_block(
1154 struct xfs_btree_cur *cur,
1155 union xfs_btree_ptr *ptr,
Christoph Hellwig637aa502008-10-30 16:55:45 +11001156 int flags,
1157 struct xfs_btree_block **block,
1158 struct xfs_buf **bpp)
1159{
1160 struct xfs_mount *mp = cur->bc_mp;
1161 xfs_daddr_t d;
1162 int error;
1163
1164 /* need to sort out how callers deal with failures first */
Christoph Hellwig0cadda12010-01-19 09:56:44 +00001165 ASSERT(!(flags & XBF_TRYLOCK));
Christoph Hellwig637aa502008-10-30 16:55:45 +11001166
1167 d = xfs_btree_ptr_to_daddr(cur, ptr);
1168 error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
Dave Chinner3d3e6f62012-11-12 22:54:08 +11001169 mp->m_bsize, flags, bpp,
Dave Chinner1813dd62012-11-14 17:54:40 +11001170 cur->bc_ops->buf_ops);
Christoph Hellwig637aa502008-10-30 16:55:45 +11001171 if (error)
1172 return error;
1173
Christoph Hellwig637aa502008-10-30 16:55:45 +11001174 xfs_btree_set_refs(cur, *bpp);
1175 *block = XFS_BUF_TO_BLOCK(*bpp);
Dave Chinner3d3e6f62012-11-12 22:54:08 +11001176 return 0;
Christoph Hellwig637aa502008-10-30 16:55:45 +11001177}
1178
1179/*
Christoph Hellwig38bb7422008-10-30 16:56:22 +11001180 * Copy keys from one btree block to another.
1181 */
1182STATIC void
1183xfs_btree_copy_keys(
1184 struct xfs_btree_cur *cur,
1185 union xfs_btree_key *dst_key,
1186 union xfs_btree_key *src_key,
1187 int numkeys)
1188{
1189 ASSERT(numkeys >= 0);
1190 memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1191}
1192
1193/*
Christoph Hellwig278d0ca2008-10-30 16:56:32 +11001194 * Copy records from one btree block to another.
1195 */
1196STATIC void
1197xfs_btree_copy_recs(
1198 struct xfs_btree_cur *cur,
1199 union xfs_btree_rec *dst_rec,
1200 union xfs_btree_rec *src_rec,
1201 int numrecs)
1202{
1203 ASSERT(numrecs >= 0);
1204 memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
1205}
1206
1207/*
Christoph Hellwig9eaead52008-10-30 16:56:43 +11001208 * Copy block pointers from one btree block to another.
1209 */
1210STATIC void
1211xfs_btree_copy_ptrs(
1212 struct xfs_btree_cur *cur,
1213 union xfs_btree_ptr *dst_ptr,
1214 union xfs_btree_ptr *src_ptr,
1215 int numptrs)
1216{
1217 ASSERT(numptrs >= 0);
1218 memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
1219}
1220
1221/*
1222 * Shift keys one index left/right inside a single btree block.
1223 */
1224STATIC void
1225xfs_btree_shift_keys(
1226 struct xfs_btree_cur *cur,
1227 union xfs_btree_key *key,
1228 int dir,
1229 int numkeys)
1230{
1231 char *dst_key;
1232
1233 ASSERT(numkeys >= 0);
1234 ASSERT(dir == 1 || dir == -1);
1235
1236 dst_key = (char *)key + (dir * cur->bc_ops->key_len);
1237 memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
1238}
1239
1240/*
1241 * Shift records one index left/right inside a single btree block.
1242 */
1243STATIC void
1244xfs_btree_shift_recs(
1245 struct xfs_btree_cur *cur,
1246 union xfs_btree_rec *rec,
1247 int dir,
1248 int numrecs)
1249{
1250 char *dst_rec;
1251
1252 ASSERT(numrecs >= 0);
1253 ASSERT(dir == 1 || dir == -1);
1254
1255 dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
1256 memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
1257}
1258
1259/*
1260 * Shift block pointers one index left/right inside a single btree block.
1261 */
1262STATIC void
1263xfs_btree_shift_ptrs(
1264 struct xfs_btree_cur *cur,
1265 union xfs_btree_ptr *ptr,
1266 int dir,
1267 int numptrs)
1268{
1269 char *dst_ptr;
1270
1271 ASSERT(numptrs >= 0);
1272 ASSERT(dir == 1 || dir == -1);
1273
1274 dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
1275 memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
1276}
1277
1278/*
Christoph Hellwig38bb7422008-10-30 16:56:22 +11001279 * Log key values from the btree block.
1280 */
1281STATIC void
1282xfs_btree_log_keys(
1283 struct xfs_btree_cur *cur,
1284 struct xfs_buf *bp,
1285 int first,
1286 int last)
1287{
1288 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1289 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1290
1291 if (bp) {
Dave Chinner61fe1352013-04-03 16:11:30 +11001292 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
Christoph Hellwig38bb7422008-10-30 16:56:22 +11001293 xfs_trans_log_buf(cur->bc_tp, bp,
1294 xfs_btree_key_offset(cur, first),
1295 xfs_btree_key_offset(cur, last + 1) - 1);
1296 } else {
1297 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1298 xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1299 }
1300
1301 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1302}
1303
1304/*
Christoph Hellwig278d0ca2008-10-30 16:56:32 +11001305 * Log record values from the btree block.
1306 */
Christoph Hellwigfd6bcc5b2008-10-30 16:58:21 +11001307void
Christoph Hellwig278d0ca2008-10-30 16:56:32 +11001308xfs_btree_log_recs(
1309 struct xfs_btree_cur *cur,
1310 struct xfs_buf *bp,
1311 int first,
1312 int last)
1313{
1314 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1315 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1316
Dave Chinner61fe1352013-04-03 16:11:30 +11001317 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
Christoph Hellwig278d0ca2008-10-30 16:56:32 +11001318 xfs_trans_log_buf(cur->bc_tp, bp,
1319 xfs_btree_rec_offset(cur, first),
1320 xfs_btree_rec_offset(cur, last + 1) - 1);
1321
1322 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1323}
1324
1325/*
Christoph Hellwig9eaead52008-10-30 16:56:43 +11001326 * Log block pointer fields from a btree block (nonleaf).
1327 */
1328STATIC void
1329xfs_btree_log_ptrs(
1330 struct xfs_btree_cur *cur, /* btree cursor */
1331 struct xfs_buf *bp, /* buffer containing btree block */
1332 int first, /* index of first pointer to log */
1333 int last) /* index of last pointer to log */
1334{
1335 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1336 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1337
1338 if (bp) {
1339 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
1340 int level = xfs_btree_get_level(block);
1341
Dave Chinner61fe1352013-04-03 16:11:30 +11001342 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
Christoph Hellwig9eaead52008-10-30 16:56:43 +11001343 xfs_trans_log_buf(cur->bc_tp, bp,
1344 xfs_btree_ptr_offset(cur, first, level),
1345 xfs_btree_ptr_offset(cur, last + 1, level) - 1);
1346 } else {
1347 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1348 xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1349 }
1350
1351 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1352}
1353
1354/*
1355 * Log fields from a btree block header.
1356 */
Christoph Hellwigfd6bcc5b2008-10-30 16:58:21 +11001357void
Christoph Hellwig9eaead52008-10-30 16:56:43 +11001358xfs_btree_log_block(
1359 struct xfs_btree_cur *cur, /* btree cursor */
1360 struct xfs_buf *bp, /* buffer containing btree block */
1361 int fields) /* mask of fields: XFS_BB_... */
1362{
1363 int first; /* first byte offset logged */
1364 int last; /* last byte offset logged */
1365 static const short soffsets[] = { /* table of offsets (short) */
Christoph Hellwig7cc95a82008-10-30 17:14:34 +11001366 offsetof(struct xfs_btree_block, bb_magic),
1367 offsetof(struct xfs_btree_block, bb_level),
1368 offsetof(struct xfs_btree_block, bb_numrecs),
1369 offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
1370 offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001371 offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
1372 offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
1373 offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
1374 offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
1375 offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
1376 XFS_BTREE_SBLOCK_CRC_LEN
Christoph Hellwig9eaead52008-10-30 16:56:43 +11001377 };
1378 static const short loffsets[] = { /* table of offsets (long) */
Christoph Hellwig7cc95a82008-10-30 17:14:34 +11001379 offsetof(struct xfs_btree_block, bb_magic),
1380 offsetof(struct xfs_btree_block, bb_level),
1381 offsetof(struct xfs_btree_block, bb_numrecs),
1382 offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
1383 offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001384 offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
1385 offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
1386 offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
1387 offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
1388 offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
1389 offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
1390 XFS_BTREE_LBLOCK_CRC_LEN
Christoph Hellwig9eaead52008-10-30 16:56:43 +11001391 };
1392
1393 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1394 XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
1395
1396 if (bp) {
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001397 int nbits;
1398
1399 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
1400 /*
1401 * We don't log the CRC when updating a btree
1402 * block but instead recreate it during log
1403 * recovery. As the log buffers have checksums
1404 * of their own this is safe and avoids logging a crc
1405 * update in a lot of places.
1406 */
1407 if (fields == XFS_BB_ALL_BITS)
1408 fields = XFS_BB_ALL_BITS_CRC;
1409 nbits = XFS_BB_NUM_BITS_CRC;
1410 } else {
1411 nbits = XFS_BB_NUM_BITS;
1412 }
Christoph Hellwig9eaead52008-10-30 16:56:43 +11001413 xfs_btree_offsets(fields,
1414 (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
1415 loffsets : soffsets,
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001416 nbits, &first, &last);
Dave Chinner61fe1352013-04-03 16:11:30 +11001417 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
Christoph Hellwig9eaead52008-10-30 16:56:43 +11001418 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
1419 } else {
1420 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1421 xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1422 }
1423
1424 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1425}
1426
1427/*
Christoph Hellwig637aa502008-10-30 16:55:45 +11001428 * Increment cursor by one record at the level.
1429 * For nonzero levels the leaf-ward information is untouched.
1430 */
1431int /* error */
1432xfs_btree_increment(
1433 struct xfs_btree_cur *cur,
1434 int level,
1435 int *stat) /* success/failure */
1436{
1437 struct xfs_btree_block *block;
1438 union xfs_btree_ptr ptr;
1439 struct xfs_buf *bp;
1440 int error; /* error return value */
1441 int lev;
1442
1443 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1444 XFS_BTREE_TRACE_ARGI(cur, level);
1445
1446 ASSERT(level < cur->bc_nlevels);
1447
1448 /* Read-ahead to the right at this level. */
1449 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1450
1451 /* Get a pointer to the btree block. */
1452 block = xfs_btree_get_block(cur, level, &bp);
1453
1454#ifdef DEBUG
1455 error = xfs_btree_check_block(cur, block, level, bp);
1456 if (error)
1457 goto error0;
1458#endif
1459
1460 /* We're done if we remain in the block after the increment. */
1461 if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
1462 goto out1;
1463
1464 /* Fail if we just went off the right edge of the tree. */
1465 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1466 if (xfs_btree_ptr_is_null(cur, &ptr))
1467 goto out0;
1468
1469 XFS_BTREE_STATS_INC(cur, increment);
1470
1471 /*
1472 * March up the tree incrementing pointers.
1473 * Stop when we don't go off the right edge of a block.
1474 */
1475 for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1476 block = xfs_btree_get_block(cur, lev, &bp);
1477
1478#ifdef DEBUG
1479 error = xfs_btree_check_block(cur, block, lev, bp);
1480 if (error)
1481 goto error0;
1482#endif
1483
1484 if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
1485 break;
1486
1487 /* Read-ahead the right block for the next loop. */
1488 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1489 }
1490
1491 /*
1492 * If we went off the root then we are either seriously
1493 * confused or have the tree root in an inode.
1494 */
1495 if (lev == cur->bc_nlevels) {
1496 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1497 goto out0;
1498 ASSERT(0);
Dave Chinner24513372014-06-25 14:58:08 +10001499 error = -EFSCORRUPTED;
Christoph Hellwig637aa502008-10-30 16:55:45 +11001500 goto error0;
1501 }
1502 ASSERT(lev < cur->bc_nlevels);
1503
1504 /*
1505 * Now walk back down the tree, fixing up the cursor's buffer
1506 * pointers and key numbers.
1507 */
1508 for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1509 union xfs_btree_ptr *ptrp;
1510
1511 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
Eric Sandeen0d7409b2014-04-14 18:59:56 +10001512 --lev;
1513 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
Christoph Hellwig637aa502008-10-30 16:55:45 +11001514 if (error)
1515 goto error0;
1516
1517 xfs_btree_setbuf(cur, lev, bp);
1518 cur->bc_ptrs[lev] = 1;
1519 }
1520out1:
1521 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1522 *stat = 1;
1523 return 0;
1524
1525out0:
1526 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1527 *stat = 0;
1528 return 0;
1529
1530error0:
1531 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1532 return error;
1533}
Christoph Hellwig8df4da42008-10-30 16:55:58 +11001534
1535/*
1536 * Decrement cursor by one record at the level.
1537 * For nonzero levels the leaf-ward information is untouched.
1538 */
1539int /* error */
1540xfs_btree_decrement(
1541 struct xfs_btree_cur *cur,
1542 int level,
1543 int *stat) /* success/failure */
1544{
1545 struct xfs_btree_block *block;
1546 xfs_buf_t *bp;
1547 int error; /* error return value */
1548 int lev;
1549 union xfs_btree_ptr ptr;
1550
1551 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1552 XFS_BTREE_TRACE_ARGI(cur, level);
1553
1554 ASSERT(level < cur->bc_nlevels);
1555
1556 /* Read-ahead to the left at this level. */
1557 xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1558
1559 /* We're done if we remain in the block after the decrement. */
1560 if (--cur->bc_ptrs[level] > 0)
1561 goto out1;
1562
1563 /* Get a pointer to the btree block. */
1564 block = xfs_btree_get_block(cur, level, &bp);
1565
1566#ifdef DEBUG
1567 error = xfs_btree_check_block(cur, block, level, bp);
1568 if (error)
1569 goto error0;
1570#endif
1571
1572 /* Fail if we just went off the left edge of the tree. */
1573 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
1574 if (xfs_btree_ptr_is_null(cur, &ptr))
1575 goto out0;
1576
1577 XFS_BTREE_STATS_INC(cur, decrement);
1578
1579 /*
1580 * March up the tree decrementing pointers.
1581 * Stop when we don't go off the left edge of a block.
1582 */
1583 for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1584 if (--cur->bc_ptrs[lev] > 0)
1585 break;
1586 /* Read-ahead the left block for the next loop. */
1587 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1588 }
1589
1590 /*
1591 * If we went off the root then we are seriously confused.
1592 * or the root of the tree is in an inode.
1593 */
1594 if (lev == cur->bc_nlevels) {
1595 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1596 goto out0;
1597 ASSERT(0);
Dave Chinner24513372014-06-25 14:58:08 +10001598 error = -EFSCORRUPTED;
Christoph Hellwig8df4da42008-10-30 16:55:58 +11001599 goto error0;
1600 }
1601 ASSERT(lev < cur->bc_nlevels);
1602
1603 /*
1604 * Now walk back down the tree, fixing up the cursor's buffer
1605 * pointers and key numbers.
1606 */
1607 for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1608 union xfs_btree_ptr *ptrp;
1609
1610 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
Eric Sandeen0d7409b2014-04-14 18:59:56 +10001611 --lev;
1612 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
Christoph Hellwig8df4da42008-10-30 16:55:58 +11001613 if (error)
1614 goto error0;
1615 xfs_btree_setbuf(cur, lev, bp);
1616 cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
1617 }
1618out1:
1619 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1620 *stat = 1;
1621 return 0;
1622
1623out0:
1624 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1625 *stat = 0;
1626 return 0;
1627
1628error0:
1629 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1630 return error;
1631}
1632
Christoph Hellwigfe033cc2008-10-30 16:56:09 +11001633STATIC int
1634xfs_btree_lookup_get_block(
1635 struct xfs_btree_cur *cur, /* btree cursor */
1636 int level, /* level in the btree */
1637 union xfs_btree_ptr *pp, /* ptr to btree block */
1638 struct xfs_btree_block **blkp) /* return btree block */
1639{
1640 struct xfs_buf *bp; /* buffer pointer for btree block */
1641 int error = 0;
1642
1643 /* special case the root block if in an inode */
1644 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1645 (level == cur->bc_nlevels - 1)) {
1646 *blkp = xfs_btree_get_iroot(cur);
1647 return 0;
1648 }
1649
1650 /*
1651 * If the old buffer at this level for the disk address we are
1652 * looking for re-use it.
1653 *
1654 * Otherwise throw it away and get a new one.
1655 */
1656 bp = cur->bc_bufs[level];
1657 if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
1658 *blkp = XFS_BUF_TO_BLOCK(bp);
1659 return 0;
1660 }
1661
Eric Sandeen0d7409b2014-04-14 18:59:56 +10001662 error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
Christoph Hellwigfe033cc2008-10-30 16:56:09 +11001663 if (error)
1664 return error;
1665
1666 xfs_btree_setbuf(cur, level, bp);
1667 return 0;
1668}
1669
1670/*
1671 * Get current search key. For level 0 we don't actually have a key
1672 * structure so we make one up from the record. For all other levels
1673 * we just return the right key.
1674 */
1675STATIC union xfs_btree_key *
1676xfs_lookup_get_search_key(
1677 struct xfs_btree_cur *cur,
1678 int level,
1679 int keyno,
1680 struct xfs_btree_block *block,
1681 union xfs_btree_key *kp)
1682{
1683 if (level == 0) {
1684 cur->bc_ops->init_key_from_rec(kp,
1685 xfs_btree_rec_addr(cur, keyno, block));
1686 return kp;
1687 }
1688
1689 return xfs_btree_key_addr(cur, keyno, block);
1690}
1691
1692/*
1693 * Lookup the record. The cursor is made to point to it, based on dir.
Zhi Yong Wu49d3da12013-08-07 10:11:00 +00001694 * stat is set to 0 if can't find any such record, 1 for success.
Christoph Hellwigfe033cc2008-10-30 16:56:09 +11001695 */
1696int /* error */
1697xfs_btree_lookup(
1698 struct xfs_btree_cur *cur, /* btree cursor */
1699 xfs_lookup_t dir, /* <=, ==, or >= */
1700 int *stat) /* success/failure */
1701{
1702 struct xfs_btree_block *block; /* current btree block */
1703 __int64_t diff; /* difference for the current key */
1704 int error; /* error return value */
1705 int keyno; /* current key number */
1706 int level; /* level in the btree */
1707 union xfs_btree_ptr *pp; /* ptr to btree block */
1708 union xfs_btree_ptr ptr; /* ptr to btree block */
1709
1710 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1711 XFS_BTREE_TRACE_ARGI(cur, dir);
1712
1713 XFS_BTREE_STATS_INC(cur, lookup);
1714
1715 block = NULL;
1716 keyno = 0;
1717
1718 /* initialise start pointer from cursor */
1719 cur->bc_ops->init_ptr_from_cur(cur, &ptr);
1720 pp = &ptr;
1721
1722 /*
1723 * Iterate over each level in the btree, starting at the root.
1724 * For each level above the leaves, find the key we need, based
1725 * on the lookup record, then follow the corresponding block
1726 * pointer down to the next level.
1727 */
1728 for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
1729 /* Get the block we need to do the lookup on. */
1730 error = xfs_btree_lookup_get_block(cur, level, pp, &block);
1731 if (error)
1732 goto error0;
1733
1734 if (diff == 0) {
1735 /*
1736 * If we already had a key match at a higher level, we
1737 * know we need to use the first entry in this block.
1738 */
1739 keyno = 1;
1740 } else {
1741 /* Otherwise search this block. Do a binary search. */
1742
1743 int high; /* high entry number */
1744 int low; /* low entry number */
1745
1746 /* Set low and high entry numbers, 1-based. */
1747 low = 1;
1748 high = xfs_btree_get_numrecs(block);
1749 if (!high) {
1750 /* Block is empty, must be an empty leaf. */
1751 ASSERT(level == 0 && cur->bc_nlevels == 1);
1752
1753 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1754 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1755 *stat = 0;
1756 return 0;
1757 }
1758
1759 /* Binary search the block. */
1760 while (low <= high) {
1761 union xfs_btree_key key;
1762 union xfs_btree_key *kp;
1763
1764 XFS_BTREE_STATS_INC(cur, compare);
1765
1766 /* keyno is average of low and high. */
1767 keyno = (low + high) >> 1;
1768
1769 /* Get current search key */
1770 kp = xfs_lookup_get_search_key(cur, level,
1771 keyno, block, &key);
1772
1773 /*
1774 * Compute difference to get next direction:
1775 * - less than, move right
1776 * - greater than, move left
1777 * - equal, we're done
1778 */
1779 diff = cur->bc_ops->key_diff(cur, kp);
1780 if (diff < 0)
1781 low = keyno + 1;
1782 else if (diff > 0)
1783 high = keyno - 1;
1784 else
1785 break;
1786 }
1787 }
1788
1789 /*
1790 * If there are more levels, set up for the next level
1791 * by getting the block number and filling in the cursor.
1792 */
1793 if (level > 0) {
1794 /*
1795 * If we moved left, need the previous key number,
1796 * unless there isn't one.
1797 */
1798 if (diff > 0 && --keyno < 1)
1799 keyno = 1;
1800 pp = xfs_btree_ptr_addr(cur, keyno, block);
1801
1802#ifdef DEBUG
1803 error = xfs_btree_check_ptr(cur, pp, 0, level);
1804 if (error)
1805 goto error0;
1806#endif
1807 cur->bc_ptrs[level] = keyno;
1808 }
1809 }
1810
1811 /* Done with the search. See if we need to adjust the results. */
1812 if (dir != XFS_LOOKUP_LE && diff < 0) {
1813 keyno++;
1814 /*
1815 * If ge search and we went off the end of the block, but it's
1816 * not the last block, we're in the wrong block.
1817 */
1818 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1819 if (dir == XFS_LOOKUP_GE &&
1820 keyno > xfs_btree_get_numrecs(block) &&
1821 !xfs_btree_ptr_is_null(cur, &ptr)) {
1822 int i;
1823
1824 cur->bc_ptrs[0] = keyno;
1825 error = xfs_btree_increment(cur, 0, &i);
1826 if (error)
1827 goto error0;
1828 XFS_WANT_CORRUPTED_RETURN(i == 1);
1829 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1830 *stat = 1;
1831 return 0;
1832 }
1833 } else if (dir == XFS_LOOKUP_LE && diff > 0)
1834 keyno--;
1835 cur->bc_ptrs[0] = keyno;
1836
1837 /* Return if we succeeded or not. */
1838 if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
1839 *stat = 0;
1840 else if (dir != XFS_LOOKUP_EQ || diff == 0)
1841 *stat = 1;
1842 else
1843 *stat = 0;
1844 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1845 return 0;
1846
1847error0:
1848 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1849 return error;
1850}
Christoph Hellwig38bb7422008-10-30 16:56:22 +11001851
1852/*
1853 * Update keys at all levels from here to the root along the cursor's path.
1854 */
Christoph Hellwig3cc75242008-10-30 16:58:41 +11001855STATIC int
Christoph Hellwig38bb7422008-10-30 16:56:22 +11001856xfs_btree_updkey(
1857 struct xfs_btree_cur *cur,
1858 union xfs_btree_key *keyp,
1859 int level)
1860{
1861 struct xfs_btree_block *block;
1862 struct xfs_buf *bp;
1863 union xfs_btree_key *kp;
1864 int ptr;
1865
1866 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1867 XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
1868
1869 ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);
1870
1871 /*
1872 * Go up the tree from this level toward the root.
1873 * At each level, update the key value to the value input.
1874 * Stop when we reach a level where the cursor isn't pointing
1875 * at the first entry in the block.
1876 */
1877 for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1878#ifdef DEBUG
1879 int error;
1880#endif
1881 block = xfs_btree_get_block(cur, level, &bp);
1882#ifdef DEBUG
1883 error = xfs_btree_check_block(cur, block, level, bp);
1884 if (error) {
1885 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1886 return error;
1887 }
1888#endif
1889 ptr = cur->bc_ptrs[level];
1890 kp = xfs_btree_key_addr(cur, ptr, block);
1891 xfs_btree_copy_keys(cur, kp, keyp, 1);
1892 xfs_btree_log_keys(cur, bp, ptr, ptr);
1893 }
1894
1895 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1896 return 0;
1897}
Christoph Hellwig278d0ca2008-10-30 16:56:32 +11001898
1899/*
1900 * Update the record referred to by cur to the value in the
1901 * given record. This either works (return 0) or gets an
1902 * EFSCORRUPTED error.
1903 */
1904int
1905xfs_btree_update(
1906 struct xfs_btree_cur *cur,
1907 union xfs_btree_rec *rec)
1908{
1909 struct xfs_btree_block *block;
1910 struct xfs_buf *bp;
1911 int error;
1912 int ptr;
1913 union xfs_btree_rec *rp;
1914
1915 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1916 XFS_BTREE_TRACE_ARGR(cur, rec);
1917
1918 /* Pick up the current block. */
1919 block = xfs_btree_get_block(cur, 0, &bp);
1920
1921#ifdef DEBUG
1922 error = xfs_btree_check_block(cur, block, 0, bp);
1923 if (error)
1924 goto error0;
1925#endif
1926 /* Get the address of the rec to be updated. */
1927 ptr = cur->bc_ptrs[0];
1928 rp = xfs_btree_rec_addr(cur, ptr, block);
1929
1930 /* Fill in the new contents and log them. */
1931 xfs_btree_copy_recs(cur, rp, rec, 1);
1932 xfs_btree_log_recs(cur, bp, ptr, ptr);
1933
1934 /*
1935 * If we are tracking the last record in the tree and
1936 * we are at the far right edge of the tree, update it.
1937 */
1938 if (xfs_btree_is_lastrec(cur, block, 0)) {
1939 cur->bc_ops->update_lastrec(cur, block, rec,
1940 ptr, LASTREC_UPDATE);
1941 }
1942
1943 /* Updating first rec in leaf. Pass new key value up to our parent. */
1944 if (ptr == 1) {
1945 union xfs_btree_key key;
1946
1947 cur->bc_ops->init_key_from_rec(&key, rec);
1948 error = xfs_btree_updkey(cur, &key, 1);
1949 if (error)
1950 goto error0;
1951 }
1952
1953 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1954 return 0;
1955
1956error0:
1957 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1958 return error;
1959}
1960
Christoph Hellwig9eaead52008-10-30 16:56:43 +11001961/*
Christoph Hellwig687b8902008-10-30 16:56:53 +11001962 * Move 1 record left from cur/level if possible.
1963 * Update cur to reflect the new path.
1964 */
Christoph Hellwig3cc75242008-10-30 16:58:41 +11001965STATIC int /* error */
Christoph Hellwig687b8902008-10-30 16:56:53 +11001966xfs_btree_lshift(
1967 struct xfs_btree_cur *cur,
1968 int level,
1969 int *stat) /* success/failure */
1970{
1971 union xfs_btree_key key; /* btree key */
1972 struct xfs_buf *lbp; /* left buffer pointer */
1973 struct xfs_btree_block *left; /* left btree block */
1974 int lrecs; /* left record count */
1975 struct xfs_buf *rbp; /* right buffer pointer */
1976 struct xfs_btree_block *right; /* right btree block */
1977 int rrecs; /* right record count */
1978 union xfs_btree_ptr lptr; /* left btree pointer */
1979 union xfs_btree_key *rkp = NULL; /* right btree key */
1980 union xfs_btree_ptr *rpp = NULL; /* right address pointer */
1981 union xfs_btree_rec *rrp = NULL; /* right record pointer */
1982 int error; /* error return value */
1983
1984 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1985 XFS_BTREE_TRACE_ARGI(cur, level);
1986
1987 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1988 level == cur->bc_nlevels - 1)
1989 goto out0;
1990
1991 /* Set up variables for this block as "right". */
1992 right = xfs_btree_get_block(cur, level, &rbp);
1993
1994#ifdef DEBUG
1995 error = xfs_btree_check_block(cur, right, level, rbp);
1996 if (error)
1997 goto error0;
1998#endif
1999
2000 /* If we've got no left sibling then we can't shift an entry left. */
2001 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2002 if (xfs_btree_ptr_is_null(cur, &lptr))
2003 goto out0;
2004
2005 /*
2006 * If the cursor entry is the one that would be moved, don't
2007 * do it... it's too complicated.
2008 */
2009 if (cur->bc_ptrs[level] <= 1)
2010 goto out0;
2011
2012 /* Set up the left neighbor as "left". */
Eric Sandeen0d7409b2014-04-14 18:59:56 +10002013 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
Christoph Hellwig687b8902008-10-30 16:56:53 +11002014 if (error)
2015 goto error0;
2016
2017 /* If it's full, it can't take another entry. */
2018 lrecs = xfs_btree_get_numrecs(left);
2019 if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
2020 goto out0;
2021
2022 rrecs = xfs_btree_get_numrecs(right);
2023
2024 /*
2025 * We add one entry to the left side and remove one for the right side.
Malcolm Parsons9da096f2009-03-29 09:55:42 +02002026 * Account for it here, the changes will be updated on disk and logged
Christoph Hellwig687b8902008-10-30 16:56:53 +11002027 * later.
2028 */
2029 lrecs++;
2030 rrecs--;
2031
2032 XFS_BTREE_STATS_INC(cur, lshift);
2033 XFS_BTREE_STATS_ADD(cur, moves, 1);
2034
2035 /*
2036 * If non-leaf, copy a key and a ptr to the left block.
2037 * Log the changes to the left block.
2038 */
2039 if (level > 0) {
2040 /* It's a non-leaf. Move keys and pointers. */
2041 union xfs_btree_key *lkp; /* left btree key */
2042 union xfs_btree_ptr *lpp; /* left address pointer */
2043
2044 lkp = xfs_btree_key_addr(cur, lrecs, left);
2045 rkp = xfs_btree_key_addr(cur, 1, right);
2046
2047 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2048 rpp = xfs_btree_ptr_addr(cur, 1, right);
2049#ifdef DEBUG
2050 error = xfs_btree_check_ptr(cur, rpp, 0, level);
2051 if (error)
2052 goto error0;
2053#endif
2054 xfs_btree_copy_keys(cur, lkp, rkp, 1);
2055 xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
2056
2057 xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
2058 xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
2059
Christoph Hellwig4a26e662008-10-30 16:58:32 +11002060 ASSERT(cur->bc_ops->keys_inorder(cur,
2061 xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
Christoph Hellwig687b8902008-10-30 16:56:53 +11002062 } else {
2063 /* It's a leaf. Move records. */
2064 union xfs_btree_rec *lrp; /* left record pointer */
2065
2066 lrp = xfs_btree_rec_addr(cur, lrecs, left);
2067 rrp = xfs_btree_rec_addr(cur, 1, right);
2068
2069 xfs_btree_copy_recs(cur, lrp, rrp, 1);
2070 xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
2071
Christoph Hellwig4a26e662008-10-30 16:58:32 +11002072 ASSERT(cur->bc_ops->recs_inorder(cur,
2073 xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
Christoph Hellwig687b8902008-10-30 16:56:53 +11002074 }
2075
2076 xfs_btree_set_numrecs(left, lrecs);
2077 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2078
2079 xfs_btree_set_numrecs(right, rrecs);
2080 xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2081
2082 /*
2083 * Slide the contents of right down one entry.
2084 */
2085 XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
2086 if (level > 0) {
2087 /* It's a nonleaf. operate on keys and ptrs */
2088#ifdef DEBUG
2089 int i; /* loop index */
2090
2091 for (i = 0; i < rrecs; i++) {
2092 error = xfs_btree_check_ptr(cur, rpp, i + 1, level);
2093 if (error)
2094 goto error0;
2095 }
2096#endif
2097 xfs_btree_shift_keys(cur,
2098 xfs_btree_key_addr(cur, 2, right),
2099 -1, rrecs);
2100 xfs_btree_shift_ptrs(cur,
2101 xfs_btree_ptr_addr(cur, 2, right),
2102 -1, rrecs);
2103
2104 xfs_btree_log_keys(cur, rbp, 1, rrecs);
2105 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2106 } else {
2107 /* It's a leaf. operate on records */
2108 xfs_btree_shift_recs(cur,
2109 xfs_btree_rec_addr(cur, 2, right),
2110 -1, rrecs);
2111 xfs_btree_log_recs(cur, rbp, 1, rrecs);
2112
2113 /*
2114 * If it's the first record in the block, we'll need a key
2115 * structure to pass up to the next level (updkey).
2116 */
2117 cur->bc_ops->init_key_from_rec(&key,
2118 xfs_btree_rec_addr(cur, 1, right));
2119 rkp = &key;
2120 }
2121
2122 /* Update the parent key values of right. */
2123 error = xfs_btree_updkey(cur, rkp, level + 1);
2124 if (error)
2125 goto error0;
2126
2127 /* Slide the cursor value left one. */
2128 cur->bc_ptrs[level]--;
2129
2130 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2131 *stat = 1;
2132 return 0;
2133
2134out0:
2135 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2136 *stat = 0;
2137 return 0;
2138
2139error0:
2140 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2141 return error;
2142}
2143
2144/*
Christoph Hellwig9eaead52008-10-30 16:56:43 +11002145 * Move 1 record right from cur/level if possible.
2146 * Update cur to reflect the new path.
2147 */
Christoph Hellwig3cc75242008-10-30 16:58:41 +11002148STATIC int /* error */
Christoph Hellwig9eaead52008-10-30 16:56:43 +11002149xfs_btree_rshift(
2150 struct xfs_btree_cur *cur,
2151 int level,
2152 int *stat) /* success/failure */
2153{
2154 union xfs_btree_key key; /* btree key */
2155 struct xfs_buf *lbp; /* left buffer pointer */
2156 struct xfs_btree_block *left; /* left btree block */
2157 struct xfs_buf *rbp; /* right buffer pointer */
2158 struct xfs_btree_block *right; /* right btree block */
2159 struct xfs_btree_cur *tcur; /* temporary btree cursor */
2160 union xfs_btree_ptr rptr; /* right block pointer */
2161 union xfs_btree_key *rkp; /* right btree key */
2162 int rrecs; /* right record count */
2163 int lrecs; /* left record count */
2164 int error; /* error return value */
2165 int i; /* loop counter */
2166
2167 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2168 XFS_BTREE_TRACE_ARGI(cur, level);
2169
2170 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2171 (level == cur->bc_nlevels - 1))
2172 goto out0;
2173
2174 /* Set up variables for this block as "left". */
2175 left = xfs_btree_get_block(cur, level, &lbp);
2176
2177#ifdef DEBUG
2178 error = xfs_btree_check_block(cur, left, level, lbp);
2179 if (error)
2180 goto error0;
2181#endif
2182
2183 /* If we've got no right sibling then we can't shift an entry right. */
2184 xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2185 if (xfs_btree_ptr_is_null(cur, &rptr))
2186 goto out0;
2187
2188 /*
2189 * If the cursor entry is the one that would be moved, don't
2190 * do it... it's too complicated.
2191 */
2192 lrecs = xfs_btree_get_numrecs(left);
2193 if (cur->bc_ptrs[level] >= lrecs)
2194 goto out0;
2195
2196 /* Set up the right neighbor as "right". */
Eric Sandeen0d7409b2014-04-14 18:59:56 +10002197 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
Christoph Hellwig9eaead52008-10-30 16:56:43 +11002198 if (error)
2199 goto error0;
2200
2201 /* If it's full, it can't take another entry. */
2202 rrecs = xfs_btree_get_numrecs(right);
2203 if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
2204 goto out0;
2205
2206 XFS_BTREE_STATS_INC(cur, rshift);
2207 XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2208
2209 /*
2210 * Make a hole at the start of the right neighbor block, then
2211 * copy the last left block entry to the hole.
2212 */
2213 if (level > 0) {
2214 /* It's a nonleaf. make a hole in the keys and ptrs */
2215 union xfs_btree_key *lkp;
2216 union xfs_btree_ptr *lpp;
2217 union xfs_btree_ptr *rpp;
2218
2219 lkp = xfs_btree_key_addr(cur, lrecs, left);
2220 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2221 rkp = xfs_btree_key_addr(cur, 1, right);
2222 rpp = xfs_btree_ptr_addr(cur, 1, right);
2223
2224#ifdef DEBUG
2225 for (i = rrecs - 1; i >= 0; i--) {
2226 error = xfs_btree_check_ptr(cur, rpp, i, level);
2227 if (error)
2228 goto error0;
2229 }
2230#endif
2231
2232 xfs_btree_shift_keys(cur, rkp, 1, rrecs);
2233 xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
2234
2235#ifdef DEBUG
2236 error = xfs_btree_check_ptr(cur, lpp, 0, level);
2237 if (error)
2238 goto error0;
2239#endif
2240
2241 /* Now put the new data in, and log it. */
2242 xfs_btree_copy_keys(cur, rkp, lkp, 1);
2243 xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
2244
2245 xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
2246 xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
2247
Christoph Hellwig4a26e662008-10-30 16:58:32 +11002248 ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
2249 xfs_btree_key_addr(cur, 2, right)));
Christoph Hellwig9eaead52008-10-30 16:56:43 +11002250 } else {
2251 /* It's a leaf. make a hole in the records */
2252 union xfs_btree_rec *lrp;
2253 union xfs_btree_rec *rrp;
2254
2255 lrp = xfs_btree_rec_addr(cur, lrecs, left);
2256 rrp = xfs_btree_rec_addr(cur, 1, right);
2257
2258 xfs_btree_shift_recs(cur, rrp, 1, rrecs);
2259
2260 /* Now put the new data in, and log it. */
2261 xfs_btree_copy_recs(cur, rrp, lrp, 1);
2262 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
2263
2264 cur->bc_ops->init_key_from_rec(&key, rrp);
2265 rkp = &key;
2266
Christoph Hellwig4a26e662008-10-30 16:58:32 +11002267 ASSERT(cur->bc_ops->recs_inorder(cur, rrp,
2268 xfs_btree_rec_addr(cur, 2, right)));
Christoph Hellwig9eaead52008-10-30 16:56:43 +11002269 }
2270
2271 /*
2272 * Decrement and log left's numrecs, bump and log right's numrecs.
2273 */
2274 xfs_btree_set_numrecs(left, --lrecs);
2275 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2276
2277 xfs_btree_set_numrecs(right, ++rrecs);
2278 xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2279
2280 /*
2281 * Using a temporary cursor, update the parent key values of the
2282 * block on the right.
2283 */
2284 error = xfs_btree_dup_cursor(cur, &tcur);
2285 if (error)
2286 goto error0;
2287 i = xfs_btree_lastrec(tcur, level);
2288 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2289
2290 error = xfs_btree_increment(tcur, level, &i);
2291 if (error)
2292 goto error1;
2293
2294 error = xfs_btree_updkey(tcur, rkp, level + 1);
2295 if (error)
2296 goto error1;
2297
2298 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2299
2300 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2301 *stat = 1;
2302 return 0;
2303
2304out0:
2305 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2306 *stat = 0;
2307 return 0;
2308
2309error0:
2310 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2311 return error;
2312
2313error1:
2314 XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
2315 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2316 return error;
2317}
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002318
2319/*
2320 * Split cur/level block in half.
2321 * Return new block number and the key to its first
2322 * record (to be inserted into parent).
2323 */
Christoph Hellwig3cc75242008-10-30 16:58:41 +11002324STATIC int /* error */
Dave Chinnercf11da92014-07-15 07:08:24 +10002325__xfs_btree_split(
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002326 struct xfs_btree_cur *cur,
2327 int level,
2328 union xfs_btree_ptr *ptrp,
2329 union xfs_btree_key *key,
2330 struct xfs_btree_cur **curp,
2331 int *stat) /* success/failure */
2332{
2333 union xfs_btree_ptr lptr; /* left sibling block ptr */
2334 struct xfs_buf *lbp; /* left buffer pointer */
2335 struct xfs_btree_block *left; /* left btree block */
2336 union xfs_btree_ptr rptr; /* right sibling block ptr */
2337 struct xfs_buf *rbp; /* right buffer pointer */
2338 struct xfs_btree_block *right; /* right btree block */
2339 union xfs_btree_ptr rrptr; /* right-right sibling ptr */
2340 struct xfs_buf *rrbp; /* right-right buffer pointer */
2341 struct xfs_btree_block *rrblock; /* right-right btree block */
2342 int lrecs;
2343 int rrecs;
2344 int src_index;
2345 int error; /* error return value */
2346#ifdef DEBUG
2347 int i;
2348#endif
2349
2350 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2351 XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);
2352
2353 XFS_BTREE_STATS_INC(cur, split);
2354
2355 /* Set up left block (current one). */
2356 left = xfs_btree_get_block(cur, level, &lbp);
2357
2358#ifdef DEBUG
2359 error = xfs_btree_check_block(cur, left, level, lbp);
2360 if (error)
2361 goto error0;
2362#endif
2363
2364 xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2365
2366 /* Allocate the new block. If we can't do it, we're toast. Give up. */
Eric Sandeen6f8950c2014-04-14 19:03:53 +10002367 error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat);
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002368 if (error)
2369 goto error0;
2370 if (*stat == 0)
2371 goto out0;
2372 XFS_BTREE_STATS_INC(cur, alloc);
2373
2374 /* Set up the new block as "right". */
2375 error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
2376 if (error)
2377 goto error0;
2378
2379 /* Fill in the btree header for the new right block. */
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05002380 xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002381
2382 /*
2383 * Split the entries between the old and the new block evenly.
2384 * Make sure that if there's an odd number of entries now, that
2385 * each new block will have the same number of entries.
2386 */
2387 lrecs = xfs_btree_get_numrecs(left);
2388 rrecs = lrecs / 2;
2389 if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
2390 rrecs++;
2391 src_index = (lrecs - rrecs + 1);
2392
2393 XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2394
2395 /*
2396 * Copy btree block entries from the left block over to the
2397 * new block, the right. Update the right block and log the
2398 * changes.
2399 */
2400 if (level > 0) {
2401 /* It's a non-leaf. Move keys and pointers. */
2402 union xfs_btree_key *lkp; /* left btree key */
2403 union xfs_btree_ptr *lpp; /* left address pointer */
2404 union xfs_btree_key *rkp; /* right btree key */
2405 union xfs_btree_ptr *rpp; /* right address pointer */
2406
2407 lkp = xfs_btree_key_addr(cur, src_index, left);
2408 lpp = xfs_btree_ptr_addr(cur, src_index, left);
2409 rkp = xfs_btree_key_addr(cur, 1, right);
2410 rpp = xfs_btree_ptr_addr(cur, 1, right);
2411
2412#ifdef DEBUG
2413 for (i = src_index; i < rrecs; i++) {
2414 error = xfs_btree_check_ptr(cur, lpp, i, level);
2415 if (error)
2416 goto error0;
2417 }
2418#endif
2419
2420 xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
2421 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
2422
2423 xfs_btree_log_keys(cur, rbp, 1, rrecs);
2424 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2425
2426 /* Grab the keys to the entries moved to the right block */
2427 xfs_btree_copy_keys(cur, key, rkp, 1);
2428 } else {
2429 /* It's a leaf. Move records. */
2430 union xfs_btree_rec *lrp; /* left record pointer */
2431 union xfs_btree_rec *rrp; /* right record pointer */
2432
2433 lrp = xfs_btree_rec_addr(cur, src_index, left);
2434 rrp = xfs_btree_rec_addr(cur, 1, right);
2435
2436 xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
2437 xfs_btree_log_recs(cur, rbp, 1, rrecs);
2438
2439 cur->bc_ops->init_key_from_rec(key,
2440 xfs_btree_rec_addr(cur, 1, right));
2441 }
2442
2443
2444 /*
2445 * Find the left block number by looking in the buffer.
2446 * Adjust numrecs, sibling pointers.
2447 */
2448 xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
2449 xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
2450 xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2451 xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2452
2453 lrecs -= rrecs;
2454 xfs_btree_set_numrecs(left, lrecs);
2455 xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
2456
2457 xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
2458 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
2459
2460 /*
2461 * If there's a block to the new block's right, make that block
2462 * point back to right instead of to left.
2463 */
2464 if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
Eric Sandeen0d7409b2014-04-14 18:59:56 +10002465 error = xfs_btree_read_buf_block(cur, &rrptr,
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002466 0, &rrblock, &rrbp);
2467 if (error)
2468 goto error0;
2469 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
2470 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
2471 }
2472 /*
2473 * If the cursor is really in the right block, move it there.
2474 * If it's just pointing past the last entry in left, then we'll
2475 * insert there, so don't change anything in that case.
2476 */
2477 if (cur->bc_ptrs[level] > lrecs + 1) {
2478 xfs_btree_setbuf(cur, level, rbp);
2479 cur->bc_ptrs[level] -= lrecs;
2480 }
2481 /*
2482 * If there are more levels, we'll need another cursor which refers
2483 * the right block, no matter where this cursor was.
2484 */
2485 if (level + 1 < cur->bc_nlevels) {
2486 error = xfs_btree_dup_cursor(cur, curp);
2487 if (error)
2488 goto error0;
2489 (*curp)->bc_ptrs[level + 1]++;
2490 }
2491 *ptrp = rptr;
2492 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2493 *stat = 1;
2494 return 0;
2495out0:
2496 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2497 *stat = 0;
2498 return 0;
2499
2500error0:
2501 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2502 return error;
2503}
Christoph Hellwig344207c2008-10-30 16:57:16 +11002504
Dave Chinnercf11da92014-07-15 07:08:24 +10002505struct xfs_btree_split_args {
2506 struct xfs_btree_cur *cur;
2507 int level;
2508 union xfs_btree_ptr *ptrp;
2509 union xfs_btree_key *key;
2510 struct xfs_btree_cur **curp;
2511 int *stat; /* success/failure */
2512 int result;
2513 bool kswapd; /* allocation in kswapd context */
2514 struct completion *done;
2515 struct work_struct work;
2516};
2517
2518/*
2519 * Stack switching interfaces for allocation
2520 */
2521static void
2522xfs_btree_split_worker(
2523 struct work_struct *work)
2524{
2525 struct xfs_btree_split_args *args = container_of(work,
2526 struct xfs_btree_split_args, work);
2527 unsigned long pflags;
2528 unsigned long new_pflags = PF_FSTRANS;
2529
2530 /*
2531 * we are in a transaction context here, but may also be doing work
2532 * in kswapd context, and hence we may need to inherit that state
2533 * temporarily to ensure that we don't block waiting for memory reclaim
2534 * in any way.
2535 */
2536 if (args->kswapd)
2537 new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
2538
2539 current_set_flags_nested(&pflags, new_pflags);
2540
2541 args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
2542 args->key, args->curp, args->stat);
2543 complete(args->done);
2544
2545 current_restore_flags_nested(&pflags, new_pflags);
2546}
2547
2548/*
2549 * BMBT split requests often come in with little stack to work on. Push
2550 * them off to a worker thread so there is lots of stack to use. For the other
2551 * btree types, just call directly to avoid the context switch overhead here.
2552 */
2553STATIC int /* error */
2554xfs_btree_split(
2555 struct xfs_btree_cur *cur,
2556 int level,
2557 union xfs_btree_ptr *ptrp,
2558 union xfs_btree_key *key,
2559 struct xfs_btree_cur **curp,
2560 int *stat) /* success/failure */
2561{
2562 struct xfs_btree_split_args args;
2563 DECLARE_COMPLETION_ONSTACK(done);
2564
2565 if (cur->bc_btnum != XFS_BTNUM_BMAP)
2566 return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
2567
2568 args.cur = cur;
2569 args.level = level;
2570 args.ptrp = ptrp;
2571 args.key = key;
2572 args.curp = curp;
2573 args.stat = stat;
2574 args.done = &done;
2575 args.kswapd = current_is_kswapd();
2576 INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
2577 queue_work(xfs_alloc_wq, &args.work);
2578 wait_for_completion(&done);
2579 destroy_work_on_stack(&args.work);
2580 return args.result;
2581}
2582
2583
Christoph Hellwig344207c2008-10-30 16:57:16 +11002584/*
Christoph Hellwigea77b0a2008-10-30 16:57:28 +11002585 * Copy the old inode root contents into a real block and make the
2586 * broot point to it.
2587 */
2588int /* error */
2589xfs_btree_new_iroot(
2590 struct xfs_btree_cur *cur, /* btree cursor */
2591 int *logflags, /* logging flags for inode */
2592 int *stat) /* return status - 0 fail */
2593{
2594 struct xfs_buf *cbp; /* buffer for cblock */
2595 struct xfs_btree_block *block; /* btree block */
2596 struct xfs_btree_block *cblock; /* child btree block */
2597 union xfs_btree_key *ckp; /* child key pointer */
2598 union xfs_btree_ptr *cpp; /* child ptr pointer */
2599 union xfs_btree_key *kp; /* pointer to btree key */
2600 union xfs_btree_ptr *pp; /* pointer to block addr */
2601 union xfs_btree_ptr nptr; /* new block addr */
2602 int level; /* btree level */
2603 int error; /* error return code */
2604#ifdef DEBUG
2605 int i; /* loop counter */
2606#endif
2607
2608 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2609 XFS_BTREE_STATS_INC(cur, newroot);
2610
2611 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
2612
2613 level = cur->bc_nlevels - 1;
2614
2615 block = xfs_btree_get_iroot(cur);
2616 pp = xfs_btree_ptr_addr(cur, 1, block);
2617
2618 /* Allocate the new block. If we can't do it, we're toast. Give up. */
Eric Sandeen6f8950c2014-04-14 19:03:53 +10002619 error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);
Christoph Hellwigea77b0a2008-10-30 16:57:28 +11002620 if (error)
2621 goto error0;
2622 if (*stat == 0) {
2623 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2624 return 0;
2625 }
2626 XFS_BTREE_STATS_INC(cur, alloc);
2627
2628 /* Copy the root into a real block. */
2629 error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
2630 if (error)
2631 goto error0;
2632
Dave Chinner088c9f62013-06-12 12:19:08 +10002633 /*
2634 * we can't just memcpy() the root in for CRC enabled btree blocks.
2635 * In that case have to also ensure the blkno remains correct
2636 */
Christoph Hellwigea77b0a2008-10-30 16:57:28 +11002637 memcpy(cblock, block, xfs_btree_block_len(cur));
Dave Chinner088c9f62013-06-12 12:19:08 +10002638 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
2639 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
2640 cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn);
2641 else
2642 cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn);
2643 }
Christoph Hellwigea77b0a2008-10-30 16:57:28 +11002644
2645 be16_add_cpu(&block->bb_level, 1);
2646 xfs_btree_set_numrecs(block, 1);
2647 cur->bc_nlevels++;
2648 cur->bc_ptrs[level + 1] = 1;
2649
2650 kp = xfs_btree_key_addr(cur, 1, block);
2651 ckp = xfs_btree_key_addr(cur, 1, cblock);
2652 xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
2653
2654 cpp = xfs_btree_ptr_addr(cur, 1, cblock);
2655#ifdef DEBUG
2656 for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
2657 error = xfs_btree_check_ptr(cur, pp, i, level);
2658 if (error)
2659 goto error0;
2660 }
2661#endif
2662 xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
2663
2664#ifdef DEBUG
2665 error = xfs_btree_check_ptr(cur, &nptr, 0, level);
2666 if (error)
2667 goto error0;
2668#endif
2669 xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
2670
2671 xfs_iroot_realloc(cur->bc_private.b.ip,
2672 1 - xfs_btree_get_numrecs(cblock),
2673 cur->bc_private.b.whichfork);
2674
2675 xfs_btree_setbuf(cur, level, cbp);
2676
2677 /*
2678 * Do all this logging at the end so that
2679 * the root is at the right level.
2680 */
2681 xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
2682 xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2683 xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2684
2685 *logflags |=
Eric Sandeen9d87c312009-01-14 23:22:07 -06002686 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork);
Christoph Hellwigea77b0a2008-10-30 16:57:28 +11002687 *stat = 1;
2688 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2689 return 0;
2690error0:
2691 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2692 return error;
2693}
2694
2695/*
Christoph Hellwig344207c2008-10-30 16:57:16 +11002696 * Allocate a new root block, fill it in.
2697 */
Christoph Hellwig3cc75242008-10-30 16:58:41 +11002698STATIC int /* error */
Christoph Hellwig344207c2008-10-30 16:57:16 +11002699xfs_btree_new_root(
2700 struct xfs_btree_cur *cur, /* btree cursor */
2701 int *stat) /* success/failure */
2702{
2703 struct xfs_btree_block *block; /* one half of the old root block */
2704 struct xfs_buf *bp; /* buffer containing block */
2705 int error; /* error return value */
2706 struct xfs_buf *lbp; /* left buffer pointer */
2707 struct xfs_btree_block *left; /* left btree block */
2708 struct xfs_buf *nbp; /* new (root) buffer */
2709 struct xfs_btree_block *new; /* new (root) btree block */
2710 int nptr; /* new value for key index, 1 or 2 */
2711 struct xfs_buf *rbp; /* right buffer pointer */
2712 struct xfs_btree_block *right; /* right btree block */
2713 union xfs_btree_ptr rptr;
2714 union xfs_btree_ptr lptr;
2715
2716 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2717 XFS_BTREE_STATS_INC(cur, newroot);
2718
2719 /* initialise our start point from the cursor */
2720 cur->bc_ops->init_ptr_from_cur(cur, &rptr);
2721
2722 /* Allocate the new block. If we can't do it, we're toast. Give up. */
Eric Sandeen6f8950c2014-04-14 19:03:53 +10002723 error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat);
Christoph Hellwig344207c2008-10-30 16:57:16 +11002724 if (error)
2725 goto error0;
2726 if (*stat == 0)
2727 goto out0;
2728 XFS_BTREE_STATS_INC(cur, alloc);
2729
2730 /* Set up the new block. */
2731 error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
2732 if (error)
2733 goto error0;
2734
2735 /* Set the root in the holding structure increasing the level by 1. */
2736 cur->bc_ops->set_root(cur, &lptr, 1);
2737
2738 /*
2739 * At the previous root level there are now two blocks: the old root,
2740 * and the new block generated when it was split. We don't know which
2741 * one the cursor is pointing at, so we set up variables "left" and
2742 * "right" for each case.
2743 */
2744 block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
2745
2746#ifdef DEBUG
2747 error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
2748 if (error)
2749 goto error0;
2750#endif
2751
2752 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
2753 if (!xfs_btree_ptr_is_null(cur, &rptr)) {
2754 /* Our block is left, pick up the right block. */
2755 lbp = bp;
2756 xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2757 left = block;
Eric Sandeen0d7409b2014-04-14 18:59:56 +10002758 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
Christoph Hellwig344207c2008-10-30 16:57:16 +11002759 if (error)
2760 goto error0;
2761 bp = rbp;
2762 nptr = 1;
2763 } else {
2764 /* Our block is right, pick up the left block. */
2765 rbp = bp;
2766 xfs_btree_buf_to_ptr(cur, rbp, &rptr);
2767 right = block;
2768 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
Eric Sandeen0d7409b2014-04-14 18:59:56 +10002769 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
Christoph Hellwig344207c2008-10-30 16:57:16 +11002770 if (error)
2771 goto error0;
2772 bp = lbp;
2773 nptr = 2;
2774 }
2775 /* Fill in the new block's btree header and log it. */
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05002776 xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
Christoph Hellwig344207c2008-10-30 16:57:16 +11002777 xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
2778 ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
2779 !xfs_btree_ptr_is_null(cur, &rptr));
2780
2781 /* Fill in the key data in the new root. */
2782 if (xfs_btree_get_level(left) > 0) {
2783 xfs_btree_copy_keys(cur,
2784 xfs_btree_key_addr(cur, 1, new),
2785 xfs_btree_key_addr(cur, 1, left), 1);
2786 xfs_btree_copy_keys(cur,
2787 xfs_btree_key_addr(cur, 2, new),
2788 xfs_btree_key_addr(cur, 1, right), 1);
2789 } else {
2790 cur->bc_ops->init_key_from_rec(
2791 xfs_btree_key_addr(cur, 1, new),
2792 xfs_btree_rec_addr(cur, 1, left));
2793 cur->bc_ops->init_key_from_rec(
2794 xfs_btree_key_addr(cur, 2, new),
2795 xfs_btree_rec_addr(cur, 1, right));
2796 }
2797 xfs_btree_log_keys(cur, nbp, 1, 2);
2798
2799 /* Fill in the pointer data in the new root. */
2800 xfs_btree_copy_ptrs(cur,
2801 xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
2802 xfs_btree_copy_ptrs(cur,
2803 xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
2804 xfs_btree_log_ptrs(cur, nbp, 1, 2);
2805
2806 /* Fix up the cursor. */
2807 xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
2808 cur->bc_ptrs[cur->bc_nlevels] = nptr;
2809 cur->bc_nlevels++;
2810 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2811 *stat = 1;
2812 return 0;
2813error0:
2814 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2815 return error;
2816out0:
2817 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2818 *stat = 0;
2819 return 0;
2820}
Christoph Hellwig4b22a572008-10-30 16:57:40 +11002821
2822STATIC int
2823xfs_btree_make_block_unfull(
2824 struct xfs_btree_cur *cur, /* btree cursor */
2825 int level, /* btree level */
2826 int numrecs,/* # of recs in block */
2827 int *oindex,/* old tree index */
2828 int *index, /* new tree index */
2829 union xfs_btree_ptr *nptr, /* new btree ptr */
2830 struct xfs_btree_cur **ncur, /* new btree cursor */
2831 union xfs_btree_rec *nrec, /* new record */
2832 int *stat)
2833{
2834 union xfs_btree_key key; /* new btree key value */
2835 int error = 0;
2836
2837 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2838 level == cur->bc_nlevels - 1) {
2839 struct xfs_inode *ip = cur->bc_private.b.ip;
2840
2841 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
2842 /* A root block that can be made bigger. */
Christoph Hellwig4b22a572008-10-30 16:57:40 +11002843 xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
2844 } else {
2845 /* A root block that needs replacing */
2846 int logflags = 0;
2847
2848 error = xfs_btree_new_iroot(cur, &logflags, stat);
2849 if (error || *stat == 0)
2850 return error;
2851
2852 xfs_trans_log_inode(cur->bc_tp, ip, logflags);
2853 }
2854
2855 return 0;
2856 }
2857
2858 /* First, try shifting an entry to the right neighbor. */
2859 error = xfs_btree_rshift(cur, level, stat);
2860 if (error || *stat)
2861 return error;
2862
2863 /* Next, try shifting an entry to the left neighbor. */
2864 error = xfs_btree_lshift(cur, level, stat);
2865 if (error)
2866 return error;
2867
2868 if (*stat) {
2869 *oindex = *index = cur->bc_ptrs[level];
2870 return 0;
2871 }
2872
2873 /*
2874 * Next, try splitting the current block in half.
2875 *
2876 * If this works we have to re-set our variables because we
2877 * could be in a different block now.
2878 */
2879 error = xfs_btree_split(cur, level, nptr, &key, ncur, stat);
2880 if (error || *stat == 0)
2881 return error;
2882
2883
2884 *index = cur->bc_ptrs[level];
2885 cur->bc_ops->init_rec_from_key(&key, nrec);
2886 return 0;
2887}
2888
2889/*
2890 * Insert one record/level. Return information to the caller
2891 * allowing the next level up to proceed if necessary.
2892 */
2893STATIC int
2894xfs_btree_insrec(
2895 struct xfs_btree_cur *cur, /* btree cursor */
2896 int level, /* level to insert record at */
2897 union xfs_btree_ptr *ptrp, /* i/o: block number inserted */
2898 union xfs_btree_rec *recp, /* i/o: record data inserted */
2899 struct xfs_btree_cur **curp, /* output: new cursor replacing cur */
2900 int *stat) /* success/failure */
2901{
2902 struct xfs_btree_block *block; /* btree block */
2903 struct xfs_buf *bp; /* buffer for block */
2904 union xfs_btree_key key; /* btree key */
2905 union xfs_btree_ptr nptr; /* new block ptr */
2906 struct xfs_btree_cur *ncur; /* new btree cursor */
2907 union xfs_btree_rec nrec; /* new record count */
2908 int optr; /* old key/record index */
2909 int ptr; /* key/record index */
2910 int numrecs;/* number of records */
2911 int error; /* error return value */
2912#ifdef DEBUG
2913 int i;
2914#endif
2915
2916 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2917 XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp);
2918
2919 ncur = NULL;
2920
2921 /*
2922 * If we have an external root pointer, and we've made it to the
2923 * root level, allocate a new root block and we're done.
2924 */
2925 if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2926 (level >= cur->bc_nlevels)) {
2927 error = xfs_btree_new_root(cur, stat);
2928 xfs_btree_set_ptr_null(cur, ptrp);
2929
2930 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2931 return error;
2932 }
2933
2934 /* If we're off the left edge, return failure. */
2935 ptr = cur->bc_ptrs[level];
2936 if (ptr == 0) {
2937 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2938 *stat = 0;
2939 return 0;
2940 }
2941
2942 /* Make a key out of the record data to be inserted, and save it. */
2943 cur->bc_ops->init_key_from_rec(&key, recp);
2944
2945 optr = ptr;
2946
2947 XFS_BTREE_STATS_INC(cur, insrec);
2948
2949 /* Get pointers to the btree buffer and block. */
2950 block = xfs_btree_get_block(cur, level, &bp);
2951 numrecs = xfs_btree_get_numrecs(block);
2952
2953#ifdef DEBUG
2954 error = xfs_btree_check_block(cur, block, level, bp);
2955 if (error)
2956 goto error0;
2957
2958 /* Check that the new entry is being inserted in the right place. */
2959 if (ptr <= numrecs) {
2960 if (level == 0) {
Christoph Hellwig4a26e662008-10-30 16:58:32 +11002961 ASSERT(cur->bc_ops->recs_inorder(cur, recp,
2962 xfs_btree_rec_addr(cur, ptr, block)));
Christoph Hellwig4b22a572008-10-30 16:57:40 +11002963 } else {
Christoph Hellwig4a26e662008-10-30 16:58:32 +11002964 ASSERT(cur->bc_ops->keys_inorder(cur, &key,
2965 xfs_btree_key_addr(cur, ptr, block)));
Christoph Hellwig4b22a572008-10-30 16:57:40 +11002966 }
2967 }
2968#endif
2969
2970 /*
2971 * If the block is full, we can't insert the new entry until we
2972 * make the block un-full.
2973 */
2974 xfs_btree_set_ptr_null(cur, &nptr);
2975 if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
2976 error = xfs_btree_make_block_unfull(cur, level, numrecs,
2977 &optr, &ptr, &nptr, &ncur, &nrec, stat);
2978 if (error || *stat == 0)
2979 goto error0;
2980 }
2981
2982 /*
2983 * The current block may have changed if the block was
2984 * previously full and we have just made space in it.
2985 */
2986 block = xfs_btree_get_block(cur, level, &bp);
2987 numrecs = xfs_btree_get_numrecs(block);
2988
2989#ifdef DEBUG
2990 error = xfs_btree_check_block(cur, block, level, bp);
2991 if (error)
2992 return error;
2993#endif
2994
2995 /*
2996 * At this point we know there's room for our new entry in the block
2997 * we're pointing at.
2998 */
2999 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
3000
3001 if (level > 0) {
3002 /* It's a nonleaf. make a hole in the keys and ptrs */
3003 union xfs_btree_key *kp;
3004 union xfs_btree_ptr *pp;
3005
3006 kp = xfs_btree_key_addr(cur, ptr, block);
3007 pp = xfs_btree_ptr_addr(cur, ptr, block);
3008
3009#ifdef DEBUG
3010 for (i = numrecs - ptr; i >= 0; i--) {
3011 error = xfs_btree_check_ptr(cur, pp, i, level);
3012 if (error)
3013 return error;
3014 }
3015#endif
3016
3017 xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
3018 xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
3019
3020#ifdef DEBUG
3021 error = xfs_btree_check_ptr(cur, ptrp, 0, level);
3022 if (error)
3023 goto error0;
3024#endif
3025
3026 /* Now put the new data in, bump numrecs and log it. */
3027 xfs_btree_copy_keys(cur, kp, &key, 1);
3028 xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
3029 numrecs++;
3030 xfs_btree_set_numrecs(block, numrecs);
3031 xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
3032 xfs_btree_log_keys(cur, bp, ptr, numrecs);
3033#ifdef DEBUG
3034 if (ptr < numrecs) {
Christoph Hellwig4a26e662008-10-30 16:58:32 +11003035 ASSERT(cur->bc_ops->keys_inorder(cur, kp,
3036 xfs_btree_key_addr(cur, ptr + 1, block)));
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003037 }
3038#endif
3039 } else {
3040 /* It's a leaf. make a hole in the records */
3041 union xfs_btree_rec *rp;
3042
3043 rp = xfs_btree_rec_addr(cur, ptr, block);
3044
3045 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
3046
3047 /* Now put the new data in, bump numrecs and log it. */
3048 xfs_btree_copy_recs(cur, rp, recp, 1);
3049 xfs_btree_set_numrecs(block, ++numrecs);
3050 xfs_btree_log_recs(cur, bp, ptr, numrecs);
3051#ifdef DEBUG
3052 if (ptr < numrecs) {
Christoph Hellwig4a26e662008-10-30 16:58:32 +11003053 ASSERT(cur->bc_ops->recs_inorder(cur, rp,
3054 xfs_btree_rec_addr(cur, ptr + 1, block)));
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003055 }
3056#endif
3057 }
3058
3059 /* Log the new number of records in the btree header. */
3060 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3061
3062 /* If we inserted at the start of a block, update the parents' keys. */
3063 if (optr == 1) {
3064 error = xfs_btree_updkey(cur, &key, level + 1);
3065 if (error)
3066 goto error0;
3067 }
3068
3069 /*
3070 * If we are tracking the last record in the tree and
3071 * we are at the far right edge of the tree, update it.
3072 */
3073 if (xfs_btree_is_lastrec(cur, block, level)) {
3074 cur->bc_ops->update_lastrec(cur, block, recp,
3075 ptr, LASTREC_INSREC);
3076 }
3077
3078 /*
3079 * Return the new block number, if any.
3080 * If there is one, give back a record value and a cursor too.
3081 */
3082 *ptrp = nptr;
3083 if (!xfs_btree_ptr_is_null(cur, &nptr)) {
3084 *recp = nrec;
3085 *curp = ncur;
3086 }
3087
3088 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3089 *stat = 1;
3090 return 0;
3091
3092error0:
3093 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3094 return error;
3095}
3096
3097/*
3098 * Insert the record at the point referenced by cur.
3099 *
3100 * A multi-level split of the tree on insert will invalidate the original
3101 * cursor. All callers of this function should assume that the cursor is
3102 * no longer valid and revalidate it.
3103 */
3104int
3105xfs_btree_insert(
3106 struct xfs_btree_cur *cur,
3107 int *stat)
3108{
3109 int error; /* error return value */
3110 int i; /* result value, 0 for failure */
3111 int level; /* current level number in btree */
3112 union xfs_btree_ptr nptr; /* new block number (split result) */
3113 struct xfs_btree_cur *ncur; /* new cursor (split result) */
3114 struct xfs_btree_cur *pcur; /* previous level's cursor */
3115 union xfs_btree_rec rec; /* record to insert */
3116
3117 level = 0;
3118 ncur = NULL;
3119 pcur = cur;
3120
3121 xfs_btree_set_ptr_null(cur, &nptr);
3122 cur->bc_ops->init_rec_from_cur(cur, &rec);
3123
3124 /*
3125 * Loop going up the tree, starting at the leaf level.
3126 * Stop when we don't get a split block, that must mean that
3127 * the insert is finished with this level.
3128 */
3129 do {
3130 /*
3131 * Insert nrec/nptr into this level of the tree.
3132 * Note if we fail, nptr will be null.
3133 */
3134 error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i);
3135 if (error) {
3136 if (pcur != cur)
3137 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
3138 goto error0;
3139 }
3140
3141 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3142 level++;
3143
3144 /*
3145 * See if the cursor we just used is trash.
3146 * Can't trash the caller's cursor, but otherwise we should
3147 * if ncur is a new cursor or we're about to be done.
3148 */
3149 if (pcur != cur &&
3150 (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
3151 /* Save the state from the cursor before we trash it */
3152 if (cur->bc_ops->update_cursor)
3153 cur->bc_ops->update_cursor(pcur, cur);
3154 cur->bc_nlevels = pcur->bc_nlevels;
3155 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
3156 }
3157 /* If we got a new cursor, switch to it. */
3158 if (ncur) {
3159 pcur = ncur;
3160 ncur = NULL;
3161 }
3162 } while (!xfs_btree_ptr_is_null(cur, &nptr));
3163
3164 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3165 *stat = i;
3166 return 0;
3167error0:
3168 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3169 return error;
3170}
Christoph Hellwigd4b3a4b2008-10-30 16:57:51 +11003171
3172/*
3173 * Try to merge a non-leaf block back into the inode root.
3174 *
3175 * Note: the killroot names comes from the fact that we're effectively
3176 * killing the old root block. But because we can't just delete the
3177 * inode we have to copy the single block it was pointing to into the
3178 * inode.
3179 */
Eric Sandeend96f8f82009-07-02 00:09:33 -05003180STATIC int
Christoph Hellwigd4b3a4b2008-10-30 16:57:51 +11003181xfs_btree_kill_iroot(
3182 struct xfs_btree_cur *cur)
3183{
3184 int whichfork = cur->bc_private.b.whichfork;
3185 struct xfs_inode *ip = cur->bc_private.b.ip;
3186 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
3187 struct xfs_btree_block *block;
3188 struct xfs_btree_block *cblock;
3189 union xfs_btree_key *kp;
3190 union xfs_btree_key *ckp;
3191 union xfs_btree_ptr *pp;
3192 union xfs_btree_ptr *cpp;
3193 struct xfs_buf *cbp;
3194 int level;
3195 int index;
3196 int numrecs;
3197#ifdef DEBUG
3198 union xfs_btree_ptr ptr;
3199 int i;
3200#endif
3201
3202 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3203
3204 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
3205 ASSERT(cur->bc_nlevels > 1);
3206
3207 /*
3208 * Don't deal with the root block needs to be a leaf case.
3209 * We're just going to turn the thing back into extents anyway.
3210 */
3211 level = cur->bc_nlevels - 1;
3212 if (level == 1)
3213 goto out0;
3214
3215 /*
3216 * Give up if the root has multiple children.
3217 */
3218 block = xfs_btree_get_iroot(cur);
3219 if (xfs_btree_get_numrecs(block) != 1)
3220 goto out0;
3221
3222 cblock = xfs_btree_get_block(cur, level - 1, &cbp);
3223 numrecs = xfs_btree_get_numrecs(cblock);
3224
3225 /*
3226 * Only do this if the next level will fit.
3227 * Then the data must be copied up to the inode,
3228 * instead of freeing the root you free the next level.
3229 */
3230 if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
3231 goto out0;
3232
3233 XFS_BTREE_STATS_INC(cur, killroot);
3234
3235#ifdef DEBUG
3236 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
3237 ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3238 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
3239 ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3240#endif
3241
3242 index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
3243 if (index) {
3244 xfs_iroot_realloc(cur->bc_private.b.ip, index,
3245 cur->bc_private.b.whichfork);
Christoph Hellwig7cc95a82008-10-30 17:14:34 +11003246 block = ifp->if_broot;
Christoph Hellwigd4b3a4b2008-10-30 16:57:51 +11003247 }
3248
3249 be16_add_cpu(&block->bb_numrecs, index);
3250 ASSERT(block->bb_numrecs == cblock->bb_numrecs);
3251
3252 kp = xfs_btree_key_addr(cur, 1, block);
3253 ckp = xfs_btree_key_addr(cur, 1, cblock);
3254 xfs_btree_copy_keys(cur, kp, ckp, numrecs);
3255
3256 pp = xfs_btree_ptr_addr(cur, 1, block);
3257 cpp = xfs_btree_ptr_addr(cur, 1, cblock);
3258#ifdef DEBUG
3259 for (i = 0; i < numrecs; i++) {
3260 int error;
3261
3262 error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
3263 if (error) {
3264 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3265 return error;
3266 }
3267 }
3268#endif
3269 xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
3270
3271 cur->bc_ops->free_block(cur, cbp);
3272 XFS_BTREE_STATS_INC(cur, free);
3273
3274 cur->bc_bufs[level - 1] = NULL;
3275 be16_add_cpu(&block->bb_level, -1);
3276 xfs_trans_log_inode(cur->bc_tp, ip,
Eric Sandeen9d87c312009-01-14 23:22:07 -06003277 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork));
Christoph Hellwigd4b3a4b2008-10-30 16:57:51 +11003278 cur->bc_nlevels--;
3279out0:
3280 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3281 return 0;
3282}
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003283
Christoph Hellwigc0e59e12010-09-07 23:34:07 +00003284/*
3285 * Kill the current root node, and replace it with it's only child node.
3286 */
3287STATIC int
3288xfs_btree_kill_root(
3289 struct xfs_btree_cur *cur,
3290 struct xfs_buf *bp,
3291 int level,
3292 union xfs_btree_ptr *newroot)
3293{
3294 int error;
3295
3296 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3297 XFS_BTREE_STATS_INC(cur, killroot);
3298
3299 /*
3300 * Update the root pointer, decreasing the level by 1 and then
3301 * free the old root.
3302 */
3303 cur->bc_ops->set_root(cur, newroot, -1);
3304
3305 error = cur->bc_ops->free_block(cur, bp);
3306 if (error) {
3307 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3308 return error;
3309 }
3310
3311 XFS_BTREE_STATS_INC(cur, free);
3312
3313 cur->bc_bufs[level] = NULL;
3314 cur->bc_ra[level] = 0;
3315 cur->bc_nlevels--;
3316
3317 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3318 return 0;
3319}
3320
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003321STATIC int
3322xfs_btree_dec_cursor(
3323 struct xfs_btree_cur *cur,
3324 int level,
3325 int *stat)
3326{
3327 int error;
3328 int i;
3329
3330 if (level > 0) {
3331 error = xfs_btree_decrement(cur, level, &i);
3332 if (error)
3333 return error;
3334 }
3335
3336 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3337 *stat = 1;
3338 return 0;
3339}
3340
3341/*
3342 * Single level of the btree record deletion routine.
3343 * Delete record pointed to by cur/level.
3344 * Remove the record from its block then rebalance the tree.
3345 * Return 0 for error, 1 for done, 2 to go on to the next level.
3346 */
3347STATIC int /* error */
3348xfs_btree_delrec(
3349 struct xfs_btree_cur *cur, /* btree cursor */
3350 int level, /* level removing record from */
3351 int *stat) /* fail/done/go-on */
3352{
3353 struct xfs_btree_block *block; /* btree block */
3354 union xfs_btree_ptr cptr; /* current block ptr */
3355 struct xfs_buf *bp; /* buffer for block */
3356 int error; /* error return value */
3357 int i; /* loop counter */
3358 union xfs_btree_key key; /* storage for keyp */
3359 union xfs_btree_key *keyp = &key; /* passed to the next level */
3360 union xfs_btree_ptr lptr; /* left sibling block ptr */
3361 struct xfs_buf *lbp; /* left buffer pointer */
3362 struct xfs_btree_block *left; /* left btree block */
3363 int lrecs = 0; /* left record count */
3364 int ptr; /* key/record index */
3365 union xfs_btree_ptr rptr; /* right sibling block ptr */
3366 struct xfs_buf *rbp; /* right buffer pointer */
3367 struct xfs_btree_block *right; /* right btree block */
3368 struct xfs_btree_block *rrblock; /* right-right btree block */
3369 struct xfs_buf *rrbp; /* right-right buffer pointer */
3370 int rrecs = 0; /* right record count */
3371 struct xfs_btree_cur *tcur; /* temporary btree cursor */
3372 int numrecs; /* temporary numrec count */
3373
3374 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3375 XFS_BTREE_TRACE_ARGI(cur, level);
3376
3377 tcur = NULL;
3378
3379 /* Get the index of the entry being deleted, check for nothing there. */
3380 ptr = cur->bc_ptrs[level];
3381 if (ptr == 0) {
3382 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3383 *stat = 0;
3384 return 0;
3385 }
3386
3387 /* Get the buffer & block containing the record or key/ptr. */
3388 block = xfs_btree_get_block(cur, level, &bp);
3389 numrecs = xfs_btree_get_numrecs(block);
3390
3391#ifdef DEBUG
3392 error = xfs_btree_check_block(cur, block, level, bp);
3393 if (error)
3394 goto error0;
3395#endif
3396
3397 /* Fail if we're off the end of the block. */
3398 if (ptr > numrecs) {
3399 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3400 *stat = 0;
3401 return 0;
3402 }
3403
3404 XFS_BTREE_STATS_INC(cur, delrec);
3405 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
3406
3407 /* Excise the entries being deleted. */
3408 if (level > 0) {
3409 /* It's a nonleaf. operate on keys and ptrs */
3410 union xfs_btree_key *lkp;
3411 union xfs_btree_ptr *lpp;
3412
3413 lkp = xfs_btree_key_addr(cur, ptr + 1, block);
3414 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
3415
3416#ifdef DEBUG
3417 for (i = 0; i < numrecs - ptr; i++) {
3418 error = xfs_btree_check_ptr(cur, lpp, i, level);
3419 if (error)
3420 goto error0;
3421 }
3422#endif
3423
3424 if (ptr < numrecs) {
3425 xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
3426 xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
3427 xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
3428 xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
3429 }
3430
3431 /*
3432 * If it's the first record in the block, we'll need to pass a
3433 * key up to the next level (updkey).
3434 */
3435 if (ptr == 1)
3436 keyp = xfs_btree_key_addr(cur, 1, block);
3437 } else {
3438 /* It's a leaf. operate on records */
3439 if (ptr < numrecs) {
3440 xfs_btree_shift_recs(cur,
3441 xfs_btree_rec_addr(cur, ptr + 1, block),
3442 -1, numrecs - ptr);
3443 xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
3444 }
3445
3446 /*
3447 * If it's the first record in the block, we'll need a key
3448 * structure to pass up to the next level (updkey).
3449 */
3450 if (ptr == 1) {
3451 cur->bc_ops->init_key_from_rec(&key,
3452 xfs_btree_rec_addr(cur, 1, block));
3453 keyp = &key;
3454 }
3455 }
3456
3457 /*
3458 * Decrement and log the number of entries in the block.
3459 */
3460 xfs_btree_set_numrecs(block, --numrecs);
3461 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3462
3463 /*
3464 * If we are tracking the last record in the tree and
3465 * we are at the far right edge of the tree, update it.
3466 */
3467 if (xfs_btree_is_lastrec(cur, block, level)) {
3468 cur->bc_ops->update_lastrec(cur, block, NULL,
3469 ptr, LASTREC_DELREC);
3470 }
3471
3472 /*
3473 * We're at the root level. First, shrink the root block in-memory.
3474 * Try to get rid of the next level down. If we can't then there's
3475 * nothing left to do.
3476 */
3477 if (level == cur->bc_nlevels - 1) {
3478 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3479 xfs_iroot_realloc(cur->bc_private.b.ip, -1,
3480 cur->bc_private.b.whichfork);
3481
3482 error = xfs_btree_kill_iroot(cur);
3483 if (error)
3484 goto error0;
3485
3486 error = xfs_btree_dec_cursor(cur, level, stat);
3487 if (error)
3488 goto error0;
3489 *stat = 1;
3490 return 0;
3491 }
3492
3493 /*
3494 * If this is the root level, and there's only one entry left,
3495 * and it's NOT the leaf level, then we can get rid of this
3496 * level.
3497 */
3498 if (numrecs == 1 && level > 0) {
3499 union xfs_btree_ptr *pp;
3500 /*
3501 * pp is still set to the first pointer in the block.
3502 * Make it the new root of the btree.
3503 */
3504 pp = xfs_btree_ptr_addr(cur, 1, block);
Christoph Hellwigc0e59e12010-09-07 23:34:07 +00003505 error = xfs_btree_kill_root(cur, bp, level, pp);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003506 if (error)
3507 goto error0;
3508 } else if (level > 0) {
3509 error = xfs_btree_dec_cursor(cur, level, stat);
3510 if (error)
3511 goto error0;
3512 }
3513 *stat = 1;
3514 return 0;
3515 }
3516
3517 /*
3518 * If we deleted the leftmost entry in the block, update the
3519 * key values above us in the tree.
3520 */
3521 if (ptr == 1) {
3522 error = xfs_btree_updkey(cur, keyp, level + 1);
3523 if (error)
3524 goto error0;
3525 }
3526
3527 /*
3528 * If the number of records remaining in the block is at least
3529 * the minimum, we're done.
3530 */
3531 if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
3532 error = xfs_btree_dec_cursor(cur, level, stat);
3533 if (error)
3534 goto error0;
3535 return 0;
3536 }
3537
3538 /*
3539 * Otherwise, we have to move some records around to keep the
3540 * tree balanced. Look at the left and right sibling blocks to
3541 * see if we can re-balance by moving only one record.
3542 */
3543 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3544 xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
3545
3546 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3547 /*
3548 * One child of root, need to get a chance to copy its contents
3549 * into the root and delete it. Can't go up to next level,
3550 * there's nothing to delete there.
3551 */
3552 if (xfs_btree_ptr_is_null(cur, &rptr) &&
3553 xfs_btree_ptr_is_null(cur, &lptr) &&
3554 level == cur->bc_nlevels - 2) {
3555 error = xfs_btree_kill_iroot(cur);
3556 if (!error)
3557 error = xfs_btree_dec_cursor(cur, level, stat);
3558 if (error)
3559 goto error0;
3560 return 0;
3561 }
3562 }
3563
3564 ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
3565 !xfs_btree_ptr_is_null(cur, &lptr));
3566
3567 /*
3568 * Duplicate the cursor so our btree manipulations here won't
3569 * disrupt the next level up.
3570 */
3571 error = xfs_btree_dup_cursor(cur, &tcur);
3572 if (error)
3573 goto error0;
3574
3575 /*
3576 * If there's a right sibling, see if it's ok to shift an entry
3577 * out of it.
3578 */
3579 if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3580 /*
3581 * Move the temp cursor to the last entry in the next block.
3582 * Actually any entry but the first would suffice.
3583 */
3584 i = xfs_btree_lastrec(tcur, level);
3585 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3586
3587 error = xfs_btree_increment(tcur, level, &i);
3588 if (error)
3589 goto error0;
3590 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3591
3592 i = xfs_btree_lastrec(tcur, level);
3593 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3594
3595 /* Grab a pointer to the block. */
3596 right = xfs_btree_get_block(tcur, level, &rbp);
3597#ifdef DEBUG
3598 error = xfs_btree_check_block(tcur, right, level, rbp);
3599 if (error)
3600 goto error0;
3601#endif
3602 /* Grab the current block number, for future use. */
3603 xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
3604
3605 /*
3606 * If right block is full enough so that removing one entry
3607 * won't make it too empty, and left-shifting an entry out
3608 * of right to us works, we're done.
3609 */
3610 if (xfs_btree_get_numrecs(right) - 1 >=
3611 cur->bc_ops->get_minrecs(tcur, level)) {
3612 error = xfs_btree_lshift(tcur, level, &i);
3613 if (error)
3614 goto error0;
3615 if (i) {
3616 ASSERT(xfs_btree_get_numrecs(block) >=
3617 cur->bc_ops->get_minrecs(tcur, level));
3618
3619 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3620 tcur = NULL;
3621
3622 error = xfs_btree_dec_cursor(cur, level, stat);
3623 if (error)
3624 goto error0;
3625 return 0;
3626 }
3627 }
3628
3629 /*
3630 * Otherwise, grab the number of records in right for
3631 * future reference, and fix up the temp cursor to point
3632 * to our block again (last record).
3633 */
3634 rrecs = xfs_btree_get_numrecs(right);
3635 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3636 i = xfs_btree_firstrec(tcur, level);
3637 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3638
3639 error = xfs_btree_decrement(tcur, level, &i);
3640 if (error)
3641 goto error0;
3642 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3643 }
3644 }
3645
3646 /*
3647 * If there's a left sibling, see if it's ok to shift an entry
3648 * out of it.
3649 */
3650 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3651 /*
3652 * Move the temp cursor to the first entry in the
3653 * previous block.
3654 */
3655 i = xfs_btree_firstrec(tcur, level);
3656 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3657
3658 error = xfs_btree_decrement(tcur, level, &i);
3659 if (error)
3660 goto error0;
3661 i = xfs_btree_firstrec(tcur, level);
3662 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3663
3664 /* Grab a pointer to the block. */
3665 left = xfs_btree_get_block(tcur, level, &lbp);
3666#ifdef DEBUG
3667 error = xfs_btree_check_block(cur, left, level, lbp);
3668 if (error)
3669 goto error0;
3670#endif
3671 /* Grab the current block number, for future use. */
3672 xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
3673
3674 /*
3675 * If left block is full enough so that removing one entry
3676 * won't make it too empty, and right-shifting an entry out
3677 * of left to us works, we're done.
3678 */
3679 if (xfs_btree_get_numrecs(left) - 1 >=
3680 cur->bc_ops->get_minrecs(tcur, level)) {
3681 error = xfs_btree_rshift(tcur, level, &i);
3682 if (error)
3683 goto error0;
3684 if (i) {
3685 ASSERT(xfs_btree_get_numrecs(block) >=
3686 cur->bc_ops->get_minrecs(tcur, level));
3687 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3688 tcur = NULL;
3689 if (level == 0)
3690 cur->bc_ptrs[0]++;
3691 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3692 *stat = 1;
3693 return 0;
3694 }
3695 }
3696
3697 /*
3698 * Otherwise, grab the number of records in right for
3699 * future reference.
3700 */
3701 lrecs = xfs_btree_get_numrecs(left);
3702 }
3703
3704 /* Delete the temp cursor, we're done with it. */
3705 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3706 tcur = NULL;
3707
3708 /* If here, we need to do a join to keep the tree balanced. */
3709 ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
3710
3711 if (!xfs_btree_ptr_is_null(cur, &lptr) &&
3712 lrecs + xfs_btree_get_numrecs(block) <=
3713 cur->bc_ops->get_maxrecs(cur, level)) {
3714 /*
3715 * Set "right" to be the starting block,
3716 * "left" to be the left neighbor.
3717 */
3718 rptr = cptr;
3719 right = block;
3720 rbp = bp;
Eric Sandeen0d7409b2014-04-14 18:59:56 +10003721 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003722 if (error)
3723 goto error0;
3724
3725 /*
3726 * If that won't work, see if we can join with the right neighbor block.
3727 */
3728 } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
3729 rrecs + xfs_btree_get_numrecs(block) <=
3730 cur->bc_ops->get_maxrecs(cur, level)) {
3731 /*
3732 * Set "left" to be the starting block,
3733 * "right" to be the right neighbor.
3734 */
3735 lptr = cptr;
3736 left = block;
3737 lbp = bp;
Eric Sandeen0d7409b2014-04-14 18:59:56 +10003738 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003739 if (error)
3740 goto error0;
3741
3742 /*
3743 * Otherwise, we can't fix the imbalance.
3744 * Just return. This is probably a logic error, but it's not fatal.
3745 */
3746 } else {
3747 error = xfs_btree_dec_cursor(cur, level, stat);
3748 if (error)
3749 goto error0;
3750 return 0;
3751 }
3752
3753 rrecs = xfs_btree_get_numrecs(right);
3754 lrecs = xfs_btree_get_numrecs(left);
3755
3756 /*
3757 * We're now going to join "left" and "right" by moving all the stuff
3758 * in "right" to "left" and deleting "right".
3759 */
3760 XFS_BTREE_STATS_ADD(cur, moves, rrecs);
3761 if (level > 0) {
3762 /* It's a non-leaf. Move keys and pointers. */
3763 union xfs_btree_key *lkp; /* left btree key */
3764 union xfs_btree_ptr *lpp; /* left address pointer */
3765 union xfs_btree_key *rkp; /* right btree key */
3766 union xfs_btree_ptr *rpp; /* right address pointer */
3767
3768 lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
3769 lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
3770 rkp = xfs_btree_key_addr(cur, 1, right);
3771 rpp = xfs_btree_ptr_addr(cur, 1, right);
3772#ifdef DEBUG
3773 for (i = 1; i < rrecs; i++) {
3774 error = xfs_btree_check_ptr(cur, rpp, i, level);
3775 if (error)
3776 goto error0;
3777 }
3778#endif
3779 xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
3780 xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
3781
3782 xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
3783 xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
3784 } else {
3785 /* It's a leaf. Move records. */
3786 union xfs_btree_rec *lrp; /* left record pointer */
3787 union xfs_btree_rec *rrp; /* right record pointer */
3788
3789 lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
3790 rrp = xfs_btree_rec_addr(cur, 1, right);
3791
3792 xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
3793 xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
3794 }
3795
3796 XFS_BTREE_STATS_INC(cur, join);
3797
3798 /*
Malcolm Parsons9da096f2009-03-29 09:55:42 +02003799 * Fix up the number of records and right block pointer in the
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003800 * surviving block, and log it.
3801 */
3802 xfs_btree_set_numrecs(left, lrecs + rrecs);
3803 xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB),
3804 xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3805 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
3806
3807 /* If there is a right sibling, point it to the remaining block. */
3808 xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3809 if (!xfs_btree_ptr_is_null(cur, &cptr)) {
Eric Sandeen0d7409b2014-04-14 18:59:56 +10003810 error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003811 if (error)
3812 goto error0;
3813 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
3814 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
3815 }
3816
3817 /* Free the deleted block. */
3818 error = cur->bc_ops->free_block(cur, rbp);
3819 if (error)
3820 goto error0;
3821 XFS_BTREE_STATS_INC(cur, free);
3822
3823 /*
3824 * If we joined with the left neighbor, set the buffer in the
3825 * cursor to the left block, and fix up the index.
3826 */
3827 if (bp != lbp) {
3828 cur->bc_bufs[level] = lbp;
3829 cur->bc_ptrs[level] += lrecs;
3830 cur->bc_ra[level] = 0;
3831 }
3832 /*
3833 * If we joined with the right neighbor and there's a level above
3834 * us, increment the cursor at that level.
3835 */
3836 else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
3837 (level + 1 < cur->bc_nlevels)) {
3838 error = xfs_btree_increment(cur, level + 1, &i);
3839 if (error)
3840 goto error0;
3841 }
3842
3843 /*
3844 * Readjust the ptr at this level if it's not a leaf, since it's
3845 * still pointing at the deletion point, which makes the cursor
3846 * inconsistent. If this makes the ptr 0, the caller fixes it up.
3847 * We can't use decrement because it would change the next level up.
3848 */
3849 if (level > 0)
3850 cur->bc_ptrs[level]--;
3851
3852 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3853 /* Return value means the next level up has something to do. */
3854 *stat = 2;
3855 return 0;
3856
3857error0:
3858 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3859 if (tcur)
3860 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
3861 return error;
3862}
3863
3864/*
3865 * Delete the record pointed to by cur.
3866 * The cursor refers to the place where the record was (could be inserted)
3867 * when the operation returns.
3868 */
3869int /* error */
3870xfs_btree_delete(
3871 struct xfs_btree_cur *cur,
3872 int *stat) /* success/failure */
3873{
3874 int error; /* error return value */
3875 int level;
3876 int i;
3877
3878 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3879
3880 /*
3881 * Go up the tree, starting at leaf level.
3882 *
3883 * If 2 is returned then a join was done; go to the next level.
3884 * Otherwise we are done.
3885 */
3886 for (level = 0, i = 2; i == 2; level++) {
3887 error = xfs_btree_delrec(cur, level, &i);
3888 if (error)
3889 goto error0;
3890 }
3891
3892 if (i == 0) {
3893 for (level = 1; level < cur->bc_nlevels; level++) {
3894 if (cur->bc_ptrs[level] == 0) {
3895 error = xfs_btree_decrement(cur, level, &i);
3896 if (error)
3897 goto error0;
3898 break;
3899 }
3900 }
3901 }
3902
3903 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3904 *stat = i;
3905 return 0;
3906error0:
3907 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3908 return error;
3909}
Christoph Hellwig8cc938f2008-10-30 16:58:11 +11003910
3911/*
3912 * Get the data from the pointed-to record.
3913 */
3914int /* error */
3915xfs_btree_get_rec(
3916 struct xfs_btree_cur *cur, /* btree cursor */
3917 union xfs_btree_rec **recp, /* output: btree record */
3918 int *stat) /* output: success/failure */
3919{
3920 struct xfs_btree_block *block; /* btree block */
3921 struct xfs_buf *bp; /* buffer pointer */
3922 int ptr; /* record number */
3923#ifdef DEBUG
3924 int error; /* error return value */
3925#endif
3926
3927 ptr = cur->bc_ptrs[0];
3928 block = xfs_btree_get_block(cur, 0, &bp);
3929
3930#ifdef DEBUG
3931 error = xfs_btree_check_block(cur, block, 0, bp);
3932 if (error)
3933 return error;
3934#endif
3935
3936 /*
3937 * Off the right end or left end, return failure.
3938 */
3939 if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
3940 *stat = 0;
3941 return 0;
3942 }
3943
3944 /*
3945 * Point to the record and extract its data.
3946 */
3947 *recp = xfs_btree_rec_addr(cur, ptr, block);
3948 *stat = 1;
3949 return 0;
3950}
Dave Chinner21b5c972013-08-30 10:23:44 +10003951
3952/*
3953 * Change the owner of a btree.
3954 *
3955 * The mechanism we use here is ordered buffer logging. Because we don't know
3956 * how many buffers were are going to need to modify, we don't really want to
3957 * have to make transaction reservations for the worst case of every buffer in a
3958 * full size btree as that may be more space that we can fit in the log....
3959 *
3960 * We do the btree walk in the most optimal manner possible - we have sibling
3961 * pointers so we can just walk all the blocks on each level from left to right
3962 * in a single pass, and then move to the next level and do the same. We can
3963 * also do readahead on the sibling pointers to get IO moving more quickly,
3964 * though for slow disks this is unlikely to make much difference to performance
3965 * as the amount of CPU work we have to do before moving to the next block is
3966 * relatively small.
3967 *
3968 * For each btree block that we load, modify the owner appropriately, set the
3969 * buffer as an ordered buffer and log it appropriately. We need to ensure that
3970 * we mark the region we change dirty so that if the buffer is relogged in
3971 * a subsequent transaction the changes we make here as an ordered buffer are
Dave Chinner638f44162013-08-30 10:23:45 +10003972 * correctly relogged in that transaction. If we are in recovery context, then
3973 * just queue the modified buffer as delayed write buffer so the transaction
3974 * recovery completion writes the changes to disk.
Dave Chinner21b5c972013-08-30 10:23:44 +10003975 */
3976static int
3977xfs_btree_block_change_owner(
3978 struct xfs_btree_cur *cur,
3979 int level,
Dave Chinner638f44162013-08-30 10:23:45 +10003980 __uint64_t new_owner,
3981 struct list_head *buffer_list)
Dave Chinner21b5c972013-08-30 10:23:44 +10003982{
3983 struct xfs_btree_block *block;
3984 struct xfs_buf *bp;
3985 union xfs_btree_ptr rptr;
3986
3987 /* do right sibling readahead */
3988 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
3989
3990 /* modify the owner */
3991 block = xfs_btree_get_block(cur, level, &bp);
3992 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
3993 block->bb_u.l.bb_owner = cpu_to_be64(new_owner);
3994 else
3995 block->bb_u.s.bb_owner = cpu_to_be32(new_owner);
3996
3997 /*
Dave Chinner638f44162013-08-30 10:23:45 +10003998 * If the block is a root block hosted in an inode, we might not have a
3999 * buffer pointer here and we shouldn't attempt to log the change as the
4000 * information is already held in the inode and discarded when the root
4001 * block is formatted into the on-disk inode fork. We still change it,
4002 * though, so everything is consistent in memory.
Dave Chinner21b5c972013-08-30 10:23:44 +10004003 */
4004 if (bp) {
Dave Chinner638f44162013-08-30 10:23:45 +10004005 if (cur->bc_tp) {
4006 xfs_trans_ordered_buf(cur->bc_tp, bp);
4007 xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
4008 } else {
4009 xfs_buf_delwri_queue(bp, buffer_list);
4010 }
Dave Chinner21b5c972013-08-30 10:23:44 +10004011 } else {
4012 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
4013 ASSERT(level == cur->bc_nlevels - 1);
4014 }
4015
4016 /* now read rh sibling block for next iteration */
4017 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
4018 if (xfs_btree_ptr_is_null(cur, &rptr))
Dave Chinner24513372014-06-25 14:58:08 +10004019 return -ENOENT;
Dave Chinner21b5c972013-08-30 10:23:44 +10004020
4021 return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
4022}
4023
4024int
4025xfs_btree_change_owner(
4026 struct xfs_btree_cur *cur,
Dave Chinner638f44162013-08-30 10:23:45 +10004027 __uint64_t new_owner,
4028 struct list_head *buffer_list)
Dave Chinner21b5c972013-08-30 10:23:44 +10004029{
4030 union xfs_btree_ptr lptr;
4031 int level;
4032 struct xfs_btree_block *block = NULL;
4033 int error = 0;
4034
4035 cur->bc_ops->init_ptr_from_cur(cur, &lptr);
4036
4037 /* for each level */
4038 for (level = cur->bc_nlevels - 1; level >= 0; level--) {
4039 /* grab the left hand block */
4040 error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
4041 if (error)
4042 return error;
4043
4044 /* readahead the left most block for the next level down */
4045 if (level > 0) {
4046 union xfs_btree_ptr *ptr;
4047
4048 ptr = xfs_btree_ptr_addr(cur, 1, block);
4049 xfs_btree_readahead_ptr(cur, ptr, 1);
4050
4051 /* save for the next iteration of the loop */
4052 lptr = *ptr;
4053 }
4054
4055 /* for each buffer in the level */
4056 do {
4057 error = xfs_btree_block_change_owner(cur, level,
Dave Chinner638f44162013-08-30 10:23:45 +10004058 new_owner,
4059 buffer_list);
Dave Chinner21b5c972013-08-30 10:23:44 +10004060 } while (!error);
4061
Dave Chinner24513372014-06-25 14:58:08 +10004062 if (error != -ENOENT)
Dave Chinner21b5c972013-08-30 10:23:44 +10004063 return error;
4064 }
4065
4066 return 0;
4067}