blob: 1c842dce44b637a75402c95ed01c847b27a9cfa0 [file] [log] [blame]
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
18#include <linux/log2.h>
19
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_format.h"
Dave Chinner239880e2013-10-23 10:50:10 +110023#include "xfs_log_format.h"
24#include "xfs_trans_resv.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100025#include "xfs_mount.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100026#include "xfs_inode.h"
Dave Chinner239880e2013-10-23 10:50:10 +110027#include "xfs_trans.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100028#include "xfs_inode_item.h"
Dave Chinnera4fbe6a2013-10-23 10:51:50 +110029#include "xfs_bmap_btree.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100030#include "xfs_bmap.h"
31#include "xfs_error.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100032#include "xfs_trace.h"
Dave Chinnera4fbe6a2013-10-23 10:51:50 +110033#include "xfs_attr_sf.h"
Darrick J. Wong244efea2016-02-08 15:00:01 +110034#include "xfs_da_format.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100035
36kmem_zone_t *xfs_ifork_zone;
37
38STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
39STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
40STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
41
#ifdef DEBUG
/*
 * Debug-only sanity check of the in-core extent list.
 *
 * Walks the first nrecs extent records of the fork and, when the extent
 * format is XFS_EXTFMT_NOSTATE, asserts that each record decodes to a
 * normal (XFS_EXT_NORM) extent.  Compiles away to nothing when DEBUG is
 * not defined.
 */
void
xfs_validate_extents(
	xfs_ifork_t		*ifp,
	int			nrecs,
	xfs_exntfmt_t		fmt)
{
	int			idx = 0;

	while (idx < nrecs) {
		xfs_bmbt_rec_host_t	*src = xfs_iext_get_ext(ifp, idx);
		xfs_bmbt_rec_host_t	copy;
		xfs_bmbt_irec_t		decoded;

		/* Read via get_unaligned() into a local copy, then decode. */
		copy.l0 = get_unaligned(&src->l0);
		copy.l1 = get_unaligned(&src->l1);
		xfs_bmbt_get_all(&copy, &decoded);

		ASSERT(fmt != XFS_EXTFMT_NOSTATE ||
		       decoded.br_state == XFS_EXT_NORM);
		idx++;
	}
}
#else /* DEBUG */
#define xfs_validate_extents(ifp, nrecs, fmt)
#endif /* DEBUG */
69
70
71/*
72 * Move inode type and inode format specific information from the
73 * on-disk inode to the in-core inode. For fifos, devs, and sockets
74 * this means set if_rdev to the proper value. For files, directories,
75 * and symlinks this means to bring in the in-line data or extent
76 * pointers. For a file in B-tree format, only the root is immediately
77 * brought in-core. The rest will be in-lined in if_extents when it
78 * is first referenced (see xfs_iread_extents()).
79 */
80int
81xfs_iformat_fork(
82 xfs_inode_t *ip,
83 xfs_dinode_t *dip)
84{
85 xfs_attr_shortform_t *atp;
86 int size;
87 int error = 0;
88 xfs_fsize_t di_size;
89
90 if (unlikely(be32_to_cpu(dip->di_nextents) +
91 be16_to_cpu(dip->di_anextents) >
92 be64_to_cpu(dip->di_nblocks))) {
93 xfs_warn(ip->i_mount,
94 "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
95 (unsigned long long)ip->i_ino,
96 (int)(be32_to_cpu(dip->di_nextents) +
97 be16_to_cpu(dip->di_anextents)),
98 (unsigned long long)
99 be64_to_cpu(dip->di_nblocks));
100 XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
101 ip->i_mount, dip);
Dave Chinner24513372014-06-25 14:58:08 +1000102 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000103 }
104
105 if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
106 xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
107 (unsigned long long)ip->i_ino,
108 dip->di_forkoff);
109 XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
110 ip->i_mount, dip);
Dave Chinner24513372014-06-25 14:58:08 +1000111 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000112 }
113
114 if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
115 !ip->i_mount->m_rtdev_targp)) {
116 xfs_warn(ip->i_mount,
117 "corrupt dinode %Lu, has realtime flag set.",
118 ip->i_ino);
119 XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
120 XFS_ERRLEVEL_LOW, ip->i_mount, dip);
Dave Chinner24513372014-06-25 14:58:08 +1000121 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000122 }
123
Dave Chinnerc19b3b052016-02-09 16:54:58 +1100124 switch (VFS_I(ip)->i_mode & S_IFMT) {
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000125 case S_IFIFO:
126 case S_IFCHR:
127 case S_IFBLK:
128 case S_IFSOCK:
129 if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
130 XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
131 ip->i_mount, dip);
Dave Chinner24513372014-06-25 14:58:08 +1000132 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000133 }
134 ip->i_d.di_size = 0;
135 ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
136 break;
137
138 case S_IFREG:
139 case S_IFLNK:
140 case S_IFDIR:
141 switch (dip->di_format) {
142 case XFS_DINODE_FMT_LOCAL:
143 /*
144 * no local regular files yet
145 */
146 if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
147 xfs_warn(ip->i_mount,
148 "corrupt inode %Lu (local format for regular file).",
149 (unsigned long long) ip->i_ino);
150 XFS_CORRUPTION_ERROR("xfs_iformat(4)",
151 XFS_ERRLEVEL_LOW,
152 ip->i_mount, dip);
Dave Chinner24513372014-06-25 14:58:08 +1000153 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000154 }
155
156 di_size = be64_to_cpu(dip->di_size);
Dan Carpenter0d0ab122013-08-15 08:53:38 +0300157 if (unlikely(di_size < 0 ||
158 di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000159 xfs_warn(ip->i_mount,
160 "corrupt inode %Lu (bad size %Ld for local inode).",
161 (unsigned long long) ip->i_ino,
162 (long long) di_size);
163 XFS_CORRUPTION_ERROR("xfs_iformat(5)",
164 XFS_ERRLEVEL_LOW,
165 ip->i_mount, dip);
Dave Chinner24513372014-06-25 14:58:08 +1000166 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000167 }
168
169 size = (int)di_size;
170 error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
171 break;
172 case XFS_DINODE_FMT_EXTENTS:
173 error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
174 break;
175 case XFS_DINODE_FMT_BTREE:
176 error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
177 break;
178 default:
179 XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
180 ip->i_mount);
Dave Chinner24513372014-06-25 14:58:08 +1000181 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000182 }
183 break;
184
185 default:
186 XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
Dave Chinner24513372014-06-25 14:58:08 +1000187 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000188 }
189 if (error) {
190 return error;
191 }
192 if (!XFS_DFORK_Q(dip))
193 return 0;
194
195 ASSERT(ip->i_afp == NULL);
196 ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
197
198 switch (dip->di_aformat) {
199 case XFS_DINODE_FMT_LOCAL:
200 atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
201 size = be16_to_cpu(atp->hdr.totsize);
202
203 if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
204 xfs_warn(ip->i_mount,
205 "corrupt inode %Lu (bad attr fork size %Ld).",
206 (unsigned long long) ip->i_ino,
207 (long long) size);
208 XFS_CORRUPTION_ERROR("xfs_iformat(8)",
209 XFS_ERRLEVEL_LOW,
210 ip->i_mount, dip);
Dave Chinner24513372014-06-25 14:58:08 +1000211 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000212 }
213
214 error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
215 break;
216 case XFS_DINODE_FMT_EXTENTS:
217 error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
218 break;
219 case XFS_DINODE_FMT_BTREE:
220 error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
221 break;
222 default:
Dave Chinner24513372014-06-25 14:58:08 +1000223 error = -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000224 break;
225 }
226 if (error) {
227 kmem_zone_free(xfs_ifork_zone, ip->i_afp);
228 ip->i_afp = NULL;
229 xfs_idestroy_fork(ip, XFS_DATA_FORK);
230 }
231 return error;
232}
233
234/*
235 * The file is in-lined in the on-disk inode.
236 * If it fits into if_inline_data, then copy
237 * it there, otherwise allocate a buffer for it
238 * and copy the data there. Either way, set
239 * if_data to point at the data.
240 * If we allocate a buffer for the data, make
241 * sure that its size is a multiple of 4 and
242 * record the real size in i_real_bytes.
243 */
244STATIC int
245xfs_iformat_local(
246 xfs_inode_t *ip,
247 xfs_dinode_t *dip,
248 int whichfork,
249 int size)
250{
251 xfs_ifork_t *ifp;
252 int real_size;
253
254 /*
255 * If the size is unreasonable, then something
256 * is wrong and we just bail out rather than crash in
257 * kmem_alloc() or memcpy() below.
258 */
259 if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
260 xfs_warn(ip->i_mount,
261 "corrupt inode %Lu (bad size %d for local fork, size = %d).",
262 (unsigned long long) ip->i_ino, size,
263 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
264 XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
265 ip->i_mount, dip);
Dave Chinner24513372014-06-25 14:58:08 +1000266 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000267 }
268 ifp = XFS_IFORK_PTR(ip, whichfork);
269 real_size = 0;
270 if (size == 0)
271 ifp->if_u1.if_data = NULL;
272 else if (size <= sizeof(ifp->if_u2.if_inline_data))
273 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
274 else {
275 real_size = roundup(size, 4);
276 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
277 }
278 ifp->if_bytes = size;
279 ifp->if_real_bytes = real_size;
280 if (size)
281 memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size);
282 ifp->if_flags &= ~XFS_IFEXTENTS;
283 ifp->if_flags |= XFS_IFINLINE;
284 return 0;
285}
286
287/*
288 * The file consists of a set of extents all
289 * of which fit into the on-disk inode.
290 * If there are few enough extents to fit into
291 * the if_inline_ext, then copy them there.
292 * Otherwise allocate a buffer for them and copy
293 * them into it. Either way, set if_extents
294 * to point at the extents.
295 */
296STATIC int
297xfs_iformat_extents(
298 xfs_inode_t *ip,
299 xfs_dinode_t *dip,
300 int whichfork)
301{
302 xfs_bmbt_rec_t *dp;
303 xfs_ifork_t *ifp;
304 int nex;
305 int size;
306 int i;
307
308 ifp = XFS_IFORK_PTR(ip, whichfork);
309 nex = XFS_DFORK_NEXTENTS(dip, whichfork);
310 size = nex * (uint)sizeof(xfs_bmbt_rec_t);
311
312 /*
313 * If the number of extents is unreasonable, then something
314 * is wrong and we just bail out rather than crash in
315 * kmem_alloc() or memcpy() below.
316 */
317 if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
318 xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
319 (unsigned long long) ip->i_ino, nex);
320 XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
321 ip->i_mount, dip);
Dave Chinner24513372014-06-25 14:58:08 +1000322 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000323 }
324
325 ifp->if_real_bytes = 0;
326 if (nex == 0)
327 ifp->if_u1.if_extents = NULL;
328 else if (nex <= XFS_INLINE_EXTS)
329 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
330 else
331 xfs_iext_add(ifp, 0, nex);
332
333 ifp->if_bytes = size;
334 if (size) {
335 dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
336 xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
337 for (i = 0; i < nex; i++, dp++) {
338 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
339 ep->l0 = get_unaligned_be64(&dp->l0);
340 ep->l1 = get_unaligned_be64(&dp->l1);
341 }
342 XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
343 if (whichfork != XFS_DATA_FORK ||
344 XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
345 if (unlikely(xfs_check_nostate_extents(
346 ifp, 0, nex))) {
347 XFS_ERROR_REPORT("xfs_iformat_extents(2)",
348 XFS_ERRLEVEL_LOW,
349 ip->i_mount);
Dave Chinner24513372014-06-25 14:58:08 +1000350 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000351 }
352 }
353 ifp->if_flags |= XFS_IFEXTENTS;
354 return 0;
355}
356
357/*
358 * The file has too many extents to fit into
359 * the inode, so they are in B-tree format.
360 * Allocate a buffer for the root of the B-tree
361 * and copy the root into it. The i_extents
362 * field will remain NULL until all of the
363 * extents are read in (when they are needed).
364 */
365STATIC int
366xfs_iformat_btree(
367 xfs_inode_t *ip,
368 xfs_dinode_t *dip,
369 int whichfork)
370{
371 struct xfs_mount *mp = ip->i_mount;
372 xfs_bmdr_block_t *dfp;
373 xfs_ifork_t *ifp;
374 /* REFERENCED */
375 int nrecs;
376 int size;
377
378 ifp = XFS_IFORK_PTR(ip, whichfork);
379 dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
380 size = XFS_BMAP_BROOT_SPACE(mp, dfp);
381 nrecs = be16_to_cpu(dfp->bb_numrecs);
382
383 /*
384 * blow out if -- fork has less extents than can fit in
385 * fork (fork shouldn't be a btree format), root btree
386 * block has more records than can fit into the fork,
387 * or the number of extents is greater than the number of
388 * blocks.
389 */
390 if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
391 XFS_IFORK_MAXEXT(ip, whichfork) ||
392 XFS_BMDR_SPACE_CALC(nrecs) >
393 XFS_DFORK_SIZE(dip, mp, whichfork) ||
394 XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
395 xfs_warn(mp, "corrupt inode %Lu (btree).",
396 (unsigned long long) ip->i_ino);
397 XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
398 mp, dip);
Dave Chinner24513372014-06-25 14:58:08 +1000399 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000400 }
401
402 ifp->if_broot_bytes = size;
403 ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS);
404 ASSERT(ifp->if_broot != NULL);
405 /*
406 * Copy and convert from the on-disk structure
407 * to the in-memory structure.
408 */
409 xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
410 ifp->if_broot, size);
411 ifp->if_flags &= ~XFS_IFEXTENTS;
412 ifp->if_flags |= XFS_IFBROOT;
413
414 return 0;
415}
416
417/*
418 * Read in extents from a btree-format inode.
419 * Allocate and fill in if_extents. Real work is done in xfs_bmap.c.
420 */
421int
422xfs_iread_extents(
423 xfs_trans_t *tp,
424 xfs_inode_t *ip,
425 int whichfork)
426{
427 int error;
428 xfs_ifork_t *ifp;
429 xfs_extnum_t nextents;
430
Christoph Hellwigeef334e2013-12-06 12:30:17 -0800431 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
432
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000433 if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
434 XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
435 ip->i_mount);
Dave Chinner24513372014-06-25 14:58:08 +1000436 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000437 }
438 nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
439 ifp = XFS_IFORK_PTR(ip, whichfork);
440
441 /*
442 * We know that the size is valid (it's checked in iformat_btree)
443 */
444 ifp->if_bytes = ifp->if_real_bytes = 0;
445 ifp->if_flags |= XFS_IFEXTENTS;
446 xfs_iext_add(ifp, 0, nextents);
447 error = xfs_bmap_read_extents(tp, ip, whichfork);
448 if (error) {
449 xfs_iext_destroy(ifp);
450 ifp->if_flags &= ~XFS_IFEXTENTS;
451 return error;
452 }
453 xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip));
454 return 0;
455}
456/*
457 * Reallocate the space for if_broot based on the number of records
458 * being added or deleted as indicated in rec_diff. Move the records
459 * and pointers in if_broot to fit the new size. When shrinking this
460 * will eliminate holes between the records and pointers created by
461 * the caller. When growing this will create holes to be filled in
462 * by the caller.
463 *
464 * The caller must not request to add more records than would fit in
465 * the on-disk inode root. If the if_broot is currently NULL, then
Zhi Yong Wuf6c27342013-08-07 10:11:04 +0000466 * if we are adding records, one will be allocated. The caller must also
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000467 * not request that the number of records go below zero, although
468 * it can go to zero.
469 *
470 * ip -- the inode whose if_broot area is changing
471 * ext_diff -- the change in the number of records, positive or negative,
472 * requested for the if_broot array.
473 */
474void
475xfs_iroot_realloc(
476 xfs_inode_t *ip,
477 int rec_diff,
478 int whichfork)
479{
480 struct xfs_mount *mp = ip->i_mount;
481 int cur_max;
482 xfs_ifork_t *ifp;
483 struct xfs_btree_block *new_broot;
484 int new_max;
485 size_t new_size;
486 char *np;
487 char *op;
488
489 /*
490 * Handle the degenerate case quietly.
491 */
492 if (rec_diff == 0) {
493 return;
494 }
495
496 ifp = XFS_IFORK_PTR(ip, whichfork);
497 if (rec_diff > 0) {
498 /*
499 * If there wasn't any memory allocated before, just
500 * allocate it now and get out.
501 */
502 if (ifp->if_broot_bytes == 0) {
503 new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
504 ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
505 ifp->if_broot_bytes = (int)new_size;
506 return;
507 }
508
509 /*
510 * If there is already an existing if_broot, then we need
511 * to realloc() it and shift the pointers to their new
512 * location. The records don't change location because
513 * they are kept butted up against the btree block header.
514 */
515 cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
516 new_max = cur_max + rec_diff;
517 new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
518 ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000519 KM_SLEEP | KM_NOFS);
520 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
521 ifp->if_broot_bytes);
522 np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
523 (int)new_size);
524 ifp->if_broot_bytes = (int)new_size;
525 ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
526 XFS_IFORK_SIZE(ip, whichfork));
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +1000527 memmove(np, op, cur_max * (uint)sizeof(xfs_fsblock_t));
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000528 return;
529 }
530
531 /*
532 * rec_diff is less than 0. In this case, we are shrinking the
533 * if_broot buffer. It must already exist. If we go to zero
534 * records, just get rid of the root and clear the status bit.
535 */
536 ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
537 cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
538 new_max = cur_max + rec_diff;
539 ASSERT(new_max >= 0);
540 if (new_max > 0)
541 new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
542 else
543 new_size = 0;
544 if (new_size > 0) {
545 new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
546 /*
547 * First copy over the btree block header.
548 */
549 memcpy(new_broot, ifp->if_broot,
550 XFS_BMBT_BLOCK_LEN(ip->i_mount));
551 } else {
552 new_broot = NULL;
553 ifp->if_flags &= ~XFS_IFBROOT;
554 }
555
556 /*
557 * Only copy the records and pointers if there are any.
558 */
559 if (new_max > 0) {
560 /*
561 * First copy the records.
562 */
563 op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
564 np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
565 memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
566
567 /*
568 * Then copy the pointers.
569 */
570 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
571 ifp->if_broot_bytes);
572 np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
573 (int)new_size);
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +1000574 memcpy(np, op, new_max * (uint)sizeof(xfs_fsblock_t));
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000575 }
576 kmem_free(ifp->if_broot);
577 ifp->if_broot = new_broot;
578 ifp->if_broot_bytes = (int)new_size;
579 if (ifp->if_broot)
580 ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
581 XFS_IFORK_SIZE(ip, whichfork));
582 return;
583}
584
585
586/*
587 * This is called when the amount of space needed for if_data
588 * is increased or decreased. The change in size is indicated by
589 * the number of bytes that need to be added or deleted in the
590 * byte_diff parameter.
591 *
592 * If the amount of space needed has decreased below the size of the
593 * inline buffer, then switch to using the inline buffer. Otherwise,
594 * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
595 * to what is needed.
596 *
597 * ip -- the inode whose if_data area is changing
598 * byte_diff -- the change in the number of bytes, positive or negative,
599 * requested for the if_data array.
600 */
601void
602xfs_idata_realloc(
603 xfs_inode_t *ip,
604 int byte_diff,
605 int whichfork)
606{
607 xfs_ifork_t *ifp;
608 int new_size;
609 int real_size;
610
611 if (byte_diff == 0) {
612 return;
613 }
614
615 ifp = XFS_IFORK_PTR(ip, whichfork);
616 new_size = (int)ifp->if_bytes + byte_diff;
617 ASSERT(new_size >= 0);
618
619 if (new_size == 0) {
620 if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
621 kmem_free(ifp->if_u1.if_data);
622 }
623 ifp->if_u1.if_data = NULL;
624 real_size = 0;
625 } else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) {
626 /*
627 * If the valid extents/data can fit in if_inline_ext/data,
628 * copy them from the malloc'd vector and free it.
629 */
630 if (ifp->if_u1.if_data == NULL) {
631 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
632 } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
633 ASSERT(ifp->if_real_bytes != 0);
634 memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
635 new_size);
636 kmem_free(ifp->if_u1.if_data);
637 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
638 }
639 real_size = 0;
640 } else {
641 /*
642 * Stuck with malloc/realloc.
643 * For inline data, the underlying buffer must be
644 * a multiple of 4 bytes in size so that it can be
645 * logged and stay on word boundaries. We enforce
646 * that here.
647 */
648 real_size = roundup(new_size, 4);
649 if (ifp->if_u1.if_data == NULL) {
650 ASSERT(ifp->if_real_bytes == 0);
651 ifp->if_u1.if_data = kmem_alloc(real_size,
652 KM_SLEEP | KM_NOFS);
653 } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
654 /*
655 * Only do the realloc if the underlying size
656 * is really changing.
657 */
658 if (ifp->if_real_bytes != real_size) {
659 ifp->if_u1.if_data =
660 kmem_realloc(ifp->if_u1.if_data,
661 real_size,
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000662 KM_SLEEP | KM_NOFS);
663 }
664 } else {
665 ASSERT(ifp->if_real_bytes == 0);
666 ifp->if_u1.if_data = kmem_alloc(real_size,
667 KM_SLEEP | KM_NOFS);
668 memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
669 ifp->if_bytes);
670 }
671 }
672 ifp->if_real_bytes = real_size;
673 ifp->if_bytes = new_size;
674 ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
675}
676
677void
678xfs_idestroy_fork(
679 xfs_inode_t *ip,
680 int whichfork)
681{
682 xfs_ifork_t *ifp;
683
684 ifp = XFS_IFORK_PTR(ip, whichfork);
685 if (ifp->if_broot != NULL) {
686 kmem_free(ifp->if_broot);
687 ifp->if_broot = NULL;
688 }
689
690 /*
691 * If the format is local, then we can't have an extents
692 * array so just look for an inline data array. If we're
693 * not local then we may or may not have an extents list,
694 * so check and free it up if we do.
695 */
696 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
697 if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
698 (ifp->if_u1.if_data != NULL)) {
699 ASSERT(ifp->if_real_bytes != 0);
700 kmem_free(ifp->if_u1.if_data);
701 ifp->if_u1.if_data = NULL;
702 ifp->if_real_bytes = 0;
703 }
704 } else if ((ifp->if_flags & XFS_IFEXTENTS) &&
705 ((ifp->if_flags & XFS_IFEXTIREC) ||
706 ((ifp->if_u1.if_extents != NULL) &&
707 (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) {
708 ASSERT(ifp->if_real_bytes != 0);
709 xfs_iext_destroy(ifp);
710 }
711 ASSERT(ifp->if_u1.if_extents == NULL ||
712 ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext);
713 ASSERT(ifp->if_real_bytes == 0);
714 if (whichfork == XFS_ATTR_FORK) {
715 kmem_zone_free(xfs_ifork_zone, ip->i_afp);
716 ip->i_afp = NULL;
717 }
718}
719
720/*
Christoph Hellwigda776502013-12-13 11:34:04 +1100721 * Convert in-core extents to on-disk form
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000722 *
Christoph Hellwigda776502013-12-13 11:34:04 +1100723 * For either the data or attr fork in extent format, we need to endian convert
724 * the in-core extent as we place them into the on-disk inode.
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000725 *
Christoph Hellwigda776502013-12-13 11:34:04 +1100726 * In the case of the data fork, the in-core and on-disk fork sizes can be
727 * different due to delayed allocation extents. We only copy on-disk extents
728 * here, so callers must always use the physical fork size to determine the
729 * size of the buffer passed to this routine. We will return the size actually
730 * used.
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000731 */
732int
733xfs_iextents_copy(
734 xfs_inode_t *ip,
735 xfs_bmbt_rec_t *dp,
736 int whichfork)
737{
738 int copied;
739 int i;
740 xfs_ifork_t *ifp;
741 int nrecs;
742 xfs_fsblock_t start_block;
743
744 ifp = XFS_IFORK_PTR(ip, whichfork);
745 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
746 ASSERT(ifp->if_bytes > 0);
747
748 nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
749 XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
750 ASSERT(nrecs > 0);
751
752 /*
753 * There are some delayed allocation extents in the
754 * inode, so copy the extents one at a time and skip
755 * the delayed ones. There must be at least one
756 * non-delayed extent.
757 */
758 copied = 0;
759 for (i = 0; i < nrecs; i++) {
760 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
761 start_block = xfs_bmbt_get_startblock(ep);
762 if (isnullstartblock(start_block)) {
763 /*
764 * It's a delayed allocation extent, so skip it.
765 */
766 continue;
767 }
768
769 /* Translate to on disk format */
Dave Chinnerc5c249b2013-08-12 20:49:43 +1000770 put_unaligned_be64(ep->l0, &dp->l0);
771 put_unaligned_be64(ep->l1, &dp->l1);
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000772 dp++;
773 copied++;
774 }
775 ASSERT(copied != 0);
776 xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip));
777
778 return (copied * (uint)sizeof(xfs_bmbt_rec_t));
779}
780
781/*
782 * Each of the following cases stores data into the same region
783 * of the on-disk inode, so only one of them can be valid at
784 * any given time. While it is possible to have conflicting formats
785 * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
786 * in EXTENTS format, this can only happen when the fork has
787 * changed formats after being modified but before being flushed.
788 * In these cases, the format always takes precedence, because the
789 * format indicates the current state of the fork.
790 */
791void
792xfs_iflush_fork(
793 xfs_inode_t *ip,
794 xfs_dinode_t *dip,
795 xfs_inode_log_item_t *iip,
Eric Sandeenfd9fdba2014-04-14 19:04:46 +1000796 int whichfork)
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000797{
798 char *cp;
799 xfs_ifork_t *ifp;
800 xfs_mount_t *mp;
801 static const short brootflag[2] =
802 { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
803 static const short dataflag[2] =
804 { XFS_ILOG_DDATA, XFS_ILOG_ADATA };
805 static const short extflag[2] =
806 { XFS_ILOG_DEXT, XFS_ILOG_AEXT };
807
808 if (!iip)
809 return;
810 ifp = XFS_IFORK_PTR(ip, whichfork);
811 /*
812 * This can happen if we gave up in iformat in an error path,
813 * for the attribute fork.
814 */
815 if (!ifp) {
816 ASSERT(whichfork == XFS_ATTR_FORK);
817 return;
818 }
819 cp = XFS_DFORK_PTR(dip, whichfork);
820 mp = ip->i_mount;
821 switch (XFS_IFORK_FORMAT(ip, whichfork)) {
822 case XFS_DINODE_FMT_LOCAL:
823 if ((iip->ili_fields & dataflag[whichfork]) &&
824 (ifp->if_bytes > 0)) {
825 ASSERT(ifp->if_u1.if_data != NULL);
826 ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
827 memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
828 }
829 break;
830
831 case XFS_DINODE_FMT_EXTENTS:
832 ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
833 !(iip->ili_fields & extflag[whichfork]));
834 if ((iip->ili_fields & extflag[whichfork]) &&
835 (ifp->if_bytes > 0)) {
836 ASSERT(xfs_iext_get_ext(ifp, 0));
837 ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
838 (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
839 whichfork);
840 }
841 break;
842
843 case XFS_DINODE_FMT_BTREE:
844 if ((iip->ili_fields & brootflag[whichfork]) &&
845 (ifp->if_broot_bytes > 0)) {
846 ASSERT(ifp->if_broot != NULL);
847 ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
848 XFS_IFORK_SIZE(ip, whichfork));
849 xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
850 (xfs_bmdr_block_t *)cp,
851 XFS_DFORK_SIZE(dip, mp, whichfork));
852 }
853 break;
854
855 case XFS_DINODE_FMT_DEV:
856 if (iip->ili_fields & XFS_ILOG_DEV) {
857 ASSERT(whichfork == XFS_DATA_FORK);
858 xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
859 }
860 break;
861
862 case XFS_DINODE_FMT_UUID:
863 if (iip->ili_fields & XFS_ILOG_UUID) {
864 ASSERT(whichfork == XFS_DATA_FORK);
865 memcpy(XFS_DFORK_DPTR(dip),
866 &ip->i_df.if_u2.if_uuid,
867 sizeof(uuid_t));
868 }
869 break;
870
871 default:
872 ASSERT(0);
873 break;
874 }
875}
876
877/*
878 * Return a pointer to the extent record at file index idx.
879 */
880xfs_bmbt_rec_host_t *
881xfs_iext_get_ext(
882 xfs_ifork_t *ifp, /* inode fork pointer */
883 xfs_extnum_t idx) /* index of target extent */
884{
885 ASSERT(idx >= 0);
886 ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
887
888 if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
889 return ifp->if_u1.if_ext_irec->er_extbuf;
890 } else if (ifp->if_flags & XFS_IFEXTIREC) {
891 xfs_ext_irec_t *erp; /* irec pointer */
892 int erp_idx = 0; /* irec index */
893 xfs_extnum_t page_idx = idx; /* ext index in target list */
894
895 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
896 return &erp->er_extbuf[page_idx];
897 } else if (ifp->if_bytes) {
898 return &ifp->if_u1.if_extents[idx];
899 } else {
900 return NULL;
901 }
902}
903
904/*
905 * Insert new item(s) into the extent records for incore inode
906 * fork 'ifp'. 'count' new items are inserted at index 'idx'.
907 */
908void
909xfs_iext_insert(
910 xfs_inode_t *ip, /* incore inode pointer */
911 xfs_extnum_t idx, /* starting index of new items */
912 xfs_extnum_t count, /* number of inserted items */
913 xfs_bmbt_irec_t *new, /* items to insert */
914 int state) /* type of extent conversion */
915{
916 xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
917 xfs_extnum_t i; /* extent record index */
918
919 trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_);
920
921 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
922 xfs_iext_add(ifp, idx, count);
923 for (i = idx; i < idx + count; i++, new++)
924 xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new);
925}
926
927/*
928 * This is called when the amount of space required for incore file
929 * extents needs to be increased. The ext_diff parameter stores the
930 * number of new extents being added and the idx parameter contains
931 * the extent index where the new extents will be added. If the new
932 * extents are being appended, then we just need to (re)allocate and
933 * initialize the space. Otherwise, if the new extents are being
934 * inserted into the middle of the existing entries, a bit more work
935 * is required to make room for the new extents to be inserted. The
936 * caller is responsible for filling in the new extent entries upon
937 * return.
938 */
939void
940xfs_iext_add(
941 xfs_ifork_t *ifp, /* inode fork pointer */
942 xfs_extnum_t idx, /* index to begin adding exts */
943 int ext_diff) /* number of extents to add */
944{
945 int byte_diff; /* new bytes being added */
946 int new_size; /* size of extents after adding */
947 xfs_extnum_t nextents; /* number of extents in file */
948
949 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
950 ASSERT((idx >= 0) && (idx <= nextents));
951 byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
952 new_size = ifp->if_bytes + byte_diff;
953 /*
954 * If the new number of extents (nextents + ext_diff)
955 * fits inside the inode, then continue to use the inline
956 * extent buffer.
957 */
958 if (nextents + ext_diff <= XFS_INLINE_EXTS) {
959 if (idx < nextents) {
960 memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff],
961 &ifp->if_u2.if_inline_ext[idx],
962 (nextents - idx) * sizeof(xfs_bmbt_rec_t));
963 memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff);
964 }
965 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
966 ifp->if_real_bytes = 0;
967 }
968 /*
969 * Otherwise use a linear (direct) extent list.
970 * If the extents are currently inside the inode,
971 * xfs_iext_realloc_direct will switch us from
972 * inline to direct extent allocation mode.
973 */
974 else if (nextents + ext_diff <= XFS_LINEAR_EXTS) {
975 xfs_iext_realloc_direct(ifp, new_size);
976 if (idx < nextents) {
977 memmove(&ifp->if_u1.if_extents[idx + ext_diff],
978 &ifp->if_u1.if_extents[idx],
979 (nextents - idx) * sizeof(xfs_bmbt_rec_t));
980 memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
981 }
982 }
983 /* Indirection array */
984 else {
985 xfs_ext_irec_t *erp;
986 int erp_idx = 0;
987 int page_idx = idx;
988
989 ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS);
990 if (ifp->if_flags & XFS_IFEXTIREC) {
991 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1);
992 } else {
993 xfs_iext_irec_init(ifp);
994 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
995 erp = ifp->if_u1.if_ext_irec;
996 }
997 /* Extents fit in target extent page */
998 if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) {
999 if (page_idx < erp->er_extcount) {
1000 memmove(&erp->er_extbuf[page_idx + ext_diff],
1001 &erp->er_extbuf[page_idx],
1002 (erp->er_extcount - page_idx) *
1003 sizeof(xfs_bmbt_rec_t));
1004 memset(&erp->er_extbuf[page_idx], 0, byte_diff);
1005 }
1006 erp->er_extcount += ext_diff;
1007 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
1008 }
1009 /* Insert a new extent page */
1010 else if (erp) {
1011 xfs_iext_add_indirect_multi(ifp,
1012 erp_idx, page_idx, ext_diff);
1013 }
1014 /*
1015 * If extent(s) are being appended to the last page in
1016 * the indirection array and the new extent(s) don't fit
1017 * in the page, then erp is NULL and erp_idx is set to
1018 * the next index needed in the indirection array.
1019 */
1020 else {
Jie Liubb86d212013-10-25 14:52:44 +08001021 uint count = ext_diff;
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001022
1023 while (count) {
1024 erp = xfs_iext_irec_new(ifp, erp_idx);
Jie Liubb86d212013-10-25 14:52:44 +08001025 erp->er_extcount = min(count, XFS_LINEAR_EXTS);
1026 count -= erp->er_extcount;
1027 if (count)
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001028 erp_idx++;
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001029 }
1030 }
1031 }
1032 ifp->if_bytes = new_size;
1033}
1034
1035/*
1036 * This is called when incore extents are being added to the indirection
1037 * array and the new extents do not fit in the target extent list. The
1038 * erp_idx parameter contains the irec index for the target extent list
1039 * in the indirection array, and the idx parameter contains the extent
1040 * index within the list. The number of extents being added is stored
1041 * in the count parameter.
1042 *
1043 * |-------| |-------|
1044 * | | | | idx - number of extents before idx
1045 * | idx | | count |
1046 * | | | | count - number of extents being inserted at idx
1047 * |-------| |-------|
1048 * | count | | nex2 | nex2 - number of extents after idx + count
1049 * |-------| |-------|
1050 */
1051void
1052xfs_iext_add_indirect_multi(
1053 xfs_ifork_t *ifp, /* inode fork pointer */
1054 int erp_idx, /* target extent irec index */
1055 xfs_extnum_t idx, /* index within target list */
1056 int count) /* new extents being added */
1057{
1058 int byte_diff; /* new bytes being added */
1059 xfs_ext_irec_t *erp; /* pointer to irec entry */
1060 xfs_extnum_t ext_diff; /* number of extents to add */
1061 xfs_extnum_t ext_cnt; /* new extents still needed */
1062 xfs_extnum_t nex2; /* extents after idx + count */
1063 xfs_bmbt_rec_t *nex2_ep = NULL; /* temp list for nex2 extents */
1064 int nlists; /* number of irec's (lists) */
1065
1066 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1067 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1068 nex2 = erp->er_extcount - idx;
1069 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1070
1071 /*
1072 * Save second part of target extent list
1073 * (all extents past */
1074 if (nex2) {
1075 byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
1076 nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
1077 memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
1078 erp->er_extcount -= nex2;
1079 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
1080 memset(&erp->er_extbuf[idx], 0, byte_diff);
1081 }
1082
1083 /*
1084 * Add the new extents to the end of the target
1085 * list, then allocate new irec record(s) and
1086 * extent buffer(s) as needed to store the rest
1087 * of the new extents.
1088 */
1089 ext_cnt = count;
1090 ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount);
1091 if (ext_diff) {
1092 erp->er_extcount += ext_diff;
1093 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
1094 ext_cnt -= ext_diff;
1095 }
1096 while (ext_cnt) {
1097 erp_idx++;
1098 erp = xfs_iext_irec_new(ifp, erp_idx);
1099 ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS);
1100 erp->er_extcount = ext_diff;
1101 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
1102 ext_cnt -= ext_diff;
1103 }
1104
1105 /* Add nex2 extents back to indirection array */
1106 if (nex2) {
1107 xfs_extnum_t ext_avail;
1108 int i;
1109
1110 byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
1111 ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
1112 i = 0;
1113 /*
1114 * If nex2 extents fit in the current page, append
1115 * nex2_ep after the new extents.
1116 */
1117 if (nex2 <= ext_avail) {
1118 i = erp->er_extcount;
1119 }
1120 /*
1121 * Otherwise, check if space is available in the
1122 * next page.
1123 */
1124 else if ((erp_idx < nlists - 1) &&
1125 (nex2 <= (ext_avail = XFS_LINEAR_EXTS -
1126 ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) {
1127 erp_idx++;
1128 erp++;
1129 /* Create a hole for nex2 extents */
1130 memmove(&erp->er_extbuf[nex2], erp->er_extbuf,
1131 erp->er_extcount * sizeof(xfs_bmbt_rec_t));
1132 }
1133 /*
1134 * Final choice, create a new extent page for
1135 * nex2 extents.
1136 */
1137 else {
1138 erp_idx++;
1139 erp = xfs_iext_irec_new(ifp, erp_idx);
1140 }
1141 memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
1142 kmem_free(nex2_ep);
1143 erp->er_extcount += nex2;
1144 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
1145 }
1146}
1147
1148/*
1149 * This is called when the amount of space required for incore file
1150 * extents needs to be decreased. The ext_diff parameter stores the
1151 * number of extents to be removed and the idx parameter contains
1152 * the extent index where the extents will be removed from.
1153 *
1154 * If the amount of space needed has decreased below the linear
1155 * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous
1156 * extent array. Otherwise, use kmem_realloc() to adjust the
1157 * size to what is needed.
1158 */
1159void
1160xfs_iext_remove(
1161 xfs_inode_t *ip, /* incore inode pointer */
1162 xfs_extnum_t idx, /* index to begin removing exts */
1163 int ext_diff, /* number of extents to remove */
1164 int state) /* type of extent conversion */
1165{
1166 xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
1167 xfs_extnum_t nextents; /* number of extents in file */
1168 int new_size; /* size of extents after removal */
1169
1170 trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
1171
1172 ASSERT(ext_diff > 0);
1173 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1174 new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
1175
1176 if (new_size == 0) {
1177 xfs_iext_destroy(ifp);
1178 } else if (ifp->if_flags & XFS_IFEXTIREC) {
1179 xfs_iext_remove_indirect(ifp, idx, ext_diff);
1180 } else if (ifp->if_real_bytes) {
1181 xfs_iext_remove_direct(ifp, idx, ext_diff);
1182 } else {
1183 xfs_iext_remove_inline(ifp, idx, ext_diff);
1184 }
1185 ifp->if_bytes = new_size;
1186}
1187
1188/*
1189 * This removes ext_diff extents from the inline buffer, beginning
1190 * at extent index idx.
1191 */
1192void
1193xfs_iext_remove_inline(
1194 xfs_ifork_t *ifp, /* inode fork pointer */
1195 xfs_extnum_t idx, /* index to begin removing exts */
1196 int ext_diff) /* number of extents to remove */
1197{
1198 int nextents; /* number of extents in file */
1199
1200 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
1201 ASSERT(idx < XFS_INLINE_EXTS);
1202 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1203 ASSERT(((nextents - ext_diff) > 0) &&
1204 (nextents - ext_diff) < XFS_INLINE_EXTS);
1205
1206 if (idx + ext_diff < nextents) {
1207 memmove(&ifp->if_u2.if_inline_ext[idx],
1208 &ifp->if_u2.if_inline_ext[idx + ext_diff],
1209 (nextents - (idx + ext_diff)) *
1210 sizeof(xfs_bmbt_rec_t));
1211 memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff],
1212 0, ext_diff * sizeof(xfs_bmbt_rec_t));
1213 } else {
1214 memset(&ifp->if_u2.if_inline_ext[idx], 0,
1215 ext_diff * sizeof(xfs_bmbt_rec_t));
1216 }
1217}
1218
1219/*
1220 * This removes ext_diff extents from a linear (direct) extent list,
1221 * beginning at extent index idx. If the extents are being removed
1222 * from the end of the list (ie. truncate) then we just need to re-
1223 * allocate the list to remove the extra space. Otherwise, if the
1224 * extents are being removed from the middle of the existing extent
1225 * entries, then we first need to move the extent records beginning
1226 * at idx + ext_diff up in the list to overwrite the records being
1227 * removed, then remove the extra space via kmem_realloc.
1228 */
1229void
1230xfs_iext_remove_direct(
1231 xfs_ifork_t *ifp, /* inode fork pointer */
1232 xfs_extnum_t idx, /* index to begin removing exts */
1233 int ext_diff) /* number of extents to remove */
1234{
1235 xfs_extnum_t nextents; /* number of extents in file */
1236 int new_size; /* size of extents after removal */
1237
1238 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
1239 new_size = ifp->if_bytes -
1240 (ext_diff * sizeof(xfs_bmbt_rec_t));
1241 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1242
1243 if (new_size == 0) {
1244 xfs_iext_destroy(ifp);
1245 return;
1246 }
1247 /* Move extents up in the list (if needed) */
1248 if (idx + ext_diff < nextents) {
1249 memmove(&ifp->if_u1.if_extents[idx],
1250 &ifp->if_u1.if_extents[idx + ext_diff],
1251 (nextents - (idx + ext_diff)) *
1252 sizeof(xfs_bmbt_rec_t));
1253 }
1254 memset(&ifp->if_u1.if_extents[nextents - ext_diff],
1255 0, ext_diff * sizeof(xfs_bmbt_rec_t));
1256 /*
1257 * Reallocate the direct extent list. If the extents
1258 * will fit inside the inode then xfs_iext_realloc_direct
1259 * will switch from direct to inline extent allocation
1260 * mode for us.
1261 */
1262 xfs_iext_realloc_direct(ifp, new_size);
1263 ifp->if_bytes = new_size;
1264}
1265
1266/*
1267 * This is called when incore extents are being removed from the
1268 * indirection array and the extents being removed span multiple extent
1269 * buffers. The idx parameter contains the file extent index where we
1270 * want to begin removing extents, and the count parameter contains
1271 * how many extents need to be removed.
1272 *
1273 * |-------| |-------|
1274 * | nex1 | | | nex1 - number of extents before idx
1275 * |-------| | count |
1276 * | | | | count - number of extents being removed at idx
1277 * | count | |-------|
1278 * | | | nex2 | nex2 - number of extents after idx + count
1279 * |-------| |-------|
1280 */
1281void
1282xfs_iext_remove_indirect(
1283 xfs_ifork_t *ifp, /* inode fork pointer */
1284 xfs_extnum_t idx, /* index to begin removing extents */
1285 int count) /* number of extents to remove */
1286{
1287 xfs_ext_irec_t *erp; /* indirection array pointer */
1288 int erp_idx = 0; /* indirection array index */
1289 xfs_extnum_t ext_cnt; /* extents left to remove */
1290 xfs_extnum_t ext_diff; /* extents to remove in current list */
1291 xfs_extnum_t nex1; /* number of extents before idx */
1292 xfs_extnum_t nex2; /* extents after idx + count */
1293 int page_idx = idx; /* index in target extent list */
1294
1295 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1296 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
1297 ASSERT(erp != NULL);
1298 nex1 = page_idx;
1299 ext_cnt = count;
1300 while (ext_cnt) {
1301 nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0);
1302 ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1));
1303 /*
1304 * Check for deletion of entire list;
1305 * xfs_iext_irec_remove() updates extent offsets.
1306 */
1307 if (ext_diff == erp->er_extcount) {
1308 xfs_iext_irec_remove(ifp, erp_idx);
1309 ext_cnt -= ext_diff;
1310 nex1 = 0;
1311 if (ext_cnt) {
1312 ASSERT(erp_idx < ifp->if_real_bytes /
1313 XFS_IEXT_BUFSZ);
1314 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1315 nex1 = 0;
1316 continue;
1317 } else {
1318 break;
1319 }
1320 }
1321 /* Move extents up (if needed) */
1322 if (nex2) {
1323 memmove(&erp->er_extbuf[nex1],
1324 &erp->er_extbuf[nex1 + ext_diff],
1325 nex2 * sizeof(xfs_bmbt_rec_t));
1326 }
1327 /* Zero out rest of page */
1328 memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ -
1329 ((nex1 + nex2) * sizeof(xfs_bmbt_rec_t))));
1330 /* Update remaining counters */
1331 erp->er_extcount -= ext_diff;
1332 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff);
1333 ext_cnt -= ext_diff;
1334 nex1 = 0;
1335 erp_idx++;
1336 erp++;
1337 }
1338 ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t);
1339 xfs_iext_irec_compact(ifp);
1340}
1341
1342/*
1343 * Create, destroy, or resize a linear (direct) block of extents.
1344 */
1345void
1346xfs_iext_realloc_direct(
1347 xfs_ifork_t *ifp, /* inode fork pointer */
Jie Liu17ec81c2013-09-22 16:25:15 +08001348 int new_size) /* new size of extents after adding */
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001349{
1350 int rnew_size; /* real new size of extents */
1351
1352 rnew_size = new_size;
1353
1354 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) ||
1355 ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) &&
1356 (new_size != ifp->if_real_bytes)));
1357
1358 /* Free extent records */
1359 if (new_size == 0) {
1360 xfs_iext_destroy(ifp);
1361 }
1362 /* Resize direct extent list and zero any new bytes */
1363 else if (ifp->if_real_bytes) {
1364 /* Check if extents will fit inside the inode */
1365 if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) {
1366 xfs_iext_direct_to_inline(ifp, new_size /
1367 (uint)sizeof(xfs_bmbt_rec_t));
1368 ifp->if_bytes = new_size;
1369 return;
1370 }
1371 if (!is_power_of_2(new_size)){
1372 rnew_size = roundup_pow_of_two(new_size);
1373 }
1374 if (rnew_size != ifp->if_real_bytes) {
1375 ifp->if_u1.if_extents =
1376 kmem_realloc(ifp->if_u1.if_extents,
Christoph Hellwig664b60f2016-04-06 09:47:01 +10001377 rnew_size, KM_NOFS);
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001378 }
1379 if (rnew_size > ifp->if_real_bytes) {
1380 memset(&ifp->if_u1.if_extents[ifp->if_bytes /
1381 (uint)sizeof(xfs_bmbt_rec_t)], 0,
1382 rnew_size - ifp->if_real_bytes);
1383 }
1384 }
Jie Liu17ec81c2013-09-22 16:25:15 +08001385 /* Switch from the inline extent buffer to a direct extent list */
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001386 else {
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001387 if (!is_power_of_2(new_size)) {
1388 rnew_size = roundup_pow_of_two(new_size);
1389 }
1390 xfs_iext_inline_to_direct(ifp, rnew_size);
1391 }
1392 ifp->if_real_bytes = rnew_size;
1393 ifp->if_bytes = new_size;
1394}
1395
1396/*
1397 * Switch from linear (direct) extent records to inline buffer.
1398 */
1399void
1400xfs_iext_direct_to_inline(
1401 xfs_ifork_t *ifp, /* inode fork pointer */
1402 xfs_extnum_t nextents) /* number of extents in file */
1403{
1404 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
1405 ASSERT(nextents <= XFS_INLINE_EXTS);
1406 /*
1407 * The inline buffer was zeroed when we switched
1408 * from inline to direct extent allocation mode,
1409 * so we don't need to clear it here.
1410 */
1411 memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
1412 nextents * sizeof(xfs_bmbt_rec_t));
1413 kmem_free(ifp->if_u1.if_extents);
1414 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
1415 ifp->if_real_bytes = 0;
1416}
1417
1418/*
1419 * Switch from inline buffer to linear (direct) extent records.
1420 * new_size should already be rounded up to the next power of 2
1421 * by the caller (when appropriate), so use new_size as it is.
1422 * However, since new_size may be rounded up, we can't update
1423 * if_bytes here. It is the caller's responsibility to update
1424 * if_bytes upon return.
1425 */
1426void
1427xfs_iext_inline_to_direct(
1428 xfs_ifork_t *ifp, /* inode fork pointer */
1429 int new_size) /* number of extents in file */
1430{
1431 ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
1432 memset(ifp->if_u1.if_extents, 0, new_size);
1433 if (ifp->if_bytes) {
1434 memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
1435 ifp->if_bytes);
1436 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
1437 sizeof(xfs_bmbt_rec_t));
1438 }
1439 ifp->if_real_bytes = new_size;
1440}
1441
1442/*
1443 * Resize an extent indirection array to new_size bytes.
1444 */
1445STATIC void
1446xfs_iext_realloc_indirect(
1447 xfs_ifork_t *ifp, /* inode fork pointer */
1448 int new_size) /* new indirection array size */
1449{
1450 int nlists; /* number of irec's (ex lists) */
1451 int size; /* current indirection array size */
1452
1453 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1454 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1455 size = nlists * sizeof(xfs_ext_irec_t);
1456 ASSERT(ifp->if_real_bytes);
1457 ASSERT((new_size >= 0) && (new_size != size));
1458 if (new_size == 0) {
1459 xfs_iext_destroy(ifp);
1460 } else {
Christoph Hellwig664b60f2016-04-06 09:47:01 +10001461 ifp->if_u1.if_ext_irec =
1462 kmem_realloc(ifp->if_u1.if_ext_irec, new_size, KM_NOFS);
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001463 }
1464}
1465
1466/*
1467 * Switch from indirection array to linear (direct) extent allocations.
1468 */
1469STATIC void
1470xfs_iext_indirect_to_direct(
1471 xfs_ifork_t *ifp) /* inode fork pointer */
1472{
1473 xfs_bmbt_rec_host_t *ep; /* extent record pointer */
1474 xfs_extnum_t nextents; /* number of extents in file */
1475 int size; /* size of file extents */
1476
1477 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1478 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1479 ASSERT(nextents <= XFS_LINEAR_EXTS);
1480 size = nextents * sizeof(xfs_bmbt_rec_t);
1481
1482 xfs_iext_irec_compact_pages(ifp);
1483 ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
1484
1485 ep = ifp->if_u1.if_ext_irec->er_extbuf;
1486 kmem_free(ifp->if_u1.if_ext_irec);
1487 ifp->if_flags &= ~XFS_IFEXTIREC;
1488 ifp->if_u1.if_extents = ep;
1489 ifp->if_bytes = size;
1490 if (nextents < XFS_LINEAR_EXTS) {
1491 xfs_iext_realloc_direct(ifp, size);
1492 }
1493}
1494
1495/*
1496 * Free incore file extents.
1497 */
1498void
1499xfs_iext_destroy(
1500 xfs_ifork_t *ifp) /* inode fork pointer */
1501{
1502 if (ifp->if_flags & XFS_IFEXTIREC) {
1503 int erp_idx;
1504 int nlists;
1505
1506 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1507 for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
1508 xfs_iext_irec_remove(ifp, erp_idx);
1509 }
1510 ifp->if_flags &= ~XFS_IFEXTIREC;
1511 } else if (ifp->if_real_bytes) {
1512 kmem_free(ifp->if_u1.if_extents);
1513 } else if (ifp->if_bytes) {
1514 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
1515 sizeof(xfs_bmbt_rec_t));
1516 }
1517 ifp->if_u1.if_extents = NULL;
1518 ifp->if_real_bytes = 0;
1519 ifp->if_bytes = 0;
1520}
1521
1522/*
1523 * Return a pointer to the extent record for file system block bno.
1524 */
1525xfs_bmbt_rec_host_t * /* pointer to found extent record */
1526xfs_iext_bno_to_ext(
1527 xfs_ifork_t *ifp, /* inode fork pointer */
1528 xfs_fileoff_t bno, /* block number to search for */
1529 xfs_extnum_t *idxp) /* index of target extent */
1530{
1531 xfs_bmbt_rec_host_t *base; /* pointer to first extent */
1532 xfs_filblks_t blockcount = 0; /* number of blocks in extent */
1533 xfs_bmbt_rec_host_t *ep = NULL; /* pointer to target extent */
1534 xfs_ext_irec_t *erp = NULL; /* indirection array pointer */
1535 int high; /* upper boundary in search */
1536 xfs_extnum_t idx = 0; /* index of target extent */
1537 int low; /* lower boundary in search */
1538 xfs_extnum_t nextents; /* number of file extents */
1539 xfs_fileoff_t startoff = 0; /* start offset of extent */
1540
1541 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1542 if (nextents == 0) {
1543 *idxp = 0;
1544 return NULL;
1545 }
1546 low = 0;
1547 if (ifp->if_flags & XFS_IFEXTIREC) {
1548 /* Find target extent list */
1549 int erp_idx = 0;
1550 erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
1551 base = erp->er_extbuf;
1552 high = erp->er_extcount - 1;
1553 } else {
1554 base = ifp->if_u1.if_extents;
1555 high = nextents - 1;
1556 }
1557 /* Binary search extent records */
1558 while (low <= high) {
1559 idx = (low + high) >> 1;
1560 ep = base + idx;
1561 startoff = xfs_bmbt_get_startoff(ep);
1562 blockcount = xfs_bmbt_get_blockcount(ep);
1563 if (bno < startoff) {
1564 high = idx - 1;
1565 } else if (bno >= startoff + blockcount) {
1566 low = idx + 1;
1567 } else {
1568 /* Convert back to file-based extent index */
1569 if (ifp->if_flags & XFS_IFEXTIREC) {
1570 idx += erp->er_extoff;
1571 }
1572 *idxp = idx;
1573 return ep;
1574 }
1575 }
1576 /* Convert back to file-based extent index */
1577 if (ifp->if_flags & XFS_IFEXTIREC) {
1578 idx += erp->er_extoff;
1579 }
1580 if (bno >= startoff + blockcount) {
1581 if (++idx == nextents) {
1582 ep = NULL;
1583 } else {
1584 ep = xfs_iext_get_ext(ifp, idx);
1585 }
1586 }
1587 *idxp = idx;
1588 return ep;
1589}
1590
1591/*
1592 * Return a pointer to the indirection array entry containing the
1593 * extent record for filesystem block bno. Store the index of the
1594 * target irec in *erp_idxp.
1595 */
1596xfs_ext_irec_t * /* pointer to found extent record */
1597xfs_iext_bno_to_irec(
1598 xfs_ifork_t *ifp, /* inode fork pointer */
1599 xfs_fileoff_t bno, /* block number to search for */
1600 int *erp_idxp) /* irec index of target ext list */
1601{
1602 xfs_ext_irec_t *erp = NULL; /* indirection array pointer */
1603 xfs_ext_irec_t *erp_next; /* next indirection array entry */
1604 int erp_idx; /* indirection array index */
1605 int nlists; /* number of extent irec's (lists) */
1606 int high; /* binary search upper limit */
1607 int low; /* binary search lower limit */
1608
1609 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1610 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1611 erp_idx = 0;
1612 low = 0;
1613 high = nlists - 1;
1614 while (low <= high) {
1615 erp_idx = (low + high) >> 1;
1616 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1617 erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL;
1618 if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) {
1619 high = erp_idx - 1;
1620 } else if (erp_next && bno >=
1621 xfs_bmbt_get_startoff(erp_next->er_extbuf)) {
1622 low = erp_idx + 1;
1623 } else {
1624 break;
1625 }
1626 }
1627 *erp_idxp = erp_idx;
1628 return erp;
1629}
1630
1631/*
1632 * Return a pointer to the indirection array entry containing the
1633 * extent record at file extent index *idxp. Store the index of the
1634 * target irec in *erp_idxp and store the page index of the target
1635 * extent record in *idxp.
1636 */
1637xfs_ext_irec_t *
1638xfs_iext_idx_to_irec(
1639 xfs_ifork_t *ifp, /* inode fork pointer */
1640 xfs_extnum_t *idxp, /* extent index (file -> page) */
1641 int *erp_idxp, /* pointer to target irec */
1642 int realloc) /* new bytes were just added */
1643{
1644 xfs_ext_irec_t *prev; /* pointer to previous irec */
1645 xfs_ext_irec_t *erp = NULL; /* pointer to current irec */
1646 int erp_idx; /* indirection array index */
1647 int nlists; /* number of irec's (ex lists) */
1648 int high; /* binary search upper limit */
1649 int low; /* binary search lower limit */
1650 xfs_extnum_t page_idx = *idxp; /* extent index in target list */
1651
1652 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1653 ASSERT(page_idx >= 0);
1654 ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
1655 ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);
1656
1657 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1658 erp_idx = 0;
1659 low = 0;
1660 high = nlists - 1;
1661
1662 /* Binary search extent irec's */
1663 while (low <= high) {
1664 erp_idx = (low + high) >> 1;
1665 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1666 prev = erp_idx > 0 ? erp - 1 : NULL;
1667 if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
1668 realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
1669 high = erp_idx - 1;
1670 } else if (page_idx > erp->er_extoff + erp->er_extcount ||
1671 (page_idx == erp->er_extoff + erp->er_extcount &&
1672 !realloc)) {
1673 low = erp_idx + 1;
1674 } else if (page_idx == erp->er_extoff + erp->er_extcount &&
1675 erp->er_extcount == XFS_LINEAR_EXTS) {
1676 ASSERT(realloc);
1677 page_idx = 0;
1678 erp_idx++;
1679 erp = erp_idx < nlists ? erp + 1 : NULL;
1680 break;
1681 } else {
1682 page_idx -= erp->er_extoff;
1683 break;
1684 }
1685 }
1686 *idxp = page_idx;
1687 *erp_idxp = erp_idx;
Eric Sandeend99831f2014-06-22 15:03:54 +10001688 return erp;
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001689}
1690
1691/*
1692 * Allocate and initialize an indirection array once the space needed
1693 * for incore extents increases above XFS_IEXT_BUFSZ.
1694 */
1695void
1696xfs_iext_irec_init(
1697 xfs_ifork_t *ifp) /* inode fork pointer */
1698{
1699 xfs_ext_irec_t *erp; /* indirection array pointer */
1700 xfs_extnum_t nextents; /* number of extents in file */
1701
1702 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
1703 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1704 ASSERT(nextents <= XFS_LINEAR_EXTS);
1705
1706 erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
1707
1708 if (nextents == 0) {
1709 ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
1710 } else if (!ifp->if_real_bytes) {
1711 xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
1712 } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
1713 xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ);
1714 }
1715 erp->er_extbuf = ifp->if_u1.if_extents;
1716 erp->er_extcount = nextents;
1717 erp->er_extoff = 0;
1718
1719 ifp->if_flags |= XFS_IFEXTIREC;
1720 ifp->if_real_bytes = XFS_IEXT_BUFSZ;
1721 ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t);
1722 ifp->if_u1.if_ext_irec = erp;
1723
1724 return;
1725}
1726
1727/*
1728 * Allocate and initialize a new entry in the indirection array.
1729 */
1730xfs_ext_irec_t *
1731xfs_iext_irec_new(
1732 xfs_ifork_t *ifp, /* inode fork pointer */
1733 int erp_idx) /* index for new irec */
1734{
1735 xfs_ext_irec_t *erp; /* indirection array pointer */
1736 int i; /* loop counter */
1737 int nlists; /* number of irec's (ex lists) */
1738
1739 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1740 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1741
1742 /* Resize indirection array */
1743 xfs_iext_realloc_indirect(ifp, ++nlists *
1744 sizeof(xfs_ext_irec_t));
1745 /*
1746 * Move records down in the array so the
1747 * new page can use erp_idx.
1748 */
1749 erp = ifp->if_u1.if_ext_irec;
1750 for (i = nlists - 1; i > erp_idx; i--) {
1751 memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t));
1752 }
1753 ASSERT(i == erp_idx);
1754
1755 /* Initialize new extent record */
1756 erp = ifp->if_u1.if_ext_irec;
1757 erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
1758 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
1759 memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
1760 erp[erp_idx].er_extcount = 0;
1761 erp[erp_idx].er_extoff = erp_idx > 0 ?
1762 erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0;
1763 return (&erp[erp_idx]);
1764}
1765
1766/*
1767 * Remove a record from the indirection array.
1768 */
1769void
1770xfs_iext_irec_remove(
1771 xfs_ifork_t *ifp, /* inode fork pointer */
1772 int erp_idx) /* irec index to remove */
1773{
1774 xfs_ext_irec_t *erp; /* indirection array pointer */
1775 int i; /* loop counter */
1776 int nlists; /* number of irec's (ex lists) */
1777
1778 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1779 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1780 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1781 if (erp->er_extbuf) {
1782 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
1783 -erp->er_extcount);
1784 kmem_free(erp->er_extbuf);
1785 }
1786 /* Compact extent records */
1787 erp = ifp->if_u1.if_ext_irec;
1788 for (i = erp_idx; i < nlists - 1; i++) {
1789 memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
1790 }
1791 /*
1792 * Manually free the last extent record from the indirection
1793 * array. A call to xfs_iext_realloc_indirect() with a size
1794 * of zero would result in a call to xfs_iext_destroy() which
1795 * would in turn call this function again, creating a nasty
1796 * infinite loop.
1797 */
1798 if (--nlists) {
1799 xfs_iext_realloc_indirect(ifp,
1800 nlists * sizeof(xfs_ext_irec_t));
1801 } else {
1802 kmem_free(ifp->if_u1.if_ext_irec);
1803 }
1804 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
1805}
1806
1807/*
1808 * This is called to clean up large amounts of unused memory allocated
1809 * by the indirection array. Before compacting anything though, verify
1810 * that the indirection array is still needed and switch back to the
1811 * linear extent list (or even the inline buffer) if possible. The
1812 * compaction policy is as follows:
1813 *
1814 * Full Compaction: Extents fit into a single page (or inline buffer)
1815 * Partial Compaction: Extents occupy less than 50% of allocated space
1816 * No Compaction: Extents occupy at least 50% of allocated space
1817 */
1818void
1819xfs_iext_irec_compact(
1820 xfs_ifork_t *ifp) /* inode fork pointer */
1821{
1822 xfs_extnum_t nextents; /* number of extents in file */
1823 int nlists; /* number of irec's (ex lists) */
1824
1825 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1826 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1827 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1828
1829 if (nextents == 0) {
1830 xfs_iext_destroy(ifp);
1831 } else if (nextents <= XFS_INLINE_EXTS) {
1832 xfs_iext_indirect_to_direct(ifp);
1833 xfs_iext_direct_to_inline(ifp, nextents);
1834 } else if (nextents <= XFS_LINEAR_EXTS) {
1835 xfs_iext_indirect_to_direct(ifp);
1836 } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
1837 xfs_iext_irec_compact_pages(ifp);
1838 }
1839}
1840
1841/*
1842 * Combine extents from neighboring extent pages.
1843 */
1844void
1845xfs_iext_irec_compact_pages(
1846 xfs_ifork_t *ifp) /* inode fork pointer */
1847{
1848 xfs_ext_irec_t *erp, *erp_next;/* pointers to irec entries */
1849 int erp_idx = 0; /* indirection array index */
1850 int nlists; /* number of irec's (ex lists) */
1851
1852 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1853 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1854 while (erp_idx < nlists - 1) {
1855 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1856 erp_next = erp + 1;
1857 if (erp_next->er_extcount <=
1858 (XFS_LINEAR_EXTS - erp->er_extcount)) {
1859 memcpy(&erp->er_extbuf[erp->er_extcount],
1860 erp_next->er_extbuf, erp_next->er_extcount *
1861 sizeof(xfs_bmbt_rec_t));
1862 erp->er_extcount += erp_next->er_extcount;
1863 /*
1864 * Free page before removing extent record
1865 * so er_extoffs don't get modified in
1866 * xfs_iext_irec_remove.
1867 */
1868 kmem_free(erp_next->er_extbuf);
1869 erp_next->er_extbuf = NULL;
1870 xfs_iext_irec_remove(ifp, erp_idx + 1);
1871 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1872 } else {
1873 erp_idx++;
1874 }
1875 }
1876}
1877
1878/*
1879 * This is called to update the er_extoff field in the indirection
1880 * array when extents have been added or removed from one of the
1881 * extent lists. erp_idx contains the irec index to begin updating
1882 * at and ext_diff contains the number of extents that were added
1883 * or removed.
1884 */
1885void
1886xfs_iext_irec_update_extoffs(
1887 xfs_ifork_t *ifp, /* inode fork pointer */
1888 int erp_idx, /* irec index to update */
1889 int ext_diff) /* number of new extents */
1890{
1891 int i; /* loop counter */
1892 int nlists; /* number of irec's (ex lists */
1893
1894 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1895 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1896 for (i = erp_idx; i < nlists; i++) {
1897 ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
1898 }
1899}