blob: 2b60a5a2ae532288a41f1a0aee62119a05d4372c [file] [log] [blame]
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001/*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include <linux/log2.h>
19
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_format.h"
23#include "xfs_log.h"
24#include "xfs_inum.h"
25#include "xfs_trans.h"
26#include "xfs_trans_priv.h"
27#include "xfs_sb.h"
28#include "xfs_ag.h"
29#include "xfs_mount.h"
30#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h"
32#include "xfs_ialloc_btree.h"
33#include "xfs_attr_sf.h"
34#include "xfs_dinode.h"
35#include "xfs_inode.h"
36#include "xfs_buf_item.h"
37#include "xfs_inode_item.h"
38#include "xfs_btree.h"
39#include "xfs_alloc.h"
40#include "xfs_ialloc.h"
41#include "xfs_bmap.h"
42#include "xfs_error.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100043#include "xfs_quota.h"
44#include "xfs_filestream.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100045#include "xfs_cksum.h"
46#include "xfs_trace.h"
47#include "xfs_icache.h"
48
49kmem_zone_t *xfs_ifork_zone;
50
51STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
52STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
53STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
54
#ifdef DEBUG
/*
 * Debug-only check of the first nrecs in-core extent records of a fork.
 * Every record is unpacked; when the caller says the inode uses the
 * XFS_EXTFMT_NOSTATE extent format, each extent is asserted to be in the
 * XFS_EXT_NORM state.
 */
void
xfs_validate_extents(
	xfs_ifork_t		*ifp,
	int			nrecs,
	xfs_exntfmt_t		fmt)
{
	int			idx;

	for (idx = 0; idx < nrecs; idx++) {
		xfs_bmbt_rec_host_t	*src = xfs_iext_get_ext(ifp, idx);
		xfs_bmbt_rec_host_t	copy;
		xfs_bmbt_irec_t		unpacked;

		/* Take an alignment-safe copy before decoding the record. */
		copy.l0 = get_unaligned(&src->l0);
		copy.l1 = get_unaligned(&src->l1);
		xfs_bmbt_get_all(&copy, &unpacked);

		if (fmt != XFS_EXTFMT_NOSTATE)
			continue;
		ASSERT(unpacked.br_state == XFS_EXT_NORM);
	}
}
#else /* DEBUG */
#define xfs_validate_extents(ifp, nrecs, fmt)
#endif /* DEBUG */
82
83
84/*
85 * Move inode type and inode format specific information from the
86 * on-disk inode to the in-core inode. For fifos, devs, and sockets
87 * this means set if_rdev to the proper value. For files, directories,
88 * and symlinks this means to bring in the in-line data or extent
89 * pointers. For a file in B-tree format, only the root is immediately
90 * brought in-core. The rest will be in-lined in if_extents when it
91 * is first referenced (see xfs_iread_extents()).
92 */
93int
94xfs_iformat_fork(
95 xfs_inode_t *ip,
96 xfs_dinode_t *dip)
97{
98 xfs_attr_shortform_t *atp;
99 int size;
100 int error = 0;
101 xfs_fsize_t di_size;
102
103 if (unlikely(be32_to_cpu(dip->di_nextents) +
104 be16_to_cpu(dip->di_anextents) >
105 be64_to_cpu(dip->di_nblocks))) {
106 xfs_warn(ip->i_mount,
107 "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
108 (unsigned long long)ip->i_ino,
109 (int)(be32_to_cpu(dip->di_nextents) +
110 be16_to_cpu(dip->di_anextents)),
111 (unsigned long long)
112 be64_to_cpu(dip->di_nblocks));
113 XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
114 ip->i_mount, dip);
115 return XFS_ERROR(EFSCORRUPTED);
116 }
117
118 if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
119 xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
120 (unsigned long long)ip->i_ino,
121 dip->di_forkoff);
122 XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
123 ip->i_mount, dip);
124 return XFS_ERROR(EFSCORRUPTED);
125 }
126
127 if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
128 !ip->i_mount->m_rtdev_targp)) {
129 xfs_warn(ip->i_mount,
130 "corrupt dinode %Lu, has realtime flag set.",
131 ip->i_ino);
132 XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
133 XFS_ERRLEVEL_LOW, ip->i_mount, dip);
134 return XFS_ERROR(EFSCORRUPTED);
135 }
136
137 switch (ip->i_d.di_mode & S_IFMT) {
138 case S_IFIFO:
139 case S_IFCHR:
140 case S_IFBLK:
141 case S_IFSOCK:
142 if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
143 XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
144 ip->i_mount, dip);
145 return XFS_ERROR(EFSCORRUPTED);
146 }
147 ip->i_d.di_size = 0;
148 ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
149 break;
150
151 case S_IFREG:
152 case S_IFLNK:
153 case S_IFDIR:
154 switch (dip->di_format) {
155 case XFS_DINODE_FMT_LOCAL:
156 /*
157 * no local regular files yet
158 */
159 if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
160 xfs_warn(ip->i_mount,
161 "corrupt inode %Lu (local format for regular file).",
162 (unsigned long long) ip->i_ino);
163 XFS_CORRUPTION_ERROR("xfs_iformat(4)",
164 XFS_ERRLEVEL_LOW,
165 ip->i_mount, dip);
166 return XFS_ERROR(EFSCORRUPTED);
167 }
168
169 di_size = be64_to_cpu(dip->di_size);
170 if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
171 xfs_warn(ip->i_mount,
172 "corrupt inode %Lu (bad size %Ld for local inode).",
173 (unsigned long long) ip->i_ino,
174 (long long) di_size);
175 XFS_CORRUPTION_ERROR("xfs_iformat(5)",
176 XFS_ERRLEVEL_LOW,
177 ip->i_mount, dip);
178 return XFS_ERROR(EFSCORRUPTED);
179 }
180
181 size = (int)di_size;
182 error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
183 break;
184 case XFS_DINODE_FMT_EXTENTS:
185 error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
186 break;
187 case XFS_DINODE_FMT_BTREE:
188 error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
189 break;
190 default:
191 XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
192 ip->i_mount);
193 return XFS_ERROR(EFSCORRUPTED);
194 }
195 break;
196
197 default:
198 XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
199 return XFS_ERROR(EFSCORRUPTED);
200 }
201 if (error) {
202 return error;
203 }
204 if (!XFS_DFORK_Q(dip))
205 return 0;
206
207 ASSERT(ip->i_afp == NULL);
208 ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
209
210 switch (dip->di_aformat) {
211 case XFS_DINODE_FMT_LOCAL:
212 atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
213 size = be16_to_cpu(atp->hdr.totsize);
214
215 if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
216 xfs_warn(ip->i_mount,
217 "corrupt inode %Lu (bad attr fork size %Ld).",
218 (unsigned long long) ip->i_ino,
219 (long long) size);
220 XFS_CORRUPTION_ERROR("xfs_iformat(8)",
221 XFS_ERRLEVEL_LOW,
222 ip->i_mount, dip);
223 return XFS_ERROR(EFSCORRUPTED);
224 }
225
226 error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
227 break;
228 case XFS_DINODE_FMT_EXTENTS:
229 error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
230 break;
231 case XFS_DINODE_FMT_BTREE:
232 error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
233 break;
234 default:
235 error = XFS_ERROR(EFSCORRUPTED);
236 break;
237 }
238 if (error) {
239 kmem_zone_free(xfs_ifork_zone, ip->i_afp);
240 ip->i_afp = NULL;
241 xfs_idestroy_fork(ip, XFS_DATA_FORK);
242 }
243 return error;
244}
245
246/*
247 * The file is in-lined in the on-disk inode.
248 * If it fits into if_inline_data, then copy
249 * it there, otherwise allocate a buffer for it
250 * and copy the data there. Either way, set
251 * if_data to point at the data.
252 * If we allocate a buffer for the data, make
253 * sure that its size is a multiple of 4 and
254 * record the real size in i_real_bytes.
255 */
256STATIC int
257xfs_iformat_local(
258 xfs_inode_t *ip,
259 xfs_dinode_t *dip,
260 int whichfork,
261 int size)
262{
263 xfs_ifork_t *ifp;
264 int real_size;
265
266 /*
267 * If the size is unreasonable, then something
268 * is wrong and we just bail out rather than crash in
269 * kmem_alloc() or memcpy() below.
270 */
271 if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
272 xfs_warn(ip->i_mount,
273 "corrupt inode %Lu (bad size %d for local fork, size = %d).",
274 (unsigned long long) ip->i_ino, size,
275 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
276 XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
277 ip->i_mount, dip);
278 return XFS_ERROR(EFSCORRUPTED);
279 }
280 ifp = XFS_IFORK_PTR(ip, whichfork);
281 real_size = 0;
282 if (size == 0)
283 ifp->if_u1.if_data = NULL;
284 else if (size <= sizeof(ifp->if_u2.if_inline_data))
285 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
286 else {
287 real_size = roundup(size, 4);
288 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
289 }
290 ifp->if_bytes = size;
291 ifp->if_real_bytes = real_size;
292 if (size)
293 memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size);
294 ifp->if_flags &= ~XFS_IFEXTENTS;
295 ifp->if_flags |= XFS_IFINLINE;
296 return 0;
297}
298
299/*
300 * The file consists of a set of extents all
301 * of which fit into the on-disk inode.
302 * If there are few enough extents to fit into
303 * the if_inline_ext, then copy them there.
304 * Otherwise allocate a buffer for them and copy
305 * them into it. Either way, set if_extents
306 * to point at the extents.
307 */
308STATIC int
309xfs_iformat_extents(
310 xfs_inode_t *ip,
311 xfs_dinode_t *dip,
312 int whichfork)
313{
314 xfs_bmbt_rec_t *dp;
315 xfs_ifork_t *ifp;
316 int nex;
317 int size;
318 int i;
319
320 ifp = XFS_IFORK_PTR(ip, whichfork);
321 nex = XFS_DFORK_NEXTENTS(dip, whichfork);
322 size = nex * (uint)sizeof(xfs_bmbt_rec_t);
323
324 /*
325 * If the number of extents is unreasonable, then something
326 * is wrong and we just bail out rather than crash in
327 * kmem_alloc() or memcpy() below.
328 */
329 if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
330 xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
331 (unsigned long long) ip->i_ino, nex);
332 XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
333 ip->i_mount, dip);
334 return XFS_ERROR(EFSCORRUPTED);
335 }
336
337 ifp->if_real_bytes = 0;
338 if (nex == 0)
339 ifp->if_u1.if_extents = NULL;
340 else if (nex <= XFS_INLINE_EXTS)
341 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
342 else
343 xfs_iext_add(ifp, 0, nex);
344
345 ifp->if_bytes = size;
346 if (size) {
347 dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
348 xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
349 for (i = 0; i < nex; i++, dp++) {
350 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
351 ep->l0 = get_unaligned_be64(&dp->l0);
352 ep->l1 = get_unaligned_be64(&dp->l1);
353 }
354 XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
355 if (whichfork != XFS_DATA_FORK ||
356 XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
357 if (unlikely(xfs_check_nostate_extents(
358 ifp, 0, nex))) {
359 XFS_ERROR_REPORT("xfs_iformat_extents(2)",
360 XFS_ERRLEVEL_LOW,
361 ip->i_mount);
362 return XFS_ERROR(EFSCORRUPTED);
363 }
364 }
365 ifp->if_flags |= XFS_IFEXTENTS;
366 return 0;
367}
368
369/*
370 * The file has too many extents to fit into
371 * the inode, so they are in B-tree format.
372 * Allocate a buffer for the root of the B-tree
373 * and copy the root into it. The i_extents
374 * field will remain NULL until all of the
375 * extents are read in (when they are needed).
376 */
377STATIC int
378xfs_iformat_btree(
379 xfs_inode_t *ip,
380 xfs_dinode_t *dip,
381 int whichfork)
382{
383 struct xfs_mount *mp = ip->i_mount;
384 xfs_bmdr_block_t *dfp;
385 xfs_ifork_t *ifp;
386 /* REFERENCED */
387 int nrecs;
388 int size;
389
390 ifp = XFS_IFORK_PTR(ip, whichfork);
391 dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
392 size = XFS_BMAP_BROOT_SPACE(mp, dfp);
393 nrecs = be16_to_cpu(dfp->bb_numrecs);
394
395 /*
396 * blow out if -- fork has less extents than can fit in
397 * fork (fork shouldn't be a btree format), root btree
398 * block has more records than can fit into the fork,
399 * or the number of extents is greater than the number of
400 * blocks.
401 */
402 if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
403 XFS_IFORK_MAXEXT(ip, whichfork) ||
404 XFS_BMDR_SPACE_CALC(nrecs) >
405 XFS_DFORK_SIZE(dip, mp, whichfork) ||
406 XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
407 xfs_warn(mp, "corrupt inode %Lu (btree).",
408 (unsigned long long) ip->i_ino);
409 XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
410 mp, dip);
411 return XFS_ERROR(EFSCORRUPTED);
412 }
413
414 ifp->if_broot_bytes = size;
415 ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS);
416 ASSERT(ifp->if_broot != NULL);
417 /*
418 * Copy and convert from the on-disk structure
419 * to the in-memory structure.
420 */
421 xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
422 ifp->if_broot, size);
423 ifp->if_flags &= ~XFS_IFEXTENTS;
424 ifp->if_flags |= XFS_IFBROOT;
425
426 return 0;
427}
428
429/*
430 * Read in extents from a btree-format inode.
431 * Allocate and fill in if_extents. Real work is done in xfs_bmap.c.
432 */
433int
434xfs_iread_extents(
435 xfs_trans_t *tp,
436 xfs_inode_t *ip,
437 int whichfork)
438{
439 int error;
440 xfs_ifork_t *ifp;
441 xfs_extnum_t nextents;
442
443 if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
444 XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
445 ip->i_mount);
446 return XFS_ERROR(EFSCORRUPTED);
447 }
448 nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
449 ifp = XFS_IFORK_PTR(ip, whichfork);
450
451 /*
452 * We know that the size is valid (it's checked in iformat_btree)
453 */
454 ifp->if_bytes = ifp->if_real_bytes = 0;
455 ifp->if_flags |= XFS_IFEXTENTS;
456 xfs_iext_add(ifp, 0, nextents);
457 error = xfs_bmap_read_extents(tp, ip, whichfork);
458 if (error) {
459 xfs_iext_destroy(ifp);
460 ifp->if_flags &= ~XFS_IFEXTENTS;
461 return error;
462 }
463 xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip));
464 return 0;
465}
466/*
467 * Reallocate the space for if_broot based on the number of records
468 * being added or deleted as indicated in rec_diff. Move the records
469 * and pointers in if_broot to fit the new size. When shrinking this
470 * will eliminate holes between the records and pointers created by
471 * the caller. When growing this will create holes to be filled in
472 * by the caller.
473 *
474 * The caller must not request to add more records than would fit in
475 * the on-disk inode root. If the if_broot is currently NULL, then
Zhi Yong Wuf6c27342013-08-07 10:11:04 +0000476 * if we are adding records, one will be allocated. The caller must also
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000477 * not request that the number of records go below zero, although
478 * it can go to zero.
479 *
480 * ip -- the inode whose if_broot area is changing
481 * ext_diff -- the change in the number of records, positive or negative,
482 * requested for the if_broot array.
483 */
484void
485xfs_iroot_realloc(
486 xfs_inode_t *ip,
487 int rec_diff,
488 int whichfork)
489{
490 struct xfs_mount *mp = ip->i_mount;
491 int cur_max;
492 xfs_ifork_t *ifp;
493 struct xfs_btree_block *new_broot;
494 int new_max;
495 size_t new_size;
496 char *np;
497 char *op;
498
499 /*
500 * Handle the degenerate case quietly.
501 */
502 if (rec_diff == 0) {
503 return;
504 }
505
506 ifp = XFS_IFORK_PTR(ip, whichfork);
507 if (rec_diff > 0) {
508 /*
509 * If there wasn't any memory allocated before, just
510 * allocate it now and get out.
511 */
512 if (ifp->if_broot_bytes == 0) {
513 new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
514 ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
515 ifp->if_broot_bytes = (int)new_size;
516 return;
517 }
518
519 /*
520 * If there is already an existing if_broot, then we need
521 * to realloc() it and shift the pointers to their new
522 * location. The records don't change location because
523 * they are kept butted up against the btree block header.
524 */
525 cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
526 new_max = cur_max + rec_diff;
527 new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
528 ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
529 XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max),
530 KM_SLEEP | KM_NOFS);
531 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
532 ifp->if_broot_bytes);
533 np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
534 (int)new_size);
535 ifp->if_broot_bytes = (int)new_size;
536 ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
537 XFS_IFORK_SIZE(ip, whichfork));
538 memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
539 return;
540 }
541
542 /*
543 * rec_diff is less than 0. In this case, we are shrinking the
544 * if_broot buffer. It must already exist. If we go to zero
545 * records, just get rid of the root and clear the status bit.
546 */
547 ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
548 cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
549 new_max = cur_max + rec_diff;
550 ASSERT(new_max >= 0);
551 if (new_max > 0)
552 new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
553 else
554 new_size = 0;
555 if (new_size > 0) {
556 new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
557 /*
558 * First copy over the btree block header.
559 */
560 memcpy(new_broot, ifp->if_broot,
561 XFS_BMBT_BLOCK_LEN(ip->i_mount));
562 } else {
563 new_broot = NULL;
564 ifp->if_flags &= ~XFS_IFBROOT;
565 }
566
567 /*
568 * Only copy the records and pointers if there are any.
569 */
570 if (new_max > 0) {
571 /*
572 * First copy the records.
573 */
574 op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
575 np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
576 memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
577
578 /*
579 * Then copy the pointers.
580 */
581 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
582 ifp->if_broot_bytes);
583 np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
584 (int)new_size);
585 memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
586 }
587 kmem_free(ifp->if_broot);
588 ifp->if_broot = new_broot;
589 ifp->if_broot_bytes = (int)new_size;
590 if (ifp->if_broot)
591 ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
592 XFS_IFORK_SIZE(ip, whichfork));
593 return;
594}
595
596
597/*
598 * This is called when the amount of space needed for if_data
599 * is increased or decreased. The change in size is indicated by
600 * the number of bytes that need to be added or deleted in the
601 * byte_diff parameter.
602 *
603 * If the amount of space needed has decreased below the size of the
604 * inline buffer, then switch to using the inline buffer. Otherwise,
605 * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
606 * to what is needed.
607 *
608 * ip -- the inode whose if_data area is changing
609 * byte_diff -- the change in the number of bytes, positive or negative,
610 * requested for the if_data array.
611 */
612void
613xfs_idata_realloc(
614 xfs_inode_t *ip,
615 int byte_diff,
616 int whichfork)
617{
618 xfs_ifork_t *ifp;
619 int new_size;
620 int real_size;
621
622 if (byte_diff == 0) {
623 return;
624 }
625
626 ifp = XFS_IFORK_PTR(ip, whichfork);
627 new_size = (int)ifp->if_bytes + byte_diff;
628 ASSERT(new_size >= 0);
629
630 if (new_size == 0) {
631 if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
632 kmem_free(ifp->if_u1.if_data);
633 }
634 ifp->if_u1.if_data = NULL;
635 real_size = 0;
636 } else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) {
637 /*
638 * If the valid extents/data can fit in if_inline_ext/data,
639 * copy them from the malloc'd vector and free it.
640 */
641 if (ifp->if_u1.if_data == NULL) {
642 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
643 } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
644 ASSERT(ifp->if_real_bytes != 0);
645 memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
646 new_size);
647 kmem_free(ifp->if_u1.if_data);
648 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
649 }
650 real_size = 0;
651 } else {
652 /*
653 * Stuck with malloc/realloc.
654 * For inline data, the underlying buffer must be
655 * a multiple of 4 bytes in size so that it can be
656 * logged and stay on word boundaries. We enforce
657 * that here.
658 */
659 real_size = roundup(new_size, 4);
660 if (ifp->if_u1.if_data == NULL) {
661 ASSERT(ifp->if_real_bytes == 0);
662 ifp->if_u1.if_data = kmem_alloc(real_size,
663 KM_SLEEP | KM_NOFS);
664 } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
665 /*
666 * Only do the realloc if the underlying size
667 * is really changing.
668 */
669 if (ifp->if_real_bytes != real_size) {
670 ifp->if_u1.if_data =
671 kmem_realloc(ifp->if_u1.if_data,
672 real_size,
673 ifp->if_real_bytes,
674 KM_SLEEP | KM_NOFS);
675 }
676 } else {
677 ASSERT(ifp->if_real_bytes == 0);
678 ifp->if_u1.if_data = kmem_alloc(real_size,
679 KM_SLEEP | KM_NOFS);
680 memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
681 ifp->if_bytes);
682 }
683 }
684 ifp->if_real_bytes = real_size;
685 ifp->if_bytes = new_size;
686 ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
687}
688
689void
690xfs_idestroy_fork(
691 xfs_inode_t *ip,
692 int whichfork)
693{
694 xfs_ifork_t *ifp;
695
696 ifp = XFS_IFORK_PTR(ip, whichfork);
697 if (ifp->if_broot != NULL) {
698 kmem_free(ifp->if_broot);
699 ifp->if_broot = NULL;
700 }
701
702 /*
703 * If the format is local, then we can't have an extents
704 * array so just look for an inline data array. If we're
705 * not local then we may or may not have an extents list,
706 * so check and free it up if we do.
707 */
708 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
709 if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
710 (ifp->if_u1.if_data != NULL)) {
711 ASSERT(ifp->if_real_bytes != 0);
712 kmem_free(ifp->if_u1.if_data);
713 ifp->if_u1.if_data = NULL;
714 ifp->if_real_bytes = 0;
715 }
716 } else if ((ifp->if_flags & XFS_IFEXTENTS) &&
717 ((ifp->if_flags & XFS_IFEXTIREC) ||
718 ((ifp->if_u1.if_extents != NULL) &&
719 (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) {
720 ASSERT(ifp->if_real_bytes != 0);
721 xfs_iext_destroy(ifp);
722 }
723 ASSERT(ifp->if_u1.if_extents == NULL ||
724 ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext);
725 ASSERT(ifp->if_real_bytes == 0);
726 if (whichfork == XFS_ATTR_FORK) {
727 kmem_zone_free(xfs_ifork_zone, ip->i_afp);
728 ip->i_afp = NULL;
729 }
730}
731
732/*
733 * xfs_iextents_copy()
734 *
735 * This is called to copy the REAL extents (as opposed to the delayed
736 * allocation extents) from the inode into the given buffer. It
737 * returns the number of bytes copied into the buffer.
738 *
739 * If there are no delayed allocation extents, then we can just
740 * memcpy() the extents into the buffer. Otherwise, we need to
741 * examine each extent in turn and skip those which are delayed.
742 */
743int
744xfs_iextents_copy(
745 xfs_inode_t *ip,
746 xfs_bmbt_rec_t *dp,
747 int whichfork)
748{
749 int copied;
750 int i;
751 xfs_ifork_t *ifp;
752 int nrecs;
753 xfs_fsblock_t start_block;
754
755 ifp = XFS_IFORK_PTR(ip, whichfork);
756 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
757 ASSERT(ifp->if_bytes > 0);
758
759 nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
760 XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
761 ASSERT(nrecs > 0);
762
763 /*
764 * There are some delayed allocation extents in the
765 * inode, so copy the extents one at a time and skip
766 * the delayed ones. There must be at least one
767 * non-delayed extent.
768 */
769 copied = 0;
770 for (i = 0; i < nrecs; i++) {
771 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
772 start_block = xfs_bmbt_get_startblock(ep);
773 if (isnullstartblock(start_block)) {
774 /*
775 * It's a delayed allocation extent, so skip it.
776 */
777 continue;
778 }
779
780 /* Translate to on disk format */
Dave Chinnerc5c249b2013-08-12 20:49:43 +1000781 put_unaligned_be64(ep->l0, &dp->l0);
782 put_unaligned_be64(ep->l1, &dp->l1);
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000783 dp++;
784 copied++;
785 }
786 ASSERT(copied != 0);
787 xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip));
788
789 return (copied * (uint)sizeof(xfs_bmbt_rec_t));
790}
791
792/*
793 * Each of the following cases stores data into the same region
794 * of the on-disk inode, so only one of them can be valid at
795 * any given time. While it is possible to have conflicting formats
796 * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
797 * in EXTENTS format, this can only happen when the fork has
798 * changed formats after being modified but before being flushed.
799 * In these cases, the format always takes precedence, because the
800 * format indicates the current state of the fork.
801 */
802void
803xfs_iflush_fork(
804 xfs_inode_t *ip,
805 xfs_dinode_t *dip,
806 xfs_inode_log_item_t *iip,
807 int whichfork,
808 xfs_buf_t *bp)
809{
810 char *cp;
811 xfs_ifork_t *ifp;
812 xfs_mount_t *mp;
813 static const short brootflag[2] =
814 { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
815 static const short dataflag[2] =
816 { XFS_ILOG_DDATA, XFS_ILOG_ADATA };
817 static const short extflag[2] =
818 { XFS_ILOG_DEXT, XFS_ILOG_AEXT };
819
820 if (!iip)
821 return;
822 ifp = XFS_IFORK_PTR(ip, whichfork);
823 /*
824 * This can happen if we gave up in iformat in an error path,
825 * for the attribute fork.
826 */
827 if (!ifp) {
828 ASSERT(whichfork == XFS_ATTR_FORK);
829 return;
830 }
831 cp = XFS_DFORK_PTR(dip, whichfork);
832 mp = ip->i_mount;
833 switch (XFS_IFORK_FORMAT(ip, whichfork)) {
834 case XFS_DINODE_FMT_LOCAL:
835 if ((iip->ili_fields & dataflag[whichfork]) &&
836 (ifp->if_bytes > 0)) {
837 ASSERT(ifp->if_u1.if_data != NULL);
838 ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
839 memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
840 }
841 break;
842
843 case XFS_DINODE_FMT_EXTENTS:
844 ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
845 !(iip->ili_fields & extflag[whichfork]));
846 if ((iip->ili_fields & extflag[whichfork]) &&
847 (ifp->if_bytes > 0)) {
848 ASSERT(xfs_iext_get_ext(ifp, 0));
849 ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
850 (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
851 whichfork);
852 }
853 break;
854
855 case XFS_DINODE_FMT_BTREE:
856 if ((iip->ili_fields & brootflag[whichfork]) &&
857 (ifp->if_broot_bytes > 0)) {
858 ASSERT(ifp->if_broot != NULL);
859 ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
860 XFS_IFORK_SIZE(ip, whichfork));
861 xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
862 (xfs_bmdr_block_t *)cp,
863 XFS_DFORK_SIZE(dip, mp, whichfork));
864 }
865 break;
866
867 case XFS_DINODE_FMT_DEV:
868 if (iip->ili_fields & XFS_ILOG_DEV) {
869 ASSERT(whichfork == XFS_DATA_FORK);
870 xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
871 }
872 break;
873
874 case XFS_DINODE_FMT_UUID:
875 if (iip->ili_fields & XFS_ILOG_UUID) {
876 ASSERT(whichfork == XFS_DATA_FORK);
877 memcpy(XFS_DFORK_DPTR(dip),
878 &ip->i_df.if_u2.if_uuid,
879 sizeof(uuid_t));
880 }
881 break;
882
883 default:
884 ASSERT(0);
885 break;
886 }
887}
888
889/*
890 * Return a pointer to the extent record at file index idx.
891 */
892xfs_bmbt_rec_host_t *
893xfs_iext_get_ext(
894 xfs_ifork_t *ifp, /* inode fork pointer */
895 xfs_extnum_t idx) /* index of target extent */
896{
897 ASSERT(idx >= 0);
898 ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
899
900 if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
901 return ifp->if_u1.if_ext_irec->er_extbuf;
902 } else if (ifp->if_flags & XFS_IFEXTIREC) {
903 xfs_ext_irec_t *erp; /* irec pointer */
904 int erp_idx = 0; /* irec index */
905 xfs_extnum_t page_idx = idx; /* ext index in target list */
906
907 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
908 return &erp->er_extbuf[page_idx];
909 } else if (ifp->if_bytes) {
910 return &ifp->if_u1.if_extents[idx];
911 } else {
912 return NULL;
913 }
914}
915
916/*
917 * Insert new item(s) into the extent records for incore inode
918 * fork 'ifp'. 'count' new items are inserted at index 'idx'.
919 */
920void
921xfs_iext_insert(
922 xfs_inode_t *ip, /* incore inode pointer */
923 xfs_extnum_t idx, /* starting index of new items */
924 xfs_extnum_t count, /* number of inserted items */
925 xfs_bmbt_irec_t *new, /* items to insert */
926 int state) /* type of extent conversion */
927{
928 xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
929 xfs_extnum_t i; /* extent record index */
930
931 trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_);
932
933 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
934 xfs_iext_add(ifp, idx, count);
935 for (i = idx; i < idx + count; i++, new++)
936 xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new);
937}
938
939/*
940 * This is called when the amount of space required for incore file
941 * extents needs to be increased. The ext_diff parameter stores the
942 * number of new extents being added and the idx parameter contains
943 * the extent index where the new extents will be added. If the new
944 * extents are being appended, then we just need to (re)allocate and
945 * initialize the space. Otherwise, if the new extents are being
946 * inserted into the middle of the existing entries, a bit more work
947 * is required to make room for the new extents to be inserted. The
948 * caller is responsible for filling in the new extent entries upon
949 * return.
950 */
951void
952xfs_iext_add(
953 xfs_ifork_t *ifp, /* inode fork pointer */
954 xfs_extnum_t idx, /* index to begin adding exts */
955 int ext_diff) /* number of extents to add */
956{
957 int byte_diff; /* new bytes being added */
958 int new_size; /* size of extents after adding */
959 xfs_extnum_t nextents; /* number of extents in file */
960
961 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
962 ASSERT((idx >= 0) && (idx <= nextents));
963 byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
964 new_size = ifp->if_bytes + byte_diff;
965 /*
966 * If the new number of extents (nextents + ext_diff)
967 * fits inside the inode, then continue to use the inline
968 * extent buffer.
969 */
970 if (nextents + ext_diff <= XFS_INLINE_EXTS) {
971 if (idx < nextents) {
972 memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff],
973 &ifp->if_u2.if_inline_ext[idx],
974 (nextents - idx) * sizeof(xfs_bmbt_rec_t));
975 memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff);
976 }
977 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
978 ifp->if_real_bytes = 0;
979 }
980 /*
981 * Otherwise use a linear (direct) extent list.
982 * If the extents are currently inside the inode,
983 * xfs_iext_realloc_direct will switch us from
984 * inline to direct extent allocation mode.
985 */
986 else if (nextents + ext_diff <= XFS_LINEAR_EXTS) {
987 xfs_iext_realloc_direct(ifp, new_size);
988 if (idx < nextents) {
989 memmove(&ifp->if_u1.if_extents[idx + ext_diff],
990 &ifp->if_u1.if_extents[idx],
991 (nextents - idx) * sizeof(xfs_bmbt_rec_t));
992 memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
993 }
994 }
995 /* Indirection array */
996 else {
997 xfs_ext_irec_t *erp;
998 int erp_idx = 0;
999 int page_idx = idx;
1000
1001 ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS);
1002 if (ifp->if_flags & XFS_IFEXTIREC) {
1003 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1);
1004 } else {
1005 xfs_iext_irec_init(ifp);
1006 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1007 erp = ifp->if_u1.if_ext_irec;
1008 }
1009 /* Extents fit in target extent page */
1010 if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) {
1011 if (page_idx < erp->er_extcount) {
1012 memmove(&erp->er_extbuf[page_idx + ext_diff],
1013 &erp->er_extbuf[page_idx],
1014 (erp->er_extcount - page_idx) *
1015 sizeof(xfs_bmbt_rec_t));
1016 memset(&erp->er_extbuf[page_idx], 0, byte_diff);
1017 }
1018 erp->er_extcount += ext_diff;
1019 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
1020 }
1021 /* Insert a new extent page */
1022 else if (erp) {
1023 xfs_iext_add_indirect_multi(ifp,
1024 erp_idx, page_idx, ext_diff);
1025 }
1026 /*
1027 * If extent(s) are being appended to the last page in
1028 * the indirection array and the new extent(s) don't fit
1029 * in the page, then erp is NULL and erp_idx is set to
1030 * the next index needed in the indirection array.
1031 */
1032 else {
1033 int count = ext_diff;
1034
1035 while (count) {
1036 erp = xfs_iext_irec_new(ifp, erp_idx);
1037 erp->er_extcount = count;
1038 count -= MIN(count, (int)XFS_LINEAR_EXTS);
1039 if (count) {
1040 erp_idx++;
1041 }
1042 }
1043 }
1044 }
1045 ifp->if_bytes = new_size;
1046}
1047
1048/*
1049 * This is called when incore extents are being added to the indirection
1050 * array and the new extents do not fit in the target extent list. The
1051 * erp_idx parameter contains the irec index for the target extent list
1052 * in the indirection array, and the idx parameter contains the extent
1053 * index within the list. The number of extents being added is stored
1054 * in the count parameter.
1055 *
1056 * |-------| |-------|
1057 * | | | | idx - number of extents before idx
1058 * | idx | | count |
1059 * | | | | count - number of extents being inserted at idx
1060 * |-------| |-------|
1061 * | count | | nex2 | nex2 - number of extents after idx + count
1062 * |-------| |-------|
1063 */
1064void
1065xfs_iext_add_indirect_multi(
1066 xfs_ifork_t *ifp, /* inode fork pointer */
1067 int erp_idx, /* target extent irec index */
1068 xfs_extnum_t idx, /* index within target list */
1069 int count) /* new extents being added */
1070{
1071 int byte_diff; /* new bytes being added */
1072 xfs_ext_irec_t *erp; /* pointer to irec entry */
1073 xfs_extnum_t ext_diff; /* number of extents to add */
1074 xfs_extnum_t ext_cnt; /* new extents still needed */
1075 xfs_extnum_t nex2; /* extents after idx + count */
1076 xfs_bmbt_rec_t *nex2_ep = NULL; /* temp list for nex2 extents */
1077 int nlists; /* number of irec's (lists) */
1078
1079 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1080 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1081 nex2 = erp->er_extcount - idx;
1082 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1083
1084 /*
1085 * Save second part of target extent list
1086 * (all extents past */
1087 if (nex2) {
1088 byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
1089 nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
1090 memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
1091 erp->er_extcount -= nex2;
1092 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
1093 memset(&erp->er_extbuf[idx], 0, byte_diff);
1094 }
1095
1096 /*
1097 * Add the new extents to the end of the target
1098 * list, then allocate new irec record(s) and
1099 * extent buffer(s) as needed to store the rest
1100 * of the new extents.
1101 */
1102 ext_cnt = count;
1103 ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount);
1104 if (ext_diff) {
1105 erp->er_extcount += ext_diff;
1106 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
1107 ext_cnt -= ext_diff;
1108 }
1109 while (ext_cnt) {
1110 erp_idx++;
1111 erp = xfs_iext_irec_new(ifp, erp_idx);
1112 ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS);
1113 erp->er_extcount = ext_diff;
1114 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
1115 ext_cnt -= ext_diff;
1116 }
1117
1118 /* Add nex2 extents back to indirection array */
1119 if (nex2) {
1120 xfs_extnum_t ext_avail;
1121 int i;
1122
1123 byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
1124 ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
1125 i = 0;
1126 /*
1127 * If nex2 extents fit in the current page, append
1128 * nex2_ep after the new extents.
1129 */
1130 if (nex2 <= ext_avail) {
1131 i = erp->er_extcount;
1132 }
1133 /*
1134 * Otherwise, check if space is available in the
1135 * next page.
1136 */
1137 else if ((erp_idx < nlists - 1) &&
1138 (nex2 <= (ext_avail = XFS_LINEAR_EXTS -
1139 ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) {
1140 erp_idx++;
1141 erp++;
1142 /* Create a hole for nex2 extents */
1143 memmove(&erp->er_extbuf[nex2], erp->er_extbuf,
1144 erp->er_extcount * sizeof(xfs_bmbt_rec_t));
1145 }
1146 /*
1147 * Final choice, create a new extent page for
1148 * nex2 extents.
1149 */
1150 else {
1151 erp_idx++;
1152 erp = xfs_iext_irec_new(ifp, erp_idx);
1153 }
1154 memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
1155 kmem_free(nex2_ep);
1156 erp->er_extcount += nex2;
1157 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
1158 }
1159}
1160
1161/*
1162 * This is called when the amount of space required for incore file
1163 * extents needs to be decreased. The ext_diff parameter stores the
1164 * number of extents to be removed and the idx parameter contains
1165 * the extent index where the extents will be removed from.
1166 *
1167 * If the amount of space needed has decreased below the linear
1168 * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous
1169 * extent array. Otherwise, use kmem_realloc() to adjust the
1170 * size to what is needed.
1171 */
1172void
1173xfs_iext_remove(
1174 xfs_inode_t *ip, /* incore inode pointer */
1175 xfs_extnum_t idx, /* index to begin removing exts */
1176 int ext_diff, /* number of extents to remove */
1177 int state) /* type of extent conversion */
1178{
1179 xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
1180 xfs_extnum_t nextents; /* number of extents in file */
1181 int new_size; /* size of extents after removal */
1182
1183 trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
1184
1185 ASSERT(ext_diff > 0);
1186 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1187 new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
1188
1189 if (new_size == 0) {
1190 xfs_iext_destroy(ifp);
1191 } else if (ifp->if_flags & XFS_IFEXTIREC) {
1192 xfs_iext_remove_indirect(ifp, idx, ext_diff);
1193 } else if (ifp->if_real_bytes) {
1194 xfs_iext_remove_direct(ifp, idx, ext_diff);
1195 } else {
1196 xfs_iext_remove_inline(ifp, idx, ext_diff);
1197 }
1198 ifp->if_bytes = new_size;
1199}
1200
1201/*
1202 * This removes ext_diff extents from the inline buffer, beginning
1203 * at extent index idx.
1204 */
1205void
1206xfs_iext_remove_inline(
1207 xfs_ifork_t *ifp, /* inode fork pointer */
1208 xfs_extnum_t idx, /* index to begin removing exts */
1209 int ext_diff) /* number of extents to remove */
1210{
1211 int nextents; /* number of extents in file */
1212
1213 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
1214 ASSERT(idx < XFS_INLINE_EXTS);
1215 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1216 ASSERT(((nextents - ext_diff) > 0) &&
1217 (nextents - ext_diff) < XFS_INLINE_EXTS);
1218
1219 if (idx + ext_diff < nextents) {
1220 memmove(&ifp->if_u2.if_inline_ext[idx],
1221 &ifp->if_u2.if_inline_ext[idx + ext_diff],
1222 (nextents - (idx + ext_diff)) *
1223 sizeof(xfs_bmbt_rec_t));
1224 memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff],
1225 0, ext_diff * sizeof(xfs_bmbt_rec_t));
1226 } else {
1227 memset(&ifp->if_u2.if_inline_ext[idx], 0,
1228 ext_diff * sizeof(xfs_bmbt_rec_t));
1229 }
1230}
1231
1232/*
1233 * This removes ext_diff extents from a linear (direct) extent list,
1234 * beginning at extent index idx. If the extents are being removed
1235 * from the end of the list (ie. truncate) then we just need to re-
1236 * allocate the list to remove the extra space. Otherwise, if the
1237 * extents are being removed from the middle of the existing extent
1238 * entries, then we first need to move the extent records beginning
1239 * at idx + ext_diff up in the list to overwrite the records being
1240 * removed, then remove the extra space via kmem_realloc.
1241 */
1242void
1243xfs_iext_remove_direct(
1244 xfs_ifork_t *ifp, /* inode fork pointer */
1245 xfs_extnum_t idx, /* index to begin removing exts */
1246 int ext_diff) /* number of extents to remove */
1247{
1248 xfs_extnum_t nextents; /* number of extents in file */
1249 int new_size; /* size of extents after removal */
1250
1251 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
1252 new_size = ifp->if_bytes -
1253 (ext_diff * sizeof(xfs_bmbt_rec_t));
1254 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1255
1256 if (new_size == 0) {
1257 xfs_iext_destroy(ifp);
1258 return;
1259 }
1260 /* Move extents up in the list (if needed) */
1261 if (idx + ext_diff < nextents) {
1262 memmove(&ifp->if_u1.if_extents[idx],
1263 &ifp->if_u1.if_extents[idx + ext_diff],
1264 (nextents - (idx + ext_diff)) *
1265 sizeof(xfs_bmbt_rec_t));
1266 }
1267 memset(&ifp->if_u1.if_extents[nextents - ext_diff],
1268 0, ext_diff * sizeof(xfs_bmbt_rec_t));
1269 /*
1270 * Reallocate the direct extent list. If the extents
1271 * will fit inside the inode then xfs_iext_realloc_direct
1272 * will switch from direct to inline extent allocation
1273 * mode for us.
1274 */
1275 xfs_iext_realloc_direct(ifp, new_size);
1276 ifp->if_bytes = new_size;
1277}
1278
1279/*
1280 * This is called when incore extents are being removed from the
1281 * indirection array and the extents being removed span multiple extent
1282 * buffers. The idx parameter contains the file extent index where we
1283 * want to begin removing extents, and the count parameter contains
1284 * how many extents need to be removed.
1285 *
1286 * |-------| |-------|
1287 * | nex1 | | | nex1 - number of extents before idx
1288 * |-------| | count |
1289 * | | | | count - number of extents being removed at idx
1290 * | count | |-------|
1291 * | | | nex2 | nex2 - number of extents after idx + count
1292 * |-------| |-------|
1293 */
1294void
1295xfs_iext_remove_indirect(
1296 xfs_ifork_t *ifp, /* inode fork pointer */
1297 xfs_extnum_t idx, /* index to begin removing extents */
1298 int count) /* number of extents to remove */
1299{
1300 xfs_ext_irec_t *erp; /* indirection array pointer */
1301 int erp_idx = 0; /* indirection array index */
1302 xfs_extnum_t ext_cnt; /* extents left to remove */
1303 xfs_extnum_t ext_diff; /* extents to remove in current list */
1304 xfs_extnum_t nex1; /* number of extents before idx */
1305 xfs_extnum_t nex2; /* extents after idx + count */
1306 int page_idx = idx; /* index in target extent list */
1307
1308 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1309 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
1310 ASSERT(erp != NULL);
1311 nex1 = page_idx;
1312 ext_cnt = count;
1313 while (ext_cnt) {
1314 nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0);
1315 ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1));
1316 /*
1317 * Check for deletion of entire list;
1318 * xfs_iext_irec_remove() updates extent offsets.
1319 */
1320 if (ext_diff == erp->er_extcount) {
1321 xfs_iext_irec_remove(ifp, erp_idx);
1322 ext_cnt -= ext_diff;
1323 nex1 = 0;
1324 if (ext_cnt) {
1325 ASSERT(erp_idx < ifp->if_real_bytes /
1326 XFS_IEXT_BUFSZ);
1327 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1328 nex1 = 0;
1329 continue;
1330 } else {
1331 break;
1332 }
1333 }
1334 /* Move extents up (if needed) */
1335 if (nex2) {
1336 memmove(&erp->er_extbuf[nex1],
1337 &erp->er_extbuf[nex1 + ext_diff],
1338 nex2 * sizeof(xfs_bmbt_rec_t));
1339 }
1340 /* Zero out rest of page */
1341 memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ -
1342 ((nex1 + nex2) * sizeof(xfs_bmbt_rec_t))));
1343 /* Update remaining counters */
1344 erp->er_extcount -= ext_diff;
1345 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff);
1346 ext_cnt -= ext_diff;
1347 nex1 = 0;
1348 erp_idx++;
1349 erp++;
1350 }
1351 ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t);
1352 xfs_iext_irec_compact(ifp);
1353}
1354
1355/*
1356 * Create, destroy, or resize a linear (direct) block of extents.
1357 */
1358void
1359xfs_iext_realloc_direct(
1360 xfs_ifork_t *ifp, /* inode fork pointer */
1361 int new_size) /* new size of extents */
1362{
1363 int rnew_size; /* real new size of extents */
1364
1365 rnew_size = new_size;
1366
1367 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) ||
1368 ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) &&
1369 (new_size != ifp->if_real_bytes)));
1370
1371 /* Free extent records */
1372 if (new_size == 0) {
1373 xfs_iext_destroy(ifp);
1374 }
1375 /* Resize direct extent list and zero any new bytes */
1376 else if (ifp->if_real_bytes) {
1377 /* Check if extents will fit inside the inode */
1378 if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) {
1379 xfs_iext_direct_to_inline(ifp, new_size /
1380 (uint)sizeof(xfs_bmbt_rec_t));
1381 ifp->if_bytes = new_size;
1382 return;
1383 }
1384 if (!is_power_of_2(new_size)){
1385 rnew_size = roundup_pow_of_two(new_size);
1386 }
1387 if (rnew_size != ifp->if_real_bytes) {
1388 ifp->if_u1.if_extents =
1389 kmem_realloc(ifp->if_u1.if_extents,
1390 rnew_size,
1391 ifp->if_real_bytes, KM_NOFS);
1392 }
1393 if (rnew_size > ifp->if_real_bytes) {
1394 memset(&ifp->if_u1.if_extents[ifp->if_bytes /
1395 (uint)sizeof(xfs_bmbt_rec_t)], 0,
1396 rnew_size - ifp->if_real_bytes);
1397 }
1398 }
1399 /*
1400 * Switch from the inline extent buffer to a direct
1401 * extent list. Be sure to include the inline extent
1402 * bytes in new_size.
1403 */
1404 else {
1405 new_size += ifp->if_bytes;
1406 if (!is_power_of_2(new_size)) {
1407 rnew_size = roundup_pow_of_two(new_size);
1408 }
1409 xfs_iext_inline_to_direct(ifp, rnew_size);
1410 }
1411 ifp->if_real_bytes = rnew_size;
1412 ifp->if_bytes = new_size;
1413}
1414
1415/*
1416 * Switch from linear (direct) extent records to inline buffer.
1417 */
1418void
1419xfs_iext_direct_to_inline(
1420 xfs_ifork_t *ifp, /* inode fork pointer */
1421 xfs_extnum_t nextents) /* number of extents in file */
1422{
1423 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
1424 ASSERT(nextents <= XFS_INLINE_EXTS);
1425 /*
1426 * The inline buffer was zeroed when we switched
1427 * from inline to direct extent allocation mode,
1428 * so we don't need to clear it here.
1429 */
1430 memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
1431 nextents * sizeof(xfs_bmbt_rec_t));
1432 kmem_free(ifp->if_u1.if_extents);
1433 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
1434 ifp->if_real_bytes = 0;
1435}
1436
1437/*
1438 * Switch from inline buffer to linear (direct) extent records.
1439 * new_size should already be rounded up to the next power of 2
1440 * by the caller (when appropriate), so use new_size as it is.
1441 * However, since new_size may be rounded up, we can't update
1442 * if_bytes here. It is the caller's responsibility to update
1443 * if_bytes upon return.
1444 */
1445void
1446xfs_iext_inline_to_direct(
1447 xfs_ifork_t *ifp, /* inode fork pointer */
1448 int new_size) /* number of extents in file */
1449{
1450 ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
1451 memset(ifp->if_u1.if_extents, 0, new_size);
1452 if (ifp->if_bytes) {
1453 memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
1454 ifp->if_bytes);
1455 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
1456 sizeof(xfs_bmbt_rec_t));
1457 }
1458 ifp->if_real_bytes = new_size;
1459}
1460
1461/*
1462 * Resize an extent indirection array to new_size bytes.
1463 */
1464STATIC void
1465xfs_iext_realloc_indirect(
1466 xfs_ifork_t *ifp, /* inode fork pointer */
1467 int new_size) /* new indirection array size */
1468{
1469 int nlists; /* number of irec's (ex lists) */
1470 int size; /* current indirection array size */
1471
1472 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1473 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1474 size = nlists * sizeof(xfs_ext_irec_t);
1475 ASSERT(ifp->if_real_bytes);
1476 ASSERT((new_size >= 0) && (new_size != size));
1477 if (new_size == 0) {
1478 xfs_iext_destroy(ifp);
1479 } else {
1480 ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)
1481 kmem_realloc(ifp->if_u1.if_ext_irec,
1482 new_size, size, KM_NOFS);
1483 }
1484}
1485
1486/*
1487 * Switch from indirection array to linear (direct) extent allocations.
1488 */
1489STATIC void
1490xfs_iext_indirect_to_direct(
1491 xfs_ifork_t *ifp) /* inode fork pointer */
1492{
1493 xfs_bmbt_rec_host_t *ep; /* extent record pointer */
1494 xfs_extnum_t nextents; /* number of extents in file */
1495 int size; /* size of file extents */
1496
1497 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1498 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1499 ASSERT(nextents <= XFS_LINEAR_EXTS);
1500 size = nextents * sizeof(xfs_bmbt_rec_t);
1501
1502 xfs_iext_irec_compact_pages(ifp);
1503 ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
1504
1505 ep = ifp->if_u1.if_ext_irec->er_extbuf;
1506 kmem_free(ifp->if_u1.if_ext_irec);
1507 ifp->if_flags &= ~XFS_IFEXTIREC;
1508 ifp->if_u1.if_extents = ep;
1509 ifp->if_bytes = size;
1510 if (nextents < XFS_LINEAR_EXTS) {
1511 xfs_iext_realloc_direct(ifp, size);
1512 }
1513}
1514
1515/*
1516 * Free incore file extents.
1517 */
1518void
1519xfs_iext_destroy(
1520 xfs_ifork_t *ifp) /* inode fork pointer */
1521{
1522 if (ifp->if_flags & XFS_IFEXTIREC) {
1523 int erp_idx;
1524 int nlists;
1525
1526 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1527 for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
1528 xfs_iext_irec_remove(ifp, erp_idx);
1529 }
1530 ifp->if_flags &= ~XFS_IFEXTIREC;
1531 } else if (ifp->if_real_bytes) {
1532 kmem_free(ifp->if_u1.if_extents);
1533 } else if (ifp->if_bytes) {
1534 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
1535 sizeof(xfs_bmbt_rec_t));
1536 }
1537 ifp->if_u1.if_extents = NULL;
1538 ifp->if_real_bytes = 0;
1539 ifp->if_bytes = 0;
1540}
1541
1542/*
1543 * Return a pointer to the extent record for file system block bno.
1544 */
1545xfs_bmbt_rec_host_t * /* pointer to found extent record */
1546xfs_iext_bno_to_ext(
1547 xfs_ifork_t *ifp, /* inode fork pointer */
1548 xfs_fileoff_t bno, /* block number to search for */
1549 xfs_extnum_t *idxp) /* index of target extent */
1550{
1551 xfs_bmbt_rec_host_t *base; /* pointer to first extent */
1552 xfs_filblks_t blockcount = 0; /* number of blocks in extent */
1553 xfs_bmbt_rec_host_t *ep = NULL; /* pointer to target extent */
1554 xfs_ext_irec_t *erp = NULL; /* indirection array pointer */
1555 int high; /* upper boundary in search */
1556 xfs_extnum_t idx = 0; /* index of target extent */
1557 int low; /* lower boundary in search */
1558 xfs_extnum_t nextents; /* number of file extents */
1559 xfs_fileoff_t startoff = 0; /* start offset of extent */
1560
1561 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1562 if (nextents == 0) {
1563 *idxp = 0;
1564 return NULL;
1565 }
1566 low = 0;
1567 if (ifp->if_flags & XFS_IFEXTIREC) {
1568 /* Find target extent list */
1569 int erp_idx = 0;
1570 erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
1571 base = erp->er_extbuf;
1572 high = erp->er_extcount - 1;
1573 } else {
1574 base = ifp->if_u1.if_extents;
1575 high = nextents - 1;
1576 }
1577 /* Binary search extent records */
1578 while (low <= high) {
1579 idx = (low + high) >> 1;
1580 ep = base + idx;
1581 startoff = xfs_bmbt_get_startoff(ep);
1582 blockcount = xfs_bmbt_get_blockcount(ep);
1583 if (bno < startoff) {
1584 high = idx - 1;
1585 } else if (bno >= startoff + blockcount) {
1586 low = idx + 1;
1587 } else {
1588 /* Convert back to file-based extent index */
1589 if (ifp->if_flags & XFS_IFEXTIREC) {
1590 idx += erp->er_extoff;
1591 }
1592 *idxp = idx;
1593 return ep;
1594 }
1595 }
1596 /* Convert back to file-based extent index */
1597 if (ifp->if_flags & XFS_IFEXTIREC) {
1598 idx += erp->er_extoff;
1599 }
1600 if (bno >= startoff + blockcount) {
1601 if (++idx == nextents) {
1602 ep = NULL;
1603 } else {
1604 ep = xfs_iext_get_ext(ifp, idx);
1605 }
1606 }
1607 *idxp = idx;
1608 return ep;
1609}
1610
1611/*
1612 * Return a pointer to the indirection array entry containing the
1613 * extent record for filesystem block bno. Store the index of the
1614 * target irec in *erp_idxp.
1615 */
1616xfs_ext_irec_t * /* pointer to found extent record */
1617xfs_iext_bno_to_irec(
1618 xfs_ifork_t *ifp, /* inode fork pointer */
1619 xfs_fileoff_t bno, /* block number to search for */
1620 int *erp_idxp) /* irec index of target ext list */
1621{
1622 xfs_ext_irec_t *erp = NULL; /* indirection array pointer */
1623 xfs_ext_irec_t *erp_next; /* next indirection array entry */
1624 int erp_idx; /* indirection array index */
1625 int nlists; /* number of extent irec's (lists) */
1626 int high; /* binary search upper limit */
1627 int low; /* binary search lower limit */
1628
1629 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1630 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1631 erp_idx = 0;
1632 low = 0;
1633 high = nlists - 1;
1634 while (low <= high) {
1635 erp_idx = (low + high) >> 1;
1636 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1637 erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL;
1638 if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) {
1639 high = erp_idx - 1;
1640 } else if (erp_next && bno >=
1641 xfs_bmbt_get_startoff(erp_next->er_extbuf)) {
1642 low = erp_idx + 1;
1643 } else {
1644 break;
1645 }
1646 }
1647 *erp_idxp = erp_idx;
1648 return erp;
1649}
1650
1651/*
1652 * Return a pointer to the indirection array entry containing the
1653 * extent record at file extent index *idxp. Store the index of the
1654 * target irec in *erp_idxp and store the page index of the target
1655 * extent record in *idxp.
1656 */
1657xfs_ext_irec_t *
1658xfs_iext_idx_to_irec(
1659 xfs_ifork_t *ifp, /* inode fork pointer */
1660 xfs_extnum_t *idxp, /* extent index (file -> page) */
1661 int *erp_idxp, /* pointer to target irec */
1662 int realloc) /* new bytes were just added */
1663{
1664 xfs_ext_irec_t *prev; /* pointer to previous irec */
1665 xfs_ext_irec_t *erp = NULL; /* pointer to current irec */
1666 int erp_idx; /* indirection array index */
1667 int nlists; /* number of irec's (ex lists) */
1668 int high; /* binary search upper limit */
1669 int low; /* binary search lower limit */
1670 xfs_extnum_t page_idx = *idxp; /* extent index in target list */
1671
1672 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1673 ASSERT(page_idx >= 0);
1674 ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
1675 ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);
1676
1677 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1678 erp_idx = 0;
1679 low = 0;
1680 high = nlists - 1;
1681
1682 /* Binary search extent irec's */
1683 while (low <= high) {
1684 erp_idx = (low + high) >> 1;
1685 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1686 prev = erp_idx > 0 ? erp - 1 : NULL;
1687 if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
1688 realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
1689 high = erp_idx - 1;
1690 } else if (page_idx > erp->er_extoff + erp->er_extcount ||
1691 (page_idx == erp->er_extoff + erp->er_extcount &&
1692 !realloc)) {
1693 low = erp_idx + 1;
1694 } else if (page_idx == erp->er_extoff + erp->er_extcount &&
1695 erp->er_extcount == XFS_LINEAR_EXTS) {
1696 ASSERT(realloc);
1697 page_idx = 0;
1698 erp_idx++;
1699 erp = erp_idx < nlists ? erp + 1 : NULL;
1700 break;
1701 } else {
1702 page_idx -= erp->er_extoff;
1703 break;
1704 }
1705 }
1706 *idxp = page_idx;
1707 *erp_idxp = erp_idx;
1708 return(erp);
1709}
1710
1711/*
1712 * Allocate and initialize an indirection array once the space needed
1713 * for incore extents increases above XFS_IEXT_BUFSZ.
1714 */
1715void
1716xfs_iext_irec_init(
1717 xfs_ifork_t *ifp) /* inode fork pointer */
1718{
1719 xfs_ext_irec_t *erp; /* indirection array pointer */
1720 xfs_extnum_t nextents; /* number of extents in file */
1721
1722 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
1723 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1724 ASSERT(nextents <= XFS_LINEAR_EXTS);
1725
1726 erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
1727
1728 if (nextents == 0) {
1729 ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
1730 } else if (!ifp->if_real_bytes) {
1731 xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
1732 } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
1733 xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ);
1734 }
1735 erp->er_extbuf = ifp->if_u1.if_extents;
1736 erp->er_extcount = nextents;
1737 erp->er_extoff = 0;
1738
1739 ifp->if_flags |= XFS_IFEXTIREC;
1740 ifp->if_real_bytes = XFS_IEXT_BUFSZ;
1741 ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t);
1742 ifp->if_u1.if_ext_irec = erp;
1743
1744 return;
1745}
1746
1747/*
1748 * Allocate and initialize a new entry in the indirection array.
1749 */
1750xfs_ext_irec_t *
1751xfs_iext_irec_new(
1752 xfs_ifork_t *ifp, /* inode fork pointer */
1753 int erp_idx) /* index for new irec */
1754{
1755 xfs_ext_irec_t *erp; /* indirection array pointer */
1756 int i; /* loop counter */
1757 int nlists; /* number of irec's (ex lists) */
1758
1759 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1760 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1761
1762 /* Resize indirection array */
1763 xfs_iext_realloc_indirect(ifp, ++nlists *
1764 sizeof(xfs_ext_irec_t));
1765 /*
1766 * Move records down in the array so the
1767 * new page can use erp_idx.
1768 */
1769 erp = ifp->if_u1.if_ext_irec;
1770 for (i = nlists - 1; i > erp_idx; i--) {
1771 memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t));
1772 }
1773 ASSERT(i == erp_idx);
1774
1775 /* Initialize new extent record */
1776 erp = ifp->if_u1.if_ext_irec;
1777 erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
1778 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
1779 memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
1780 erp[erp_idx].er_extcount = 0;
1781 erp[erp_idx].er_extoff = erp_idx > 0 ?
1782 erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0;
1783 return (&erp[erp_idx]);
1784}
1785
1786/*
1787 * Remove a record from the indirection array.
1788 */
1789void
1790xfs_iext_irec_remove(
1791 xfs_ifork_t *ifp, /* inode fork pointer */
1792 int erp_idx) /* irec index to remove */
1793{
1794 xfs_ext_irec_t *erp; /* indirection array pointer */
1795 int i; /* loop counter */
1796 int nlists; /* number of irec's (ex lists) */
1797
1798 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1799 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1800 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1801 if (erp->er_extbuf) {
1802 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
1803 -erp->er_extcount);
1804 kmem_free(erp->er_extbuf);
1805 }
1806 /* Compact extent records */
1807 erp = ifp->if_u1.if_ext_irec;
1808 for (i = erp_idx; i < nlists - 1; i++) {
1809 memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
1810 }
1811 /*
1812 * Manually free the last extent record from the indirection
1813 * array. A call to xfs_iext_realloc_indirect() with a size
1814 * of zero would result in a call to xfs_iext_destroy() which
1815 * would in turn call this function again, creating a nasty
1816 * infinite loop.
1817 */
1818 if (--nlists) {
1819 xfs_iext_realloc_indirect(ifp,
1820 nlists * sizeof(xfs_ext_irec_t));
1821 } else {
1822 kmem_free(ifp->if_u1.if_ext_irec);
1823 }
1824 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
1825}
1826
1827/*
1828 * This is called to clean up large amounts of unused memory allocated
1829 * by the indirection array. Before compacting anything though, verify
1830 * that the indirection array is still needed and switch back to the
1831 * linear extent list (or even the inline buffer) if possible. The
1832 * compaction policy is as follows:
1833 *
1834 * Full Compaction: Extents fit into a single page (or inline buffer)
1835 * Partial Compaction: Extents occupy less than 50% of allocated space
1836 * No Compaction: Extents occupy at least 50% of allocated space
1837 */
1838void
1839xfs_iext_irec_compact(
1840 xfs_ifork_t *ifp) /* inode fork pointer */
1841{
1842 xfs_extnum_t nextents; /* number of extents in file */
1843 int nlists; /* number of irec's (ex lists) */
1844
1845 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1846 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1847 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1848
1849 if (nextents == 0) {
1850 xfs_iext_destroy(ifp);
1851 } else if (nextents <= XFS_INLINE_EXTS) {
1852 xfs_iext_indirect_to_direct(ifp);
1853 xfs_iext_direct_to_inline(ifp, nextents);
1854 } else if (nextents <= XFS_LINEAR_EXTS) {
1855 xfs_iext_indirect_to_direct(ifp);
1856 } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
1857 xfs_iext_irec_compact_pages(ifp);
1858 }
1859}
1860
1861/*
1862 * Combine extents from neighboring extent pages.
1863 */
1864void
1865xfs_iext_irec_compact_pages(
1866 xfs_ifork_t *ifp) /* inode fork pointer */
1867{
1868 xfs_ext_irec_t *erp, *erp_next;/* pointers to irec entries */
1869 int erp_idx = 0; /* indirection array index */
1870 int nlists; /* number of irec's (ex lists) */
1871
1872 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1873 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1874 while (erp_idx < nlists - 1) {
1875 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1876 erp_next = erp + 1;
1877 if (erp_next->er_extcount <=
1878 (XFS_LINEAR_EXTS - erp->er_extcount)) {
1879 memcpy(&erp->er_extbuf[erp->er_extcount],
1880 erp_next->er_extbuf, erp_next->er_extcount *
1881 sizeof(xfs_bmbt_rec_t));
1882 erp->er_extcount += erp_next->er_extcount;
1883 /*
1884 * Free page before removing extent record
1885 * so er_extoffs don't get modified in
1886 * xfs_iext_irec_remove.
1887 */
1888 kmem_free(erp_next->er_extbuf);
1889 erp_next->er_extbuf = NULL;
1890 xfs_iext_irec_remove(ifp, erp_idx + 1);
1891 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1892 } else {
1893 erp_idx++;
1894 }
1895 }
1896}
1897
1898/*
1899 * This is called to update the er_extoff field in the indirection
1900 * array when extents have been added or removed from one of the
1901 * extent lists. erp_idx contains the irec index to begin updating
1902 * at and ext_diff contains the number of extents that were added
1903 * or removed.
1904 */
1905void
1906xfs_iext_irec_update_extoffs(
1907 xfs_ifork_t *ifp, /* inode fork pointer */
1908 int erp_idx, /* irec index to update */
1909 int ext_diff) /* number of new extents */
1910{
1911 int i; /* loop counter */
1912 int nlists; /* number of irec's (ex lists */
1913
1914 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1915 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1916 for (i = erp_idx; i < nlists; i++) {
1917 ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
1918 }
1919}