blob: 73514c0486b710fdfcbcc53a2a5683498cf782fc [file] [log] [blame]
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001/*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include <linux/log2.h>
19
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_format.h"
Dave Chinner239880e2013-10-23 10:50:10 +110023#include "xfs_log_format.h"
24#include "xfs_trans_resv.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100025#include "xfs_inum.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100026#include "xfs_sb.h"
27#include "xfs_ag.h"
28#include "xfs_mount.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100029#include "xfs_inode.h"
Dave Chinner239880e2013-10-23 10:50:10 +110030#include "xfs_trans.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100031#include "xfs_inode_item.h"
Dave Chinnera4fbe6a2013-10-23 10:51:50 +110032#include "xfs_bmap_btree.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100033#include "xfs_bmap.h"
34#include "xfs_error.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100035#include "xfs_trace.h"
Dave Chinnera4fbe6a2013-10-23 10:51:50 +110036#include "xfs_attr_sf.h"
37#include "xfs_dinode.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100038
39kmem_zone_t *xfs_ifork_zone;
40
41STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
42STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
43STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
44
#ifdef DEBUG
/*
 * Debug-only sanity pass over the first @nrecs in-core extent records of
 * fork @ifp.  Each record is read with unaligned loads, unpacked, and, when
 * @fmt is XFS_EXTFMT_NOSTATE, asserted to be in the XFS_EXT_NORM state.
 *
 * Non-debug builds compile this away via the empty macro below.
 */
void
xfs_validate_extents(
	xfs_ifork_t		*ifp,
	int			nrecs,
	xfs_exntfmt_t		fmt)
{
	xfs_bmbt_rec_host_t	copy;
	xfs_bmbt_irec_t		unpacked;
	int			idx = 0;

	while (idx < nrecs) {
		xfs_bmbt_rec_host_t	*src = xfs_iext_get_ext(ifp, idx);

		/* Take a local copy so the unpack works on aligned storage. */
		copy.l0 = get_unaligned(&src->l0);
		copy.l1 = get_unaligned(&src->l1);
		xfs_bmbt_get_all(&copy, &unpacked);

		ASSERT(fmt != XFS_EXTFMT_NOSTATE ||
		       unpacked.br_state == XFS_EXT_NORM);
		idx++;
	}
}
#else /* DEBUG */
#define xfs_validate_extents(ifp, nrecs, fmt)
#endif /* DEBUG */
72
73
74/*
75 * Move inode type and inode format specific information from the
76 * on-disk inode to the in-core inode. For fifos, devs, and sockets
77 * this means set if_rdev to the proper value. For files, directories,
78 * and symlinks this means to bring in the in-line data or extent
79 * pointers. For a file in B-tree format, only the root is immediately
80 * brought in-core. The rest will be in-lined in if_extents when it
81 * is first referenced (see xfs_iread_extents()).
82 */
83int
84xfs_iformat_fork(
85 xfs_inode_t *ip,
86 xfs_dinode_t *dip)
87{
88 xfs_attr_shortform_t *atp;
89 int size;
90 int error = 0;
91 xfs_fsize_t di_size;
92
93 if (unlikely(be32_to_cpu(dip->di_nextents) +
94 be16_to_cpu(dip->di_anextents) >
95 be64_to_cpu(dip->di_nblocks))) {
96 xfs_warn(ip->i_mount,
97 "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
98 (unsigned long long)ip->i_ino,
99 (int)(be32_to_cpu(dip->di_nextents) +
100 be16_to_cpu(dip->di_anextents)),
101 (unsigned long long)
102 be64_to_cpu(dip->di_nblocks));
103 XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
104 ip->i_mount, dip);
105 return XFS_ERROR(EFSCORRUPTED);
106 }
107
108 if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
109 xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
110 (unsigned long long)ip->i_ino,
111 dip->di_forkoff);
112 XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
113 ip->i_mount, dip);
114 return XFS_ERROR(EFSCORRUPTED);
115 }
116
117 if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
118 !ip->i_mount->m_rtdev_targp)) {
119 xfs_warn(ip->i_mount,
120 "corrupt dinode %Lu, has realtime flag set.",
121 ip->i_ino);
122 XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
123 XFS_ERRLEVEL_LOW, ip->i_mount, dip);
124 return XFS_ERROR(EFSCORRUPTED);
125 }
126
127 switch (ip->i_d.di_mode & S_IFMT) {
128 case S_IFIFO:
129 case S_IFCHR:
130 case S_IFBLK:
131 case S_IFSOCK:
132 if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
133 XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
134 ip->i_mount, dip);
135 return XFS_ERROR(EFSCORRUPTED);
136 }
137 ip->i_d.di_size = 0;
138 ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
139 break;
140
141 case S_IFREG:
142 case S_IFLNK:
143 case S_IFDIR:
144 switch (dip->di_format) {
145 case XFS_DINODE_FMT_LOCAL:
146 /*
147 * no local regular files yet
148 */
149 if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
150 xfs_warn(ip->i_mount,
151 "corrupt inode %Lu (local format for regular file).",
152 (unsigned long long) ip->i_ino);
153 XFS_CORRUPTION_ERROR("xfs_iformat(4)",
154 XFS_ERRLEVEL_LOW,
155 ip->i_mount, dip);
156 return XFS_ERROR(EFSCORRUPTED);
157 }
158
159 di_size = be64_to_cpu(dip->di_size);
Dan Carpenter0d0ab122013-08-15 08:53:38 +0300160 if (unlikely(di_size < 0 ||
161 di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000162 xfs_warn(ip->i_mount,
163 "corrupt inode %Lu (bad size %Ld for local inode).",
164 (unsigned long long) ip->i_ino,
165 (long long) di_size);
166 XFS_CORRUPTION_ERROR("xfs_iformat(5)",
167 XFS_ERRLEVEL_LOW,
168 ip->i_mount, dip);
169 return XFS_ERROR(EFSCORRUPTED);
170 }
171
172 size = (int)di_size;
173 error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
174 break;
175 case XFS_DINODE_FMT_EXTENTS:
176 error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
177 break;
178 case XFS_DINODE_FMT_BTREE:
179 error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
180 break;
181 default:
182 XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
183 ip->i_mount);
184 return XFS_ERROR(EFSCORRUPTED);
185 }
186 break;
187
188 default:
189 XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
190 return XFS_ERROR(EFSCORRUPTED);
191 }
192 if (error) {
193 return error;
194 }
195 if (!XFS_DFORK_Q(dip))
196 return 0;
197
198 ASSERT(ip->i_afp == NULL);
199 ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
200
201 switch (dip->di_aformat) {
202 case XFS_DINODE_FMT_LOCAL:
203 atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
204 size = be16_to_cpu(atp->hdr.totsize);
205
206 if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
207 xfs_warn(ip->i_mount,
208 "corrupt inode %Lu (bad attr fork size %Ld).",
209 (unsigned long long) ip->i_ino,
210 (long long) size);
211 XFS_CORRUPTION_ERROR("xfs_iformat(8)",
212 XFS_ERRLEVEL_LOW,
213 ip->i_mount, dip);
214 return XFS_ERROR(EFSCORRUPTED);
215 }
216
217 error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
218 break;
219 case XFS_DINODE_FMT_EXTENTS:
220 error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
221 break;
222 case XFS_DINODE_FMT_BTREE:
223 error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
224 break;
225 default:
226 error = XFS_ERROR(EFSCORRUPTED);
227 break;
228 }
229 if (error) {
230 kmem_zone_free(xfs_ifork_zone, ip->i_afp);
231 ip->i_afp = NULL;
232 xfs_idestroy_fork(ip, XFS_DATA_FORK);
233 }
234 return error;
235}
236
237/*
238 * The file is in-lined in the on-disk inode.
239 * If it fits into if_inline_data, then copy
240 * it there, otherwise allocate a buffer for it
241 * and copy the data there. Either way, set
242 * if_data to point at the data.
243 * If we allocate a buffer for the data, make
244 * sure that its size is a multiple of 4 and
245 * record the real size in i_real_bytes.
246 */
247STATIC int
248xfs_iformat_local(
249 xfs_inode_t *ip,
250 xfs_dinode_t *dip,
251 int whichfork,
252 int size)
253{
254 xfs_ifork_t *ifp;
255 int real_size;
256
257 /*
258 * If the size is unreasonable, then something
259 * is wrong and we just bail out rather than crash in
260 * kmem_alloc() or memcpy() below.
261 */
262 if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
263 xfs_warn(ip->i_mount,
264 "corrupt inode %Lu (bad size %d for local fork, size = %d).",
265 (unsigned long long) ip->i_ino, size,
266 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
267 XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
268 ip->i_mount, dip);
269 return XFS_ERROR(EFSCORRUPTED);
270 }
271 ifp = XFS_IFORK_PTR(ip, whichfork);
272 real_size = 0;
273 if (size == 0)
274 ifp->if_u1.if_data = NULL;
275 else if (size <= sizeof(ifp->if_u2.if_inline_data))
276 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
277 else {
278 real_size = roundup(size, 4);
279 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
280 }
281 ifp->if_bytes = size;
282 ifp->if_real_bytes = real_size;
283 if (size)
284 memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size);
285 ifp->if_flags &= ~XFS_IFEXTENTS;
286 ifp->if_flags |= XFS_IFINLINE;
287 return 0;
288}
289
290/*
291 * The file consists of a set of extents all
292 * of which fit into the on-disk inode.
293 * If there are few enough extents to fit into
294 * the if_inline_ext, then copy them there.
295 * Otherwise allocate a buffer for them and copy
296 * them into it. Either way, set if_extents
297 * to point at the extents.
298 */
299STATIC int
300xfs_iformat_extents(
301 xfs_inode_t *ip,
302 xfs_dinode_t *dip,
303 int whichfork)
304{
305 xfs_bmbt_rec_t *dp;
306 xfs_ifork_t *ifp;
307 int nex;
308 int size;
309 int i;
310
311 ifp = XFS_IFORK_PTR(ip, whichfork);
312 nex = XFS_DFORK_NEXTENTS(dip, whichfork);
313 size = nex * (uint)sizeof(xfs_bmbt_rec_t);
314
315 /*
316 * If the number of extents is unreasonable, then something
317 * is wrong and we just bail out rather than crash in
318 * kmem_alloc() or memcpy() below.
319 */
320 if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
321 xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
322 (unsigned long long) ip->i_ino, nex);
323 XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
324 ip->i_mount, dip);
325 return XFS_ERROR(EFSCORRUPTED);
326 }
327
328 ifp->if_real_bytes = 0;
329 if (nex == 0)
330 ifp->if_u1.if_extents = NULL;
331 else if (nex <= XFS_INLINE_EXTS)
332 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
333 else
334 xfs_iext_add(ifp, 0, nex);
335
336 ifp->if_bytes = size;
337 if (size) {
338 dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
339 xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
340 for (i = 0; i < nex; i++, dp++) {
341 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
342 ep->l0 = get_unaligned_be64(&dp->l0);
343 ep->l1 = get_unaligned_be64(&dp->l1);
344 }
345 XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
346 if (whichfork != XFS_DATA_FORK ||
347 XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
348 if (unlikely(xfs_check_nostate_extents(
349 ifp, 0, nex))) {
350 XFS_ERROR_REPORT("xfs_iformat_extents(2)",
351 XFS_ERRLEVEL_LOW,
352 ip->i_mount);
353 return XFS_ERROR(EFSCORRUPTED);
354 }
355 }
356 ifp->if_flags |= XFS_IFEXTENTS;
357 return 0;
358}
359
360/*
361 * The file has too many extents to fit into
362 * the inode, so they are in B-tree format.
363 * Allocate a buffer for the root of the B-tree
364 * and copy the root into it. The i_extents
365 * field will remain NULL until all of the
366 * extents are read in (when they are needed).
367 */
368STATIC int
369xfs_iformat_btree(
370 xfs_inode_t *ip,
371 xfs_dinode_t *dip,
372 int whichfork)
373{
374 struct xfs_mount *mp = ip->i_mount;
375 xfs_bmdr_block_t *dfp;
376 xfs_ifork_t *ifp;
377 /* REFERENCED */
378 int nrecs;
379 int size;
380
381 ifp = XFS_IFORK_PTR(ip, whichfork);
382 dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
383 size = XFS_BMAP_BROOT_SPACE(mp, dfp);
384 nrecs = be16_to_cpu(dfp->bb_numrecs);
385
386 /*
387 * blow out if -- fork has less extents than can fit in
388 * fork (fork shouldn't be a btree format), root btree
389 * block has more records than can fit into the fork,
390 * or the number of extents is greater than the number of
391 * blocks.
392 */
393 if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
394 XFS_IFORK_MAXEXT(ip, whichfork) ||
395 XFS_BMDR_SPACE_CALC(nrecs) >
396 XFS_DFORK_SIZE(dip, mp, whichfork) ||
397 XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
398 xfs_warn(mp, "corrupt inode %Lu (btree).",
399 (unsigned long long) ip->i_ino);
400 XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
401 mp, dip);
402 return XFS_ERROR(EFSCORRUPTED);
403 }
404
405 ifp->if_broot_bytes = size;
406 ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS);
407 ASSERT(ifp->if_broot != NULL);
408 /*
409 * Copy and convert from the on-disk structure
410 * to the in-memory structure.
411 */
412 xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
413 ifp->if_broot, size);
414 ifp->if_flags &= ~XFS_IFEXTENTS;
415 ifp->if_flags |= XFS_IFBROOT;
416
417 return 0;
418}
419
420/*
421 * Read in extents from a btree-format inode.
422 * Allocate and fill in if_extents. Real work is done in xfs_bmap.c.
423 */
424int
425xfs_iread_extents(
426 xfs_trans_t *tp,
427 xfs_inode_t *ip,
428 int whichfork)
429{
430 int error;
431 xfs_ifork_t *ifp;
432 xfs_extnum_t nextents;
433
Christoph Hellwigeef334e2013-12-06 12:30:17 -0800434 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
435
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000436 if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
437 XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
438 ip->i_mount);
439 return XFS_ERROR(EFSCORRUPTED);
440 }
441 nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
442 ifp = XFS_IFORK_PTR(ip, whichfork);
443
444 /*
445 * We know that the size is valid (it's checked in iformat_btree)
446 */
447 ifp->if_bytes = ifp->if_real_bytes = 0;
448 ifp->if_flags |= XFS_IFEXTENTS;
449 xfs_iext_add(ifp, 0, nextents);
450 error = xfs_bmap_read_extents(tp, ip, whichfork);
451 if (error) {
452 xfs_iext_destroy(ifp);
453 ifp->if_flags &= ~XFS_IFEXTENTS;
454 return error;
455 }
456 xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip));
457 return 0;
458}
459/*
460 * Reallocate the space for if_broot based on the number of records
461 * being added or deleted as indicated in rec_diff. Move the records
462 * and pointers in if_broot to fit the new size. When shrinking this
463 * will eliminate holes between the records and pointers created by
464 * the caller. When growing this will create holes to be filled in
465 * by the caller.
466 *
467 * The caller must not request to add more records than would fit in
468 * the on-disk inode root. If the if_broot is currently NULL, then
Zhi Yong Wuf6c27342013-08-07 10:11:04 +0000469 * if we are adding records, one will be allocated. The caller must also
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000470 * not request that the number of records go below zero, although
471 * it can go to zero.
472 *
473 * ip -- the inode whose if_broot area is changing
474 * ext_diff -- the change in the number of records, positive or negative,
475 * requested for the if_broot array.
476 */
477void
478xfs_iroot_realloc(
479 xfs_inode_t *ip,
480 int rec_diff,
481 int whichfork)
482{
483 struct xfs_mount *mp = ip->i_mount;
484 int cur_max;
485 xfs_ifork_t *ifp;
486 struct xfs_btree_block *new_broot;
487 int new_max;
488 size_t new_size;
489 char *np;
490 char *op;
491
492 /*
493 * Handle the degenerate case quietly.
494 */
495 if (rec_diff == 0) {
496 return;
497 }
498
499 ifp = XFS_IFORK_PTR(ip, whichfork);
500 if (rec_diff > 0) {
501 /*
502 * If there wasn't any memory allocated before, just
503 * allocate it now and get out.
504 */
505 if (ifp->if_broot_bytes == 0) {
506 new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
507 ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
508 ifp->if_broot_bytes = (int)new_size;
509 return;
510 }
511
512 /*
513 * If there is already an existing if_broot, then we need
514 * to realloc() it and shift the pointers to their new
515 * location. The records don't change location because
516 * they are kept butted up against the btree block header.
517 */
518 cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
519 new_max = cur_max + rec_diff;
520 new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
521 ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
522 XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max),
523 KM_SLEEP | KM_NOFS);
524 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
525 ifp->if_broot_bytes);
526 np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
527 (int)new_size);
528 ifp->if_broot_bytes = (int)new_size;
529 ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
530 XFS_IFORK_SIZE(ip, whichfork));
531 memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
532 return;
533 }
534
535 /*
536 * rec_diff is less than 0. In this case, we are shrinking the
537 * if_broot buffer. It must already exist. If we go to zero
538 * records, just get rid of the root and clear the status bit.
539 */
540 ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
541 cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
542 new_max = cur_max + rec_diff;
543 ASSERT(new_max >= 0);
544 if (new_max > 0)
545 new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
546 else
547 new_size = 0;
548 if (new_size > 0) {
549 new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
550 /*
551 * First copy over the btree block header.
552 */
553 memcpy(new_broot, ifp->if_broot,
554 XFS_BMBT_BLOCK_LEN(ip->i_mount));
555 } else {
556 new_broot = NULL;
557 ifp->if_flags &= ~XFS_IFBROOT;
558 }
559
560 /*
561 * Only copy the records and pointers if there are any.
562 */
563 if (new_max > 0) {
564 /*
565 * First copy the records.
566 */
567 op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
568 np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
569 memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
570
571 /*
572 * Then copy the pointers.
573 */
574 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
575 ifp->if_broot_bytes);
576 np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
577 (int)new_size);
578 memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
579 }
580 kmem_free(ifp->if_broot);
581 ifp->if_broot = new_broot;
582 ifp->if_broot_bytes = (int)new_size;
583 if (ifp->if_broot)
584 ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
585 XFS_IFORK_SIZE(ip, whichfork));
586 return;
587}
588
589
590/*
591 * This is called when the amount of space needed for if_data
592 * is increased or decreased. The change in size is indicated by
593 * the number of bytes that need to be added or deleted in the
594 * byte_diff parameter.
595 *
596 * If the amount of space needed has decreased below the size of the
597 * inline buffer, then switch to using the inline buffer. Otherwise,
598 * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
599 * to what is needed.
600 *
601 * ip -- the inode whose if_data area is changing
602 * byte_diff -- the change in the number of bytes, positive or negative,
603 * requested for the if_data array.
604 */
605void
606xfs_idata_realloc(
607 xfs_inode_t *ip,
608 int byte_diff,
609 int whichfork)
610{
611 xfs_ifork_t *ifp;
612 int new_size;
613 int real_size;
614
615 if (byte_diff == 0) {
616 return;
617 }
618
619 ifp = XFS_IFORK_PTR(ip, whichfork);
620 new_size = (int)ifp->if_bytes + byte_diff;
621 ASSERT(new_size >= 0);
622
623 if (new_size == 0) {
624 if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
625 kmem_free(ifp->if_u1.if_data);
626 }
627 ifp->if_u1.if_data = NULL;
628 real_size = 0;
629 } else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) {
630 /*
631 * If the valid extents/data can fit in if_inline_ext/data,
632 * copy them from the malloc'd vector and free it.
633 */
634 if (ifp->if_u1.if_data == NULL) {
635 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
636 } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
637 ASSERT(ifp->if_real_bytes != 0);
638 memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
639 new_size);
640 kmem_free(ifp->if_u1.if_data);
641 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
642 }
643 real_size = 0;
644 } else {
645 /*
646 * Stuck with malloc/realloc.
647 * For inline data, the underlying buffer must be
648 * a multiple of 4 bytes in size so that it can be
649 * logged and stay on word boundaries. We enforce
650 * that here.
651 */
652 real_size = roundup(new_size, 4);
653 if (ifp->if_u1.if_data == NULL) {
654 ASSERT(ifp->if_real_bytes == 0);
655 ifp->if_u1.if_data = kmem_alloc(real_size,
656 KM_SLEEP | KM_NOFS);
657 } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
658 /*
659 * Only do the realloc if the underlying size
660 * is really changing.
661 */
662 if (ifp->if_real_bytes != real_size) {
663 ifp->if_u1.if_data =
664 kmem_realloc(ifp->if_u1.if_data,
665 real_size,
666 ifp->if_real_bytes,
667 KM_SLEEP | KM_NOFS);
668 }
669 } else {
670 ASSERT(ifp->if_real_bytes == 0);
671 ifp->if_u1.if_data = kmem_alloc(real_size,
672 KM_SLEEP | KM_NOFS);
673 memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
674 ifp->if_bytes);
675 }
676 }
677 ifp->if_real_bytes = real_size;
678 ifp->if_bytes = new_size;
679 ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
680}
681
682void
683xfs_idestroy_fork(
684 xfs_inode_t *ip,
685 int whichfork)
686{
687 xfs_ifork_t *ifp;
688
689 ifp = XFS_IFORK_PTR(ip, whichfork);
690 if (ifp->if_broot != NULL) {
691 kmem_free(ifp->if_broot);
692 ifp->if_broot = NULL;
693 }
694
695 /*
696 * If the format is local, then we can't have an extents
697 * array so just look for an inline data array. If we're
698 * not local then we may or may not have an extents list,
699 * so check and free it up if we do.
700 */
701 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
702 if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
703 (ifp->if_u1.if_data != NULL)) {
704 ASSERT(ifp->if_real_bytes != 0);
705 kmem_free(ifp->if_u1.if_data);
706 ifp->if_u1.if_data = NULL;
707 ifp->if_real_bytes = 0;
708 }
709 } else if ((ifp->if_flags & XFS_IFEXTENTS) &&
710 ((ifp->if_flags & XFS_IFEXTIREC) ||
711 ((ifp->if_u1.if_extents != NULL) &&
712 (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) {
713 ASSERT(ifp->if_real_bytes != 0);
714 xfs_iext_destroy(ifp);
715 }
716 ASSERT(ifp->if_u1.if_extents == NULL ||
717 ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext);
718 ASSERT(ifp->if_real_bytes == 0);
719 if (whichfork == XFS_ATTR_FORK) {
720 kmem_zone_free(xfs_ifork_zone, ip->i_afp);
721 ip->i_afp = NULL;
722 }
723}
724
725/*
Christoph Hellwigda776502013-12-13 11:34:04 +1100726 * Convert in-core extents to on-disk form
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000727 *
Christoph Hellwigda776502013-12-13 11:34:04 +1100728 * For either the data or attr fork in extent format, we need to endian convert
729 * the in-core extent as we place them into the on-disk inode.
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000730 *
Christoph Hellwigda776502013-12-13 11:34:04 +1100731 * In the case of the data fork, the in-core and on-disk fork sizes can be
732 * different due to delayed allocation extents. We only copy on-disk extents
733 * here, so callers must always use the physical fork size to determine the
734 * size of the buffer passed to this routine. We will return the size actually
735 * used.
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000736 */
737int
738xfs_iextents_copy(
739 xfs_inode_t *ip,
740 xfs_bmbt_rec_t *dp,
741 int whichfork)
742{
743 int copied;
744 int i;
745 xfs_ifork_t *ifp;
746 int nrecs;
747 xfs_fsblock_t start_block;
748
749 ifp = XFS_IFORK_PTR(ip, whichfork);
750 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
751 ASSERT(ifp->if_bytes > 0);
752
753 nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
754 XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
755 ASSERT(nrecs > 0);
756
757 /*
758 * There are some delayed allocation extents in the
759 * inode, so copy the extents one at a time and skip
760 * the delayed ones. There must be at least one
761 * non-delayed extent.
762 */
763 copied = 0;
764 for (i = 0; i < nrecs; i++) {
765 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
766 start_block = xfs_bmbt_get_startblock(ep);
767 if (isnullstartblock(start_block)) {
768 /*
769 * It's a delayed allocation extent, so skip it.
770 */
771 continue;
772 }
773
774 /* Translate to on disk format */
Dave Chinnerc5c249b2013-08-12 20:49:43 +1000775 put_unaligned_be64(ep->l0, &dp->l0);
776 put_unaligned_be64(ep->l1, &dp->l1);
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000777 dp++;
778 copied++;
779 }
780 ASSERT(copied != 0);
781 xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip));
782
783 return (copied * (uint)sizeof(xfs_bmbt_rec_t));
784}
785
786/*
787 * Each of the following cases stores data into the same region
788 * of the on-disk inode, so only one of them can be valid at
789 * any given time. While it is possible to have conflicting formats
790 * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
791 * in EXTENTS format, this can only happen when the fork has
792 * changed formats after being modified but before being flushed.
793 * In these cases, the format always takes precedence, because the
794 * format indicates the current state of the fork.
795 */
796void
797xfs_iflush_fork(
798 xfs_inode_t *ip,
799 xfs_dinode_t *dip,
800 xfs_inode_log_item_t *iip,
801 int whichfork,
802 xfs_buf_t *bp)
803{
804 char *cp;
805 xfs_ifork_t *ifp;
806 xfs_mount_t *mp;
807 static const short brootflag[2] =
808 { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
809 static const short dataflag[2] =
810 { XFS_ILOG_DDATA, XFS_ILOG_ADATA };
811 static const short extflag[2] =
812 { XFS_ILOG_DEXT, XFS_ILOG_AEXT };
813
814 if (!iip)
815 return;
816 ifp = XFS_IFORK_PTR(ip, whichfork);
817 /*
818 * This can happen if we gave up in iformat in an error path,
819 * for the attribute fork.
820 */
821 if (!ifp) {
822 ASSERT(whichfork == XFS_ATTR_FORK);
823 return;
824 }
825 cp = XFS_DFORK_PTR(dip, whichfork);
826 mp = ip->i_mount;
827 switch (XFS_IFORK_FORMAT(ip, whichfork)) {
828 case XFS_DINODE_FMT_LOCAL:
829 if ((iip->ili_fields & dataflag[whichfork]) &&
830 (ifp->if_bytes > 0)) {
831 ASSERT(ifp->if_u1.if_data != NULL);
832 ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
833 memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
834 }
835 break;
836
837 case XFS_DINODE_FMT_EXTENTS:
838 ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
839 !(iip->ili_fields & extflag[whichfork]));
840 if ((iip->ili_fields & extflag[whichfork]) &&
841 (ifp->if_bytes > 0)) {
842 ASSERT(xfs_iext_get_ext(ifp, 0));
843 ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
844 (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
845 whichfork);
846 }
847 break;
848
849 case XFS_DINODE_FMT_BTREE:
850 if ((iip->ili_fields & brootflag[whichfork]) &&
851 (ifp->if_broot_bytes > 0)) {
852 ASSERT(ifp->if_broot != NULL);
853 ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
854 XFS_IFORK_SIZE(ip, whichfork));
855 xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
856 (xfs_bmdr_block_t *)cp,
857 XFS_DFORK_SIZE(dip, mp, whichfork));
858 }
859 break;
860
861 case XFS_DINODE_FMT_DEV:
862 if (iip->ili_fields & XFS_ILOG_DEV) {
863 ASSERT(whichfork == XFS_DATA_FORK);
864 xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
865 }
866 break;
867
868 case XFS_DINODE_FMT_UUID:
869 if (iip->ili_fields & XFS_ILOG_UUID) {
870 ASSERT(whichfork == XFS_DATA_FORK);
871 memcpy(XFS_DFORK_DPTR(dip),
872 &ip->i_df.if_u2.if_uuid,
873 sizeof(uuid_t));
874 }
875 break;
876
877 default:
878 ASSERT(0);
879 break;
880 }
881}
882
883/*
884 * Return a pointer to the extent record at file index idx.
885 */
886xfs_bmbt_rec_host_t *
887xfs_iext_get_ext(
888 xfs_ifork_t *ifp, /* inode fork pointer */
889 xfs_extnum_t idx) /* index of target extent */
890{
891 ASSERT(idx >= 0);
892 ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
893
894 if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
895 return ifp->if_u1.if_ext_irec->er_extbuf;
896 } else if (ifp->if_flags & XFS_IFEXTIREC) {
897 xfs_ext_irec_t *erp; /* irec pointer */
898 int erp_idx = 0; /* irec index */
899 xfs_extnum_t page_idx = idx; /* ext index in target list */
900
901 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
902 return &erp->er_extbuf[page_idx];
903 } else if (ifp->if_bytes) {
904 return &ifp->if_u1.if_extents[idx];
905 } else {
906 return NULL;
907 }
908}
909
910/*
911 * Insert new item(s) into the extent records for incore inode
912 * fork 'ifp'. 'count' new items are inserted at index 'idx'.
913 */
914void
915xfs_iext_insert(
916 xfs_inode_t *ip, /* incore inode pointer */
917 xfs_extnum_t idx, /* starting index of new items */
918 xfs_extnum_t count, /* number of inserted items */
919 xfs_bmbt_irec_t *new, /* items to insert */
920 int state) /* type of extent conversion */
921{
922 xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
923 xfs_extnum_t i; /* extent record index */
924
925 trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_);
926
927 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
928 xfs_iext_add(ifp, idx, count);
929 for (i = idx; i < idx + count; i++, new++)
930 xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new);
931}
932
933/*
934 * This is called when the amount of space required for incore file
935 * extents needs to be increased. The ext_diff parameter stores the
936 * number of new extents being added and the idx parameter contains
937 * the extent index where the new extents will be added. If the new
938 * extents are being appended, then we just need to (re)allocate and
939 * initialize the space. Otherwise, if the new extents are being
940 * inserted into the middle of the existing entries, a bit more work
941 * is required to make room for the new extents to be inserted. The
942 * caller is responsible for filling in the new extent entries upon
943 * return.
944 */
945void
946xfs_iext_add(
947 xfs_ifork_t *ifp, /* inode fork pointer */
948 xfs_extnum_t idx, /* index to begin adding exts */
949 int ext_diff) /* number of extents to add */
950{
951 int byte_diff; /* new bytes being added */
952 int new_size; /* size of extents after adding */
953 xfs_extnum_t nextents; /* number of extents in file */
954
955 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
956 ASSERT((idx >= 0) && (idx <= nextents));
957 byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
958 new_size = ifp->if_bytes + byte_diff;
959 /*
960 * If the new number of extents (nextents + ext_diff)
961 * fits inside the inode, then continue to use the inline
962 * extent buffer.
963 */
964 if (nextents + ext_diff <= XFS_INLINE_EXTS) {
965 if (idx < nextents) {
966 memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff],
967 &ifp->if_u2.if_inline_ext[idx],
968 (nextents - idx) * sizeof(xfs_bmbt_rec_t));
969 memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff);
970 }
971 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
972 ifp->if_real_bytes = 0;
973 }
974 /*
975 * Otherwise use a linear (direct) extent list.
976 * If the extents are currently inside the inode,
977 * xfs_iext_realloc_direct will switch us from
978 * inline to direct extent allocation mode.
979 */
980 else if (nextents + ext_diff <= XFS_LINEAR_EXTS) {
981 xfs_iext_realloc_direct(ifp, new_size);
982 if (idx < nextents) {
983 memmove(&ifp->if_u1.if_extents[idx + ext_diff],
984 &ifp->if_u1.if_extents[idx],
985 (nextents - idx) * sizeof(xfs_bmbt_rec_t));
986 memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
987 }
988 }
989 /* Indirection array */
990 else {
991 xfs_ext_irec_t *erp;
992 int erp_idx = 0;
993 int page_idx = idx;
994
995 ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS);
996 if (ifp->if_flags & XFS_IFEXTIREC) {
997 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1);
998 } else {
999 xfs_iext_irec_init(ifp);
1000 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1001 erp = ifp->if_u1.if_ext_irec;
1002 }
1003 /* Extents fit in target extent page */
1004 if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) {
1005 if (page_idx < erp->er_extcount) {
1006 memmove(&erp->er_extbuf[page_idx + ext_diff],
1007 &erp->er_extbuf[page_idx],
1008 (erp->er_extcount - page_idx) *
1009 sizeof(xfs_bmbt_rec_t));
1010 memset(&erp->er_extbuf[page_idx], 0, byte_diff);
1011 }
1012 erp->er_extcount += ext_diff;
1013 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
1014 }
1015 /* Insert a new extent page */
1016 else if (erp) {
1017 xfs_iext_add_indirect_multi(ifp,
1018 erp_idx, page_idx, ext_diff);
1019 }
1020 /*
1021 * If extent(s) are being appended to the last page in
1022 * the indirection array and the new extent(s) don't fit
1023 * in the page, then erp is NULL and erp_idx is set to
1024 * the next index needed in the indirection array.
1025 */
1026 else {
Jie Liubb86d212013-10-25 14:52:44 +08001027 uint count = ext_diff;
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001028
1029 while (count) {
1030 erp = xfs_iext_irec_new(ifp, erp_idx);
Jie Liubb86d212013-10-25 14:52:44 +08001031 erp->er_extcount = min(count, XFS_LINEAR_EXTS);
1032 count -= erp->er_extcount;
1033 if (count)
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001034 erp_idx++;
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001035 }
1036 }
1037 }
1038 ifp->if_bytes = new_size;
1039}
1040
1041/*
1042 * This is called when incore extents are being added to the indirection
1043 * array and the new extents do not fit in the target extent list. The
1044 * erp_idx parameter contains the irec index for the target extent list
1045 * in the indirection array, and the idx parameter contains the extent
1046 * index within the list. The number of extents being added is stored
1047 * in the count parameter.
1048 *
1049 * |-------| |-------|
1050 * | | | | idx - number of extents before idx
1051 * | idx | | count |
1052 * | | | | count - number of extents being inserted at idx
1053 * |-------| |-------|
1054 * | count | | nex2 | nex2 - number of extents after idx + count
1055 * |-------| |-------|
1056 */
1057void
1058xfs_iext_add_indirect_multi(
1059 xfs_ifork_t *ifp, /* inode fork pointer */
1060 int erp_idx, /* target extent irec index */
1061 xfs_extnum_t idx, /* index within target list */
1062 int count) /* new extents being added */
1063{
1064 int byte_diff; /* new bytes being added */
1065 xfs_ext_irec_t *erp; /* pointer to irec entry */
1066 xfs_extnum_t ext_diff; /* number of extents to add */
1067 xfs_extnum_t ext_cnt; /* new extents still needed */
1068 xfs_extnum_t nex2; /* extents after idx + count */
1069 xfs_bmbt_rec_t *nex2_ep = NULL; /* temp list for nex2 extents */
1070 int nlists; /* number of irec's (lists) */
1071
1072 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1073 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1074 nex2 = erp->er_extcount - idx;
1075 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1076
1077 /*
1078 * Save second part of target extent list
1079 * (all extents past */
1080 if (nex2) {
1081 byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
1082 nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
1083 memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
1084 erp->er_extcount -= nex2;
1085 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
1086 memset(&erp->er_extbuf[idx], 0, byte_diff);
1087 }
1088
1089 /*
1090 * Add the new extents to the end of the target
1091 * list, then allocate new irec record(s) and
1092 * extent buffer(s) as needed to store the rest
1093 * of the new extents.
1094 */
1095 ext_cnt = count;
1096 ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount);
1097 if (ext_diff) {
1098 erp->er_extcount += ext_diff;
1099 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
1100 ext_cnt -= ext_diff;
1101 }
1102 while (ext_cnt) {
1103 erp_idx++;
1104 erp = xfs_iext_irec_new(ifp, erp_idx);
1105 ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS);
1106 erp->er_extcount = ext_diff;
1107 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
1108 ext_cnt -= ext_diff;
1109 }
1110
1111 /* Add nex2 extents back to indirection array */
1112 if (nex2) {
1113 xfs_extnum_t ext_avail;
1114 int i;
1115
1116 byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
1117 ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
1118 i = 0;
1119 /*
1120 * If nex2 extents fit in the current page, append
1121 * nex2_ep after the new extents.
1122 */
1123 if (nex2 <= ext_avail) {
1124 i = erp->er_extcount;
1125 }
1126 /*
1127 * Otherwise, check if space is available in the
1128 * next page.
1129 */
1130 else if ((erp_idx < nlists - 1) &&
1131 (nex2 <= (ext_avail = XFS_LINEAR_EXTS -
1132 ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) {
1133 erp_idx++;
1134 erp++;
1135 /* Create a hole for nex2 extents */
1136 memmove(&erp->er_extbuf[nex2], erp->er_extbuf,
1137 erp->er_extcount * sizeof(xfs_bmbt_rec_t));
1138 }
1139 /*
1140 * Final choice, create a new extent page for
1141 * nex2 extents.
1142 */
1143 else {
1144 erp_idx++;
1145 erp = xfs_iext_irec_new(ifp, erp_idx);
1146 }
1147 memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
1148 kmem_free(nex2_ep);
1149 erp->er_extcount += nex2;
1150 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
1151 }
1152}
1153
1154/*
1155 * This is called when the amount of space required for incore file
1156 * extents needs to be decreased. The ext_diff parameter stores the
1157 * number of extents to be removed and the idx parameter contains
1158 * the extent index where the extents will be removed from.
1159 *
1160 * If the amount of space needed has decreased below the linear
1161 * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous
1162 * extent array. Otherwise, use kmem_realloc() to adjust the
1163 * size to what is needed.
1164 */
1165void
1166xfs_iext_remove(
1167 xfs_inode_t *ip, /* incore inode pointer */
1168 xfs_extnum_t idx, /* index to begin removing exts */
1169 int ext_diff, /* number of extents to remove */
1170 int state) /* type of extent conversion */
1171{
1172 xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
1173 xfs_extnum_t nextents; /* number of extents in file */
1174 int new_size; /* size of extents after removal */
1175
1176 trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
1177
1178 ASSERT(ext_diff > 0);
1179 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1180 new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
1181
1182 if (new_size == 0) {
1183 xfs_iext_destroy(ifp);
1184 } else if (ifp->if_flags & XFS_IFEXTIREC) {
1185 xfs_iext_remove_indirect(ifp, idx, ext_diff);
1186 } else if (ifp->if_real_bytes) {
1187 xfs_iext_remove_direct(ifp, idx, ext_diff);
1188 } else {
1189 xfs_iext_remove_inline(ifp, idx, ext_diff);
1190 }
1191 ifp->if_bytes = new_size;
1192}
1193
1194/*
1195 * This removes ext_diff extents from the inline buffer, beginning
1196 * at extent index idx.
1197 */
1198void
1199xfs_iext_remove_inline(
1200 xfs_ifork_t *ifp, /* inode fork pointer */
1201 xfs_extnum_t idx, /* index to begin removing exts */
1202 int ext_diff) /* number of extents to remove */
1203{
1204 int nextents; /* number of extents in file */
1205
1206 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
1207 ASSERT(idx < XFS_INLINE_EXTS);
1208 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1209 ASSERT(((nextents - ext_diff) > 0) &&
1210 (nextents - ext_diff) < XFS_INLINE_EXTS);
1211
1212 if (idx + ext_diff < nextents) {
1213 memmove(&ifp->if_u2.if_inline_ext[idx],
1214 &ifp->if_u2.if_inline_ext[idx + ext_diff],
1215 (nextents - (idx + ext_diff)) *
1216 sizeof(xfs_bmbt_rec_t));
1217 memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff],
1218 0, ext_diff * sizeof(xfs_bmbt_rec_t));
1219 } else {
1220 memset(&ifp->if_u2.if_inline_ext[idx], 0,
1221 ext_diff * sizeof(xfs_bmbt_rec_t));
1222 }
1223}
1224
1225/*
1226 * This removes ext_diff extents from a linear (direct) extent list,
1227 * beginning at extent index idx. If the extents are being removed
1228 * from the end of the list (ie. truncate) then we just need to re-
1229 * allocate the list to remove the extra space. Otherwise, if the
1230 * extents are being removed from the middle of the existing extent
1231 * entries, then we first need to move the extent records beginning
1232 * at idx + ext_diff up in the list to overwrite the records being
1233 * removed, then remove the extra space via kmem_realloc.
1234 */
1235void
1236xfs_iext_remove_direct(
1237 xfs_ifork_t *ifp, /* inode fork pointer */
1238 xfs_extnum_t idx, /* index to begin removing exts */
1239 int ext_diff) /* number of extents to remove */
1240{
1241 xfs_extnum_t nextents; /* number of extents in file */
1242 int new_size; /* size of extents after removal */
1243
1244 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
1245 new_size = ifp->if_bytes -
1246 (ext_diff * sizeof(xfs_bmbt_rec_t));
1247 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1248
1249 if (new_size == 0) {
1250 xfs_iext_destroy(ifp);
1251 return;
1252 }
1253 /* Move extents up in the list (if needed) */
1254 if (idx + ext_diff < nextents) {
1255 memmove(&ifp->if_u1.if_extents[idx],
1256 &ifp->if_u1.if_extents[idx + ext_diff],
1257 (nextents - (idx + ext_diff)) *
1258 sizeof(xfs_bmbt_rec_t));
1259 }
1260 memset(&ifp->if_u1.if_extents[nextents - ext_diff],
1261 0, ext_diff * sizeof(xfs_bmbt_rec_t));
1262 /*
1263 * Reallocate the direct extent list. If the extents
1264 * will fit inside the inode then xfs_iext_realloc_direct
1265 * will switch from direct to inline extent allocation
1266 * mode for us.
1267 */
1268 xfs_iext_realloc_direct(ifp, new_size);
1269 ifp->if_bytes = new_size;
1270}
1271
1272/*
1273 * This is called when incore extents are being removed from the
1274 * indirection array and the extents being removed span multiple extent
1275 * buffers. The idx parameter contains the file extent index where we
1276 * want to begin removing extents, and the count parameter contains
1277 * how many extents need to be removed.
1278 *
1279 * |-------| |-------|
1280 * | nex1 | | | nex1 - number of extents before idx
1281 * |-------| | count |
1282 * | | | | count - number of extents being removed at idx
1283 * | count | |-------|
1284 * | | | nex2 | nex2 - number of extents after idx + count
1285 * |-------| |-------|
1286 */
1287void
1288xfs_iext_remove_indirect(
1289 xfs_ifork_t *ifp, /* inode fork pointer */
1290 xfs_extnum_t idx, /* index to begin removing extents */
1291 int count) /* number of extents to remove */
1292{
1293 xfs_ext_irec_t *erp; /* indirection array pointer */
1294 int erp_idx = 0; /* indirection array index */
1295 xfs_extnum_t ext_cnt; /* extents left to remove */
1296 xfs_extnum_t ext_diff; /* extents to remove in current list */
1297 xfs_extnum_t nex1; /* number of extents before idx */
1298 xfs_extnum_t nex2; /* extents after idx + count */
1299 int page_idx = idx; /* index in target extent list */
1300
1301 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1302 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
1303 ASSERT(erp != NULL);
1304 nex1 = page_idx;
1305 ext_cnt = count;
1306 while (ext_cnt) {
1307 nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0);
1308 ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1));
1309 /*
1310 * Check for deletion of entire list;
1311 * xfs_iext_irec_remove() updates extent offsets.
1312 */
1313 if (ext_diff == erp->er_extcount) {
1314 xfs_iext_irec_remove(ifp, erp_idx);
1315 ext_cnt -= ext_diff;
1316 nex1 = 0;
1317 if (ext_cnt) {
1318 ASSERT(erp_idx < ifp->if_real_bytes /
1319 XFS_IEXT_BUFSZ);
1320 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1321 nex1 = 0;
1322 continue;
1323 } else {
1324 break;
1325 }
1326 }
1327 /* Move extents up (if needed) */
1328 if (nex2) {
1329 memmove(&erp->er_extbuf[nex1],
1330 &erp->er_extbuf[nex1 + ext_diff],
1331 nex2 * sizeof(xfs_bmbt_rec_t));
1332 }
1333 /* Zero out rest of page */
1334 memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ -
1335 ((nex1 + nex2) * sizeof(xfs_bmbt_rec_t))));
1336 /* Update remaining counters */
1337 erp->er_extcount -= ext_diff;
1338 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff);
1339 ext_cnt -= ext_diff;
1340 nex1 = 0;
1341 erp_idx++;
1342 erp++;
1343 }
1344 ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t);
1345 xfs_iext_irec_compact(ifp);
1346}
1347
1348/*
1349 * Create, destroy, or resize a linear (direct) block of extents.
1350 */
1351void
1352xfs_iext_realloc_direct(
1353 xfs_ifork_t *ifp, /* inode fork pointer */
Jie Liu17ec81c2013-09-22 16:25:15 +08001354 int new_size) /* new size of extents after adding */
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001355{
1356 int rnew_size; /* real new size of extents */
1357
1358 rnew_size = new_size;
1359
1360 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) ||
1361 ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) &&
1362 (new_size != ifp->if_real_bytes)));
1363
1364 /* Free extent records */
1365 if (new_size == 0) {
1366 xfs_iext_destroy(ifp);
1367 }
1368 /* Resize direct extent list and zero any new bytes */
1369 else if (ifp->if_real_bytes) {
1370 /* Check if extents will fit inside the inode */
1371 if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) {
1372 xfs_iext_direct_to_inline(ifp, new_size /
1373 (uint)sizeof(xfs_bmbt_rec_t));
1374 ifp->if_bytes = new_size;
1375 return;
1376 }
1377 if (!is_power_of_2(new_size)){
1378 rnew_size = roundup_pow_of_two(new_size);
1379 }
1380 if (rnew_size != ifp->if_real_bytes) {
1381 ifp->if_u1.if_extents =
1382 kmem_realloc(ifp->if_u1.if_extents,
1383 rnew_size,
1384 ifp->if_real_bytes, KM_NOFS);
1385 }
1386 if (rnew_size > ifp->if_real_bytes) {
1387 memset(&ifp->if_u1.if_extents[ifp->if_bytes /
1388 (uint)sizeof(xfs_bmbt_rec_t)], 0,
1389 rnew_size - ifp->if_real_bytes);
1390 }
1391 }
Jie Liu17ec81c2013-09-22 16:25:15 +08001392 /* Switch from the inline extent buffer to a direct extent list */
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001393 else {
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001394 if (!is_power_of_2(new_size)) {
1395 rnew_size = roundup_pow_of_two(new_size);
1396 }
1397 xfs_iext_inline_to_direct(ifp, rnew_size);
1398 }
1399 ifp->if_real_bytes = rnew_size;
1400 ifp->if_bytes = new_size;
1401}
1402
1403/*
1404 * Switch from linear (direct) extent records to inline buffer.
1405 */
1406void
1407xfs_iext_direct_to_inline(
1408 xfs_ifork_t *ifp, /* inode fork pointer */
1409 xfs_extnum_t nextents) /* number of extents in file */
1410{
1411 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
1412 ASSERT(nextents <= XFS_INLINE_EXTS);
1413 /*
1414 * The inline buffer was zeroed when we switched
1415 * from inline to direct extent allocation mode,
1416 * so we don't need to clear it here.
1417 */
1418 memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
1419 nextents * sizeof(xfs_bmbt_rec_t));
1420 kmem_free(ifp->if_u1.if_extents);
1421 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
1422 ifp->if_real_bytes = 0;
1423}
1424
1425/*
1426 * Switch from inline buffer to linear (direct) extent records.
1427 * new_size should already be rounded up to the next power of 2
1428 * by the caller (when appropriate), so use new_size as it is.
1429 * However, since new_size may be rounded up, we can't update
1430 * if_bytes here. It is the caller's responsibility to update
1431 * if_bytes upon return.
1432 */
1433void
1434xfs_iext_inline_to_direct(
1435 xfs_ifork_t *ifp, /* inode fork pointer */
1436 int new_size) /* number of extents in file */
1437{
1438 ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
1439 memset(ifp->if_u1.if_extents, 0, new_size);
1440 if (ifp->if_bytes) {
1441 memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
1442 ifp->if_bytes);
1443 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
1444 sizeof(xfs_bmbt_rec_t));
1445 }
1446 ifp->if_real_bytes = new_size;
1447}
1448
1449/*
1450 * Resize an extent indirection array to new_size bytes.
1451 */
1452STATIC void
1453xfs_iext_realloc_indirect(
1454 xfs_ifork_t *ifp, /* inode fork pointer */
1455 int new_size) /* new indirection array size */
1456{
1457 int nlists; /* number of irec's (ex lists) */
1458 int size; /* current indirection array size */
1459
1460 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1461 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1462 size = nlists * sizeof(xfs_ext_irec_t);
1463 ASSERT(ifp->if_real_bytes);
1464 ASSERT((new_size >= 0) && (new_size != size));
1465 if (new_size == 0) {
1466 xfs_iext_destroy(ifp);
1467 } else {
1468 ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)
1469 kmem_realloc(ifp->if_u1.if_ext_irec,
1470 new_size, size, KM_NOFS);
1471 }
1472}
1473
1474/*
1475 * Switch from indirection array to linear (direct) extent allocations.
1476 */
1477STATIC void
1478xfs_iext_indirect_to_direct(
1479 xfs_ifork_t *ifp) /* inode fork pointer */
1480{
1481 xfs_bmbt_rec_host_t *ep; /* extent record pointer */
1482 xfs_extnum_t nextents; /* number of extents in file */
1483 int size; /* size of file extents */
1484
1485 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1486 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1487 ASSERT(nextents <= XFS_LINEAR_EXTS);
1488 size = nextents * sizeof(xfs_bmbt_rec_t);
1489
1490 xfs_iext_irec_compact_pages(ifp);
1491 ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
1492
1493 ep = ifp->if_u1.if_ext_irec->er_extbuf;
1494 kmem_free(ifp->if_u1.if_ext_irec);
1495 ifp->if_flags &= ~XFS_IFEXTIREC;
1496 ifp->if_u1.if_extents = ep;
1497 ifp->if_bytes = size;
1498 if (nextents < XFS_LINEAR_EXTS) {
1499 xfs_iext_realloc_direct(ifp, size);
1500 }
1501}
1502
1503/*
1504 * Free incore file extents.
1505 */
1506void
1507xfs_iext_destroy(
1508 xfs_ifork_t *ifp) /* inode fork pointer */
1509{
1510 if (ifp->if_flags & XFS_IFEXTIREC) {
1511 int erp_idx;
1512 int nlists;
1513
1514 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1515 for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
1516 xfs_iext_irec_remove(ifp, erp_idx);
1517 }
1518 ifp->if_flags &= ~XFS_IFEXTIREC;
1519 } else if (ifp->if_real_bytes) {
1520 kmem_free(ifp->if_u1.if_extents);
1521 } else if (ifp->if_bytes) {
1522 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
1523 sizeof(xfs_bmbt_rec_t));
1524 }
1525 ifp->if_u1.if_extents = NULL;
1526 ifp->if_real_bytes = 0;
1527 ifp->if_bytes = 0;
1528}
1529
1530/*
1531 * Return a pointer to the extent record for file system block bno.
1532 */
1533xfs_bmbt_rec_host_t * /* pointer to found extent record */
1534xfs_iext_bno_to_ext(
1535 xfs_ifork_t *ifp, /* inode fork pointer */
1536 xfs_fileoff_t bno, /* block number to search for */
1537 xfs_extnum_t *idxp) /* index of target extent */
1538{
1539 xfs_bmbt_rec_host_t *base; /* pointer to first extent */
1540 xfs_filblks_t blockcount = 0; /* number of blocks in extent */
1541 xfs_bmbt_rec_host_t *ep = NULL; /* pointer to target extent */
1542 xfs_ext_irec_t *erp = NULL; /* indirection array pointer */
1543 int high; /* upper boundary in search */
1544 xfs_extnum_t idx = 0; /* index of target extent */
1545 int low; /* lower boundary in search */
1546 xfs_extnum_t nextents; /* number of file extents */
1547 xfs_fileoff_t startoff = 0; /* start offset of extent */
1548
1549 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1550 if (nextents == 0) {
1551 *idxp = 0;
1552 return NULL;
1553 }
1554 low = 0;
1555 if (ifp->if_flags & XFS_IFEXTIREC) {
1556 /* Find target extent list */
1557 int erp_idx = 0;
1558 erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
1559 base = erp->er_extbuf;
1560 high = erp->er_extcount - 1;
1561 } else {
1562 base = ifp->if_u1.if_extents;
1563 high = nextents - 1;
1564 }
1565 /* Binary search extent records */
1566 while (low <= high) {
1567 idx = (low + high) >> 1;
1568 ep = base + idx;
1569 startoff = xfs_bmbt_get_startoff(ep);
1570 blockcount = xfs_bmbt_get_blockcount(ep);
1571 if (bno < startoff) {
1572 high = idx - 1;
1573 } else if (bno >= startoff + blockcount) {
1574 low = idx + 1;
1575 } else {
1576 /* Convert back to file-based extent index */
1577 if (ifp->if_flags & XFS_IFEXTIREC) {
1578 idx += erp->er_extoff;
1579 }
1580 *idxp = idx;
1581 return ep;
1582 }
1583 }
1584 /* Convert back to file-based extent index */
1585 if (ifp->if_flags & XFS_IFEXTIREC) {
1586 idx += erp->er_extoff;
1587 }
1588 if (bno >= startoff + blockcount) {
1589 if (++idx == nextents) {
1590 ep = NULL;
1591 } else {
1592 ep = xfs_iext_get_ext(ifp, idx);
1593 }
1594 }
1595 *idxp = idx;
1596 return ep;
1597}
1598
1599/*
1600 * Return a pointer to the indirection array entry containing the
1601 * extent record for filesystem block bno. Store the index of the
1602 * target irec in *erp_idxp.
1603 */
1604xfs_ext_irec_t * /* pointer to found extent record */
1605xfs_iext_bno_to_irec(
1606 xfs_ifork_t *ifp, /* inode fork pointer */
1607 xfs_fileoff_t bno, /* block number to search for */
1608 int *erp_idxp) /* irec index of target ext list */
1609{
1610 xfs_ext_irec_t *erp = NULL; /* indirection array pointer */
1611 xfs_ext_irec_t *erp_next; /* next indirection array entry */
1612 int erp_idx; /* indirection array index */
1613 int nlists; /* number of extent irec's (lists) */
1614 int high; /* binary search upper limit */
1615 int low; /* binary search lower limit */
1616
1617 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1618 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1619 erp_idx = 0;
1620 low = 0;
1621 high = nlists - 1;
1622 while (low <= high) {
1623 erp_idx = (low + high) >> 1;
1624 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1625 erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL;
1626 if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) {
1627 high = erp_idx - 1;
1628 } else if (erp_next && bno >=
1629 xfs_bmbt_get_startoff(erp_next->er_extbuf)) {
1630 low = erp_idx + 1;
1631 } else {
1632 break;
1633 }
1634 }
1635 *erp_idxp = erp_idx;
1636 return erp;
1637}
1638
1639/*
1640 * Return a pointer to the indirection array entry containing the
1641 * extent record at file extent index *idxp. Store the index of the
1642 * target irec in *erp_idxp and store the page index of the target
1643 * extent record in *idxp.
1644 */
1645xfs_ext_irec_t *
1646xfs_iext_idx_to_irec(
1647 xfs_ifork_t *ifp, /* inode fork pointer */
1648 xfs_extnum_t *idxp, /* extent index (file -> page) */
1649 int *erp_idxp, /* pointer to target irec */
1650 int realloc) /* new bytes were just added */
1651{
1652 xfs_ext_irec_t *prev; /* pointer to previous irec */
1653 xfs_ext_irec_t *erp = NULL; /* pointer to current irec */
1654 int erp_idx; /* indirection array index */
1655 int nlists; /* number of irec's (ex lists) */
1656 int high; /* binary search upper limit */
1657 int low; /* binary search lower limit */
1658 xfs_extnum_t page_idx = *idxp; /* extent index in target list */
1659
1660 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1661 ASSERT(page_idx >= 0);
1662 ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
1663 ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);
1664
1665 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1666 erp_idx = 0;
1667 low = 0;
1668 high = nlists - 1;
1669
1670 /* Binary search extent irec's */
1671 while (low <= high) {
1672 erp_idx = (low + high) >> 1;
1673 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1674 prev = erp_idx > 0 ? erp - 1 : NULL;
1675 if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
1676 realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
1677 high = erp_idx - 1;
1678 } else if (page_idx > erp->er_extoff + erp->er_extcount ||
1679 (page_idx == erp->er_extoff + erp->er_extcount &&
1680 !realloc)) {
1681 low = erp_idx + 1;
1682 } else if (page_idx == erp->er_extoff + erp->er_extcount &&
1683 erp->er_extcount == XFS_LINEAR_EXTS) {
1684 ASSERT(realloc);
1685 page_idx = 0;
1686 erp_idx++;
1687 erp = erp_idx < nlists ? erp + 1 : NULL;
1688 break;
1689 } else {
1690 page_idx -= erp->er_extoff;
1691 break;
1692 }
1693 }
1694 *idxp = page_idx;
1695 *erp_idxp = erp_idx;
1696 return(erp);
1697}
1698
1699/*
1700 * Allocate and initialize an indirection array once the space needed
1701 * for incore extents increases above XFS_IEXT_BUFSZ.
1702 */
1703void
1704xfs_iext_irec_init(
1705 xfs_ifork_t *ifp) /* inode fork pointer */
1706{
1707 xfs_ext_irec_t *erp; /* indirection array pointer */
1708 xfs_extnum_t nextents; /* number of extents in file */
1709
1710 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
1711 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1712 ASSERT(nextents <= XFS_LINEAR_EXTS);
1713
1714 erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
1715
1716 if (nextents == 0) {
1717 ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
1718 } else if (!ifp->if_real_bytes) {
1719 xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
1720 } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
1721 xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ);
1722 }
1723 erp->er_extbuf = ifp->if_u1.if_extents;
1724 erp->er_extcount = nextents;
1725 erp->er_extoff = 0;
1726
1727 ifp->if_flags |= XFS_IFEXTIREC;
1728 ifp->if_real_bytes = XFS_IEXT_BUFSZ;
1729 ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t);
1730 ifp->if_u1.if_ext_irec = erp;
1731
1732 return;
1733}
1734
1735/*
1736 * Allocate and initialize a new entry in the indirection array.
1737 */
1738xfs_ext_irec_t *
1739xfs_iext_irec_new(
1740 xfs_ifork_t *ifp, /* inode fork pointer */
1741 int erp_idx) /* index for new irec */
1742{
1743 xfs_ext_irec_t *erp; /* indirection array pointer */
1744 int i; /* loop counter */
1745 int nlists; /* number of irec's (ex lists) */
1746
1747 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1748 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1749
1750 /* Resize indirection array */
1751 xfs_iext_realloc_indirect(ifp, ++nlists *
1752 sizeof(xfs_ext_irec_t));
1753 /*
1754 * Move records down in the array so the
1755 * new page can use erp_idx.
1756 */
1757 erp = ifp->if_u1.if_ext_irec;
1758 for (i = nlists - 1; i > erp_idx; i--) {
1759 memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t));
1760 }
1761 ASSERT(i == erp_idx);
1762
1763 /* Initialize new extent record */
1764 erp = ifp->if_u1.if_ext_irec;
1765 erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
1766 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
1767 memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
1768 erp[erp_idx].er_extcount = 0;
1769 erp[erp_idx].er_extoff = erp_idx > 0 ?
1770 erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0;
1771 return (&erp[erp_idx]);
1772}
1773
1774/*
1775 * Remove a record from the indirection array.
1776 */
1777void
1778xfs_iext_irec_remove(
1779 xfs_ifork_t *ifp, /* inode fork pointer */
1780 int erp_idx) /* irec index to remove */
1781{
1782 xfs_ext_irec_t *erp; /* indirection array pointer */
1783 int i; /* loop counter */
1784 int nlists; /* number of irec's (ex lists) */
1785
1786 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1787 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1788 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1789 if (erp->er_extbuf) {
1790 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
1791 -erp->er_extcount);
1792 kmem_free(erp->er_extbuf);
1793 }
1794 /* Compact extent records */
1795 erp = ifp->if_u1.if_ext_irec;
1796 for (i = erp_idx; i < nlists - 1; i++) {
1797 memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
1798 }
1799 /*
1800 * Manually free the last extent record from the indirection
1801 * array. A call to xfs_iext_realloc_indirect() with a size
1802 * of zero would result in a call to xfs_iext_destroy() which
1803 * would in turn call this function again, creating a nasty
1804 * infinite loop.
1805 */
1806 if (--nlists) {
1807 xfs_iext_realloc_indirect(ifp,
1808 nlists * sizeof(xfs_ext_irec_t));
1809 } else {
1810 kmem_free(ifp->if_u1.if_ext_irec);
1811 }
1812 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
1813}
1814
1815/*
1816 * This is called to clean up large amounts of unused memory allocated
1817 * by the indirection array. Before compacting anything though, verify
1818 * that the indirection array is still needed and switch back to the
1819 * linear extent list (or even the inline buffer) if possible. The
1820 * compaction policy is as follows:
1821 *
1822 * Full Compaction: Extents fit into a single page (or inline buffer)
1823 * Partial Compaction: Extents occupy less than 50% of allocated space
1824 * No Compaction: Extents occupy at least 50% of allocated space
1825 */
1826void
1827xfs_iext_irec_compact(
1828 xfs_ifork_t *ifp) /* inode fork pointer */
1829{
1830 xfs_extnum_t nextents; /* number of extents in file */
1831 int nlists; /* number of irec's (ex lists) */
1832
1833 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1834 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1835 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1836
1837 if (nextents == 0) {
1838 xfs_iext_destroy(ifp);
1839 } else if (nextents <= XFS_INLINE_EXTS) {
1840 xfs_iext_indirect_to_direct(ifp);
1841 xfs_iext_direct_to_inline(ifp, nextents);
1842 } else if (nextents <= XFS_LINEAR_EXTS) {
1843 xfs_iext_indirect_to_direct(ifp);
1844 } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
1845 xfs_iext_irec_compact_pages(ifp);
1846 }
1847}
1848
1849/*
1850 * Combine extents from neighboring extent pages.
1851 */
1852void
1853xfs_iext_irec_compact_pages(
1854 xfs_ifork_t *ifp) /* inode fork pointer */
1855{
1856 xfs_ext_irec_t *erp, *erp_next;/* pointers to irec entries */
1857 int erp_idx = 0; /* indirection array index */
1858 int nlists; /* number of irec's (ex lists) */
1859
1860 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1861 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1862 while (erp_idx < nlists - 1) {
1863 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1864 erp_next = erp + 1;
1865 if (erp_next->er_extcount <=
1866 (XFS_LINEAR_EXTS - erp->er_extcount)) {
1867 memcpy(&erp->er_extbuf[erp->er_extcount],
1868 erp_next->er_extbuf, erp_next->er_extcount *
1869 sizeof(xfs_bmbt_rec_t));
1870 erp->er_extcount += erp_next->er_extcount;
1871 /*
1872 * Free page before removing extent record
1873 * so er_extoffs don't get modified in
1874 * xfs_iext_irec_remove.
1875 */
1876 kmem_free(erp_next->er_extbuf);
1877 erp_next->er_extbuf = NULL;
1878 xfs_iext_irec_remove(ifp, erp_idx + 1);
1879 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1880 } else {
1881 erp_idx++;
1882 }
1883 }
1884}
1885
1886/*
1887 * This is called to update the er_extoff field in the indirection
1888 * array when extents have been added or removed from one of the
1889 * extent lists. erp_idx contains the irec index to begin updating
1890 * at and ext_diff contains the number of extents that were added
1891 * or removed.
1892 */
1893void
1894xfs_iext_irec_update_extoffs(
1895 xfs_ifork_t *ifp, /* inode fork pointer */
1896 int erp_idx, /* irec index to update */
1897 int ext_diff) /* number of new extents */
1898{
1899 int i; /* loop counter */
1900 int nlists; /* number of irec's (ex lists */
1901
1902 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1903 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1904 for (i = erp_idx; i < nlists; i++) {
1905 ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
1906 }
1907}