blob: 0bf1c747439dc1e9fa74c95a4a38e106cb9f377c [file] [log] [blame]
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
18#include <linux/log2.h>
19
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_format.h"
Dave Chinner239880e2013-10-23 10:50:10 +110023#include "xfs_log_format.h"
24#include "xfs_trans_resv.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100025#include "xfs_mount.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100026#include "xfs_inode.h"
Dave Chinner239880e2013-10-23 10:50:10 +110027#include "xfs_trans.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100028#include "xfs_inode_item.h"
Dave Chinnera4fbe6a2013-10-23 10:51:50 +110029#include "xfs_bmap_btree.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100030#include "xfs_bmap.h"
31#include "xfs_error.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100032#include "xfs_trace.h"
Dave Chinnera4fbe6a2013-10-23 10:51:50 +110033#include "xfs_attr_sf.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100034
35kmem_zone_t *xfs_ifork_zone;
36
37STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
38STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
39STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
40
#ifdef DEBUG
/*
 * Debug-only sanity pass over the first nrecs in-core extent records of a
 * fork.  Each record is read with unaligned-safe accessors and unpacked;
 * when the inode uses the "no state" extent format, every record must be in
 * the normal (XFS_EXT_NORM) state.
 */
void
xfs_validate_extents(
	xfs_ifork_t		*ifp,
	int			nrecs,
	xfs_exntfmt_t		fmt)
{
	int			n;

	for (n = 0; n < nrecs; n++) {
		xfs_bmbt_rec_host_t	*src = xfs_iext_get_ext(ifp, n);
		xfs_bmbt_rec_host_t	copy;
		xfs_bmbt_irec_t		unpacked;

		copy.l0 = get_unaligned(&src->l0);
		copy.l1 = get_unaligned(&src->l1);
		xfs_bmbt_get_all(&copy, &unpacked);
		ASSERT(fmt != XFS_EXTFMT_NOSTATE ||
		       unpacked.br_state == XFS_EXT_NORM);
	}
}
#else /* DEBUG */
/* Non-debug builds compile the validation away entirely. */
#define xfs_validate_extents(ifp, nrecs, fmt)
#endif /* DEBUG */
68
69
70/*
71 * Move inode type and inode format specific information from the
72 * on-disk inode to the in-core inode. For fifos, devs, and sockets
73 * this means set if_rdev to the proper value. For files, directories,
74 * and symlinks this means to bring in the in-line data or extent
75 * pointers. For a file in B-tree format, only the root is immediately
76 * brought in-core. The rest will be in-lined in if_extents when it
77 * is first referenced (see xfs_iread_extents()).
78 */
79int
80xfs_iformat_fork(
81 xfs_inode_t *ip,
82 xfs_dinode_t *dip)
83{
84 xfs_attr_shortform_t *atp;
85 int size;
86 int error = 0;
87 xfs_fsize_t di_size;
88
89 if (unlikely(be32_to_cpu(dip->di_nextents) +
90 be16_to_cpu(dip->di_anextents) >
91 be64_to_cpu(dip->di_nblocks))) {
92 xfs_warn(ip->i_mount,
93 "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
94 (unsigned long long)ip->i_ino,
95 (int)(be32_to_cpu(dip->di_nextents) +
96 be16_to_cpu(dip->di_anextents)),
97 (unsigned long long)
98 be64_to_cpu(dip->di_nblocks));
99 XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
100 ip->i_mount, dip);
Dave Chinner24513372014-06-25 14:58:08 +1000101 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000102 }
103
104 if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
105 xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
106 (unsigned long long)ip->i_ino,
107 dip->di_forkoff);
108 XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
109 ip->i_mount, dip);
Dave Chinner24513372014-06-25 14:58:08 +1000110 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000111 }
112
113 if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
114 !ip->i_mount->m_rtdev_targp)) {
115 xfs_warn(ip->i_mount,
116 "corrupt dinode %Lu, has realtime flag set.",
117 ip->i_ino);
118 XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
119 XFS_ERRLEVEL_LOW, ip->i_mount, dip);
Dave Chinner24513372014-06-25 14:58:08 +1000120 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000121 }
122
Dave Chinnerc19b3b052016-02-09 16:54:58 +1100123 switch (VFS_I(ip)->i_mode & S_IFMT) {
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000124 case S_IFIFO:
125 case S_IFCHR:
126 case S_IFBLK:
127 case S_IFSOCK:
128 if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
129 XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
130 ip->i_mount, dip);
Dave Chinner24513372014-06-25 14:58:08 +1000131 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000132 }
133 ip->i_d.di_size = 0;
134 ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
135 break;
136
137 case S_IFREG:
138 case S_IFLNK:
139 case S_IFDIR:
140 switch (dip->di_format) {
141 case XFS_DINODE_FMT_LOCAL:
142 /*
143 * no local regular files yet
144 */
145 if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
146 xfs_warn(ip->i_mount,
147 "corrupt inode %Lu (local format for regular file).",
148 (unsigned long long) ip->i_ino);
149 XFS_CORRUPTION_ERROR("xfs_iformat(4)",
150 XFS_ERRLEVEL_LOW,
151 ip->i_mount, dip);
Dave Chinner24513372014-06-25 14:58:08 +1000152 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000153 }
154
155 di_size = be64_to_cpu(dip->di_size);
Dan Carpenter0d0ab122013-08-15 08:53:38 +0300156 if (unlikely(di_size < 0 ||
157 di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000158 xfs_warn(ip->i_mount,
159 "corrupt inode %Lu (bad size %Ld for local inode).",
160 (unsigned long long) ip->i_ino,
161 (long long) di_size);
162 XFS_CORRUPTION_ERROR("xfs_iformat(5)",
163 XFS_ERRLEVEL_LOW,
164 ip->i_mount, dip);
Dave Chinner24513372014-06-25 14:58:08 +1000165 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000166 }
167
168 size = (int)di_size;
169 error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
170 break;
171 case XFS_DINODE_FMT_EXTENTS:
172 error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
173 break;
174 case XFS_DINODE_FMT_BTREE:
175 error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
176 break;
177 default:
178 XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
179 ip->i_mount);
Dave Chinner24513372014-06-25 14:58:08 +1000180 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000181 }
182 break;
183
184 default:
185 XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
Dave Chinner24513372014-06-25 14:58:08 +1000186 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000187 }
188 if (error) {
189 return error;
190 }
191 if (!XFS_DFORK_Q(dip))
192 return 0;
193
194 ASSERT(ip->i_afp == NULL);
195 ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
196
197 switch (dip->di_aformat) {
198 case XFS_DINODE_FMT_LOCAL:
199 atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
200 size = be16_to_cpu(atp->hdr.totsize);
201
202 if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
203 xfs_warn(ip->i_mount,
204 "corrupt inode %Lu (bad attr fork size %Ld).",
205 (unsigned long long) ip->i_ino,
206 (long long) size);
207 XFS_CORRUPTION_ERROR("xfs_iformat(8)",
208 XFS_ERRLEVEL_LOW,
209 ip->i_mount, dip);
Dave Chinner24513372014-06-25 14:58:08 +1000210 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000211 }
212
213 error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
214 break;
215 case XFS_DINODE_FMT_EXTENTS:
216 error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
217 break;
218 case XFS_DINODE_FMT_BTREE:
219 error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
220 break;
221 default:
Dave Chinner24513372014-06-25 14:58:08 +1000222 error = -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000223 break;
224 }
225 if (error) {
226 kmem_zone_free(xfs_ifork_zone, ip->i_afp);
227 ip->i_afp = NULL;
228 xfs_idestroy_fork(ip, XFS_DATA_FORK);
229 }
230 return error;
231}
232
233/*
234 * The file is in-lined in the on-disk inode.
235 * If it fits into if_inline_data, then copy
236 * it there, otherwise allocate a buffer for it
237 * and copy the data there. Either way, set
238 * if_data to point at the data.
239 * If we allocate a buffer for the data, make
240 * sure that its size is a multiple of 4 and
241 * record the real size in i_real_bytes.
242 */
243STATIC int
244xfs_iformat_local(
245 xfs_inode_t *ip,
246 xfs_dinode_t *dip,
247 int whichfork,
248 int size)
249{
250 xfs_ifork_t *ifp;
251 int real_size;
252
253 /*
254 * If the size is unreasonable, then something
255 * is wrong and we just bail out rather than crash in
256 * kmem_alloc() or memcpy() below.
257 */
258 if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
259 xfs_warn(ip->i_mount,
260 "corrupt inode %Lu (bad size %d for local fork, size = %d).",
261 (unsigned long long) ip->i_ino, size,
262 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
263 XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
264 ip->i_mount, dip);
Dave Chinner24513372014-06-25 14:58:08 +1000265 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000266 }
267 ifp = XFS_IFORK_PTR(ip, whichfork);
268 real_size = 0;
269 if (size == 0)
270 ifp->if_u1.if_data = NULL;
271 else if (size <= sizeof(ifp->if_u2.if_inline_data))
272 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
273 else {
274 real_size = roundup(size, 4);
275 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
276 }
277 ifp->if_bytes = size;
278 ifp->if_real_bytes = real_size;
279 if (size)
280 memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size);
281 ifp->if_flags &= ~XFS_IFEXTENTS;
282 ifp->if_flags |= XFS_IFINLINE;
283 return 0;
284}
285
286/*
287 * The file consists of a set of extents all
288 * of which fit into the on-disk inode.
289 * If there are few enough extents to fit into
290 * the if_inline_ext, then copy them there.
291 * Otherwise allocate a buffer for them and copy
292 * them into it. Either way, set if_extents
293 * to point at the extents.
294 */
295STATIC int
296xfs_iformat_extents(
297 xfs_inode_t *ip,
298 xfs_dinode_t *dip,
299 int whichfork)
300{
301 xfs_bmbt_rec_t *dp;
302 xfs_ifork_t *ifp;
303 int nex;
304 int size;
305 int i;
306
307 ifp = XFS_IFORK_PTR(ip, whichfork);
308 nex = XFS_DFORK_NEXTENTS(dip, whichfork);
309 size = nex * (uint)sizeof(xfs_bmbt_rec_t);
310
311 /*
312 * If the number of extents is unreasonable, then something
313 * is wrong and we just bail out rather than crash in
314 * kmem_alloc() or memcpy() below.
315 */
316 if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
317 xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
318 (unsigned long long) ip->i_ino, nex);
319 XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
320 ip->i_mount, dip);
Dave Chinner24513372014-06-25 14:58:08 +1000321 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000322 }
323
324 ifp->if_real_bytes = 0;
325 if (nex == 0)
326 ifp->if_u1.if_extents = NULL;
327 else if (nex <= XFS_INLINE_EXTS)
328 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
329 else
330 xfs_iext_add(ifp, 0, nex);
331
332 ifp->if_bytes = size;
333 if (size) {
334 dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
335 xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
336 for (i = 0; i < nex; i++, dp++) {
337 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
338 ep->l0 = get_unaligned_be64(&dp->l0);
339 ep->l1 = get_unaligned_be64(&dp->l1);
340 }
341 XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
342 if (whichfork != XFS_DATA_FORK ||
343 XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
344 if (unlikely(xfs_check_nostate_extents(
345 ifp, 0, nex))) {
346 XFS_ERROR_REPORT("xfs_iformat_extents(2)",
347 XFS_ERRLEVEL_LOW,
348 ip->i_mount);
Dave Chinner24513372014-06-25 14:58:08 +1000349 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000350 }
351 }
352 ifp->if_flags |= XFS_IFEXTENTS;
353 return 0;
354}
355
356/*
357 * The file has too many extents to fit into
358 * the inode, so they are in B-tree format.
359 * Allocate a buffer for the root of the B-tree
360 * and copy the root into it. The i_extents
361 * field will remain NULL until all of the
362 * extents are read in (when they are needed).
363 */
364STATIC int
365xfs_iformat_btree(
366 xfs_inode_t *ip,
367 xfs_dinode_t *dip,
368 int whichfork)
369{
370 struct xfs_mount *mp = ip->i_mount;
371 xfs_bmdr_block_t *dfp;
372 xfs_ifork_t *ifp;
373 /* REFERENCED */
374 int nrecs;
375 int size;
376
377 ifp = XFS_IFORK_PTR(ip, whichfork);
378 dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
379 size = XFS_BMAP_BROOT_SPACE(mp, dfp);
380 nrecs = be16_to_cpu(dfp->bb_numrecs);
381
382 /*
383 * blow out if -- fork has less extents than can fit in
384 * fork (fork shouldn't be a btree format), root btree
385 * block has more records than can fit into the fork,
386 * or the number of extents is greater than the number of
387 * blocks.
388 */
389 if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
390 XFS_IFORK_MAXEXT(ip, whichfork) ||
391 XFS_BMDR_SPACE_CALC(nrecs) >
392 XFS_DFORK_SIZE(dip, mp, whichfork) ||
393 XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
394 xfs_warn(mp, "corrupt inode %Lu (btree).",
395 (unsigned long long) ip->i_ino);
396 XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
397 mp, dip);
Dave Chinner24513372014-06-25 14:58:08 +1000398 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000399 }
400
401 ifp->if_broot_bytes = size;
402 ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS);
403 ASSERT(ifp->if_broot != NULL);
404 /*
405 * Copy and convert from the on-disk structure
406 * to the in-memory structure.
407 */
408 xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
409 ifp->if_broot, size);
410 ifp->if_flags &= ~XFS_IFEXTENTS;
411 ifp->if_flags |= XFS_IFBROOT;
412
413 return 0;
414}
415
416/*
417 * Read in extents from a btree-format inode.
418 * Allocate and fill in if_extents. Real work is done in xfs_bmap.c.
419 */
420int
421xfs_iread_extents(
422 xfs_trans_t *tp,
423 xfs_inode_t *ip,
424 int whichfork)
425{
426 int error;
427 xfs_ifork_t *ifp;
428 xfs_extnum_t nextents;
429
Christoph Hellwigeef334e2013-12-06 12:30:17 -0800430 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
431
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000432 if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
433 XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
434 ip->i_mount);
Dave Chinner24513372014-06-25 14:58:08 +1000435 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000436 }
437 nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
438 ifp = XFS_IFORK_PTR(ip, whichfork);
439
440 /*
441 * We know that the size is valid (it's checked in iformat_btree)
442 */
443 ifp->if_bytes = ifp->if_real_bytes = 0;
444 ifp->if_flags |= XFS_IFEXTENTS;
445 xfs_iext_add(ifp, 0, nextents);
446 error = xfs_bmap_read_extents(tp, ip, whichfork);
447 if (error) {
448 xfs_iext_destroy(ifp);
449 ifp->if_flags &= ~XFS_IFEXTENTS;
450 return error;
451 }
452 xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip));
453 return 0;
454}
455/*
456 * Reallocate the space for if_broot based on the number of records
457 * being added or deleted as indicated in rec_diff. Move the records
458 * and pointers in if_broot to fit the new size. When shrinking this
459 * will eliminate holes between the records and pointers created by
460 * the caller. When growing this will create holes to be filled in
461 * by the caller.
462 *
463 * The caller must not request to add more records than would fit in
464 * the on-disk inode root. If the if_broot is currently NULL, then
Zhi Yong Wuf6c27342013-08-07 10:11:04 +0000465 * if we are adding records, one will be allocated. The caller must also
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000466 * not request that the number of records go below zero, although
467 * it can go to zero.
468 *
469 * ip -- the inode whose if_broot area is changing
470 * ext_diff -- the change in the number of records, positive or negative,
471 * requested for the if_broot array.
472 */
473void
474xfs_iroot_realloc(
475 xfs_inode_t *ip,
476 int rec_diff,
477 int whichfork)
478{
479 struct xfs_mount *mp = ip->i_mount;
480 int cur_max;
481 xfs_ifork_t *ifp;
482 struct xfs_btree_block *new_broot;
483 int new_max;
484 size_t new_size;
485 char *np;
486 char *op;
487
488 /*
489 * Handle the degenerate case quietly.
490 */
491 if (rec_diff == 0) {
492 return;
493 }
494
495 ifp = XFS_IFORK_PTR(ip, whichfork);
496 if (rec_diff > 0) {
497 /*
498 * If there wasn't any memory allocated before, just
499 * allocate it now and get out.
500 */
501 if (ifp->if_broot_bytes == 0) {
502 new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
503 ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
504 ifp->if_broot_bytes = (int)new_size;
505 return;
506 }
507
508 /*
509 * If there is already an existing if_broot, then we need
510 * to realloc() it and shift the pointers to their new
511 * location. The records don't change location because
512 * they are kept butted up against the btree block header.
513 */
514 cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
515 new_max = cur_max + rec_diff;
516 new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
517 ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
518 XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max),
519 KM_SLEEP | KM_NOFS);
520 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
521 ifp->if_broot_bytes);
522 np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
523 (int)new_size);
524 ifp->if_broot_bytes = (int)new_size;
525 ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
526 XFS_IFORK_SIZE(ip, whichfork));
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +1000527 memmove(np, op, cur_max * (uint)sizeof(xfs_fsblock_t));
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000528 return;
529 }
530
531 /*
532 * rec_diff is less than 0. In this case, we are shrinking the
533 * if_broot buffer. It must already exist. If we go to zero
534 * records, just get rid of the root and clear the status bit.
535 */
536 ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
537 cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
538 new_max = cur_max + rec_diff;
539 ASSERT(new_max >= 0);
540 if (new_max > 0)
541 new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
542 else
543 new_size = 0;
544 if (new_size > 0) {
545 new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
546 /*
547 * First copy over the btree block header.
548 */
549 memcpy(new_broot, ifp->if_broot,
550 XFS_BMBT_BLOCK_LEN(ip->i_mount));
551 } else {
552 new_broot = NULL;
553 ifp->if_flags &= ~XFS_IFBROOT;
554 }
555
556 /*
557 * Only copy the records and pointers if there are any.
558 */
559 if (new_max > 0) {
560 /*
561 * First copy the records.
562 */
563 op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
564 np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
565 memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
566
567 /*
568 * Then copy the pointers.
569 */
570 op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
571 ifp->if_broot_bytes);
572 np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
573 (int)new_size);
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +1000574 memcpy(np, op, new_max * (uint)sizeof(xfs_fsblock_t));
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000575 }
576 kmem_free(ifp->if_broot);
577 ifp->if_broot = new_broot;
578 ifp->if_broot_bytes = (int)new_size;
579 if (ifp->if_broot)
580 ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
581 XFS_IFORK_SIZE(ip, whichfork));
582 return;
583}
584
585
586/*
587 * This is called when the amount of space needed for if_data
588 * is increased or decreased. The change in size is indicated by
589 * the number of bytes that need to be added or deleted in the
590 * byte_diff parameter.
591 *
592 * If the amount of space needed has decreased below the size of the
593 * inline buffer, then switch to using the inline buffer. Otherwise,
594 * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
595 * to what is needed.
596 *
597 * ip -- the inode whose if_data area is changing
598 * byte_diff -- the change in the number of bytes, positive or negative,
599 * requested for the if_data array.
600 */
601void
602xfs_idata_realloc(
603 xfs_inode_t *ip,
604 int byte_diff,
605 int whichfork)
606{
607 xfs_ifork_t *ifp;
608 int new_size;
609 int real_size;
610
611 if (byte_diff == 0) {
612 return;
613 }
614
615 ifp = XFS_IFORK_PTR(ip, whichfork);
616 new_size = (int)ifp->if_bytes + byte_diff;
617 ASSERT(new_size >= 0);
618
619 if (new_size == 0) {
620 if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
621 kmem_free(ifp->if_u1.if_data);
622 }
623 ifp->if_u1.if_data = NULL;
624 real_size = 0;
625 } else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) {
626 /*
627 * If the valid extents/data can fit in if_inline_ext/data,
628 * copy them from the malloc'd vector and free it.
629 */
630 if (ifp->if_u1.if_data == NULL) {
631 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
632 } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
633 ASSERT(ifp->if_real_bytes != 0);
634 memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
635 new_size);
636 kmem_free(ifp->if_u1.if_data);
637 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
638 }
639 real_size = 0;
640 } else {
641 /*
642 * Stuck with malloc/realloc.
643 * For inline data, the underlying buffer must be
644 * a multiple of 4 bytes in size so that it can be
645 * logged and stay on word boundaries. We enforce
646 * that here.
647 */
648 real_size = roundup(new_size, 4);
649 if (ifp->if_u1.if_data == NULL) {
650 ASSERT(ifp->if_real_bytes == 0);
651 ifp->if_u1.if_data = kmem_alloc(real_size,
652 KM_SLEEP | KM_NOFS);
653 } else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
654 /*
655 * Only do the realloc if the underlying size
656 * is really changing.
657 */
658 if (ifp->if_real_bytes != real_size) {
659 ifp->if_u1.if_data =
660 kmem_realloc(ifp->if_u1.if_data,
661 real_size,
662 ifp->if_real_bytes,
663 KM_SLEEP | KM_NOFS);
664 }
665 } else {
666 ASSERT(ifp->if_real_bytes == 0);
667 ifp->if_u1.if_data = kmem_alloc(real_size,
668 KM_SLEEP | KM_NOFS);
669 memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
670 ifp->if_bytes);
671 }
672 }
673 ifp->if_real_bytes = real_size;
674 ifp->if_bytes = new_size;
675 ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
676}
677
678void
679xfs_idestroy_fork(
680 xfs_inode_t *ip,
681 int whichfork)
682{
683 xfs_ifork_t *ifp;
684
685 ifp = XFS_IFORK_PTR(ip, whichfork);
686 if (ifp->if_broot != NULL) {
687 kmem_free(ifp->if_broot);
688 ifp->if_broot = NULL;
689 }
690
691 /*
692 * If the format is local, then we can't have an extents
693 * array so just look for an inline data array. If we're
694 * not local then we may or may not have an extents list,
695 * so check and free it up if we do.
696 */
697 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
698 if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
699 (ifp->if_u1.if_data != NULL)) {
700 ASSERT(ifp->if_real_bytes != 0);
701 kmem_free(ifp->if_u1.if_data);
702 ifp->if_u1.if_data = NULL;
703 ifp->if_real_bytes = 0;
704 }
705 } else if ((ifp->if_flags & XFS_IFEXTENTS) &&
706 ((ifp->if_flags & XFS_IFEXTIREC) ||
707 ((ifp->if_u1.if_extents != NULL) &&
708 (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) {
709 ASSERT(ifp->if_real_bytes != 0);
710 xfs_iext_destroy(ifp);
711 }
712 ASSERT(ifp->if_u1.if_extents == NULL ||
713 ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext);
714 ASSERT(ifp->if_real_bytes == 0);
715 if (whichfork == XFS_ATTR_FORK) {
716 kmem_zone_free(xfs_ifork_zone, ip->i_afp);
717 ip->i_afp = NULL;
718 }
719}
720
721/*
Christoph Hellwigda776502013-12-13 11:34:04 +1100722 * Convert in-core extents to on-disk form
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000723 *
Christoph Hellwigda776502013-12-13 11:34:04 +1100724 * For either the data or attr fork in extent format, we need to endian convert
725 * the in-core extent as we place them into the on-disk inode.
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000726 *
Christoph Hellwigda776502013-12-13 11:34:04 +1100727 * In the case of the data fork, the in-core and on-disk fork sizes can be
728 * different due to delayed allocation extents. We only copy on-disk extents
729 * here, so callers must always use the physical fork size to determine the
730 * size of the buffer passed to this routine. We will return the size actually
731 * used.
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000732 */
733int
734xfs_iextents_copy(
735 xfs_inode_t *ip,
736 xfs_bmbt_rec_t *dp,
737 int whichfork)
738{
739 int copied;
740 int i;
741 xfs_ifork_t *ifp;
742 int nrecs;
743 xfs_fsblock_t start_block;
744
745 ifp = XFS_IFORK_PTR(ip, whichfork);
746 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
747 ASSERT(ifp->if_bytes > 0);
748
749 nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
750 XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
751 ASSERT(nrecs > 0);
752
753 /*
754 * There are some delayed allocation extents in the
755 * inode, so copy the extents one at a time and skip
756 * the delayed ones. There must be at least one
757 * non-delayed extent.
758 */
759 copied = 0;
760 for (i = 0; i < nrecs; i++) {
761 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
762 start_block = xfs_bmbt_get_startblock(ep);
763 if (isnullstartblock(start_block)) {
764 /*
765 * It's a delayed allocation extent, so skip it.
766 */
767 continue;
768 }
769
770 /* Translate to on disk format */
Dave Chinnerc5c249b2013-08-12 20:49:43 +1000771 put_unaligned_be64(ep->l0, &dp->l0);
772 put_unaligned_be64(ep->l1, &dp->l1);
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000773 dp++;
774 copied++;
775 }
776 ASSERT(copied != 0);
777 xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip));
778
779 return (copied * (uint)sizeof(xfs_bmbt_rec_t));
780}
781
782/*
783 * Each of the following cases stores data into the same region
784 * of the on-disk inode, so only one of them can be valid at
785 * any given time. While it is possible to have conflicting formats
786 * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
787 * in EXTENTS format, this can only happen when the fork has
788 * changed formats after being modified but before being flushed.
789 * In these cases, the format always takes precedence, because the
790 * format indicates the current state of the fork.
791 */
792void
793xfs_iflush_fork(
794 xfs_inode_t *ip,
795 xfs_dinode_t *dip,
796 xfs_inode_log_item_t *iip,
Eric Sandeenfd9fdba2014-04-14 19:04:46 +1000797 int whichfork)
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000798{
799 char *cp;
800 xfs_ifork_t *ifp;
801 xfs_mount_t *mp;
802 static const short brootflag[2] =
803 { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
804 static const short dataflag[2] =
805 { XFS_ILOG_DDATA, XFS_ILOG_ADATA };
806 static const short extflag[2] =
807 { XFS_ILOG_DEXT, XFS_ILOG_AEXT };
808
809 if (!iip)
810 return;
811 ifp = XFS_IFORK_PTR(ip, whichfork);
812 /*
813 * This can happen if we gave up in iformat in an error path,
814 * for the attribute fork.
815 */
816 if (!ifp) {
817 ASSERT(whichfork == XFS_ATTR_FORK);
818 return;
819 }
820 cp = XFS_DFORK_PTR(dip, whichfork);
821 mp = ip->i_mount;
822 switch (XFS_IFORK_FORMAT(ip, whichfork)) {
823 case XFS_DINODE_FMT_LOCAL:
824 if ((iip->ili_fields & dataflag[whichfork]) &&
825 (ifp->if_bytes > 0)) {
826 ASSERT(ifp->if_u1.if_data != NULL);
827 ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
828 memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
829 }
830 break;
831
832 case XFS_DINODE_FMT_EXTENTS:
833 ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
834 !(iip->ili_fields & extflag[whichfork]));
835 if ((iip->ili_fields & extflag[whichfork]) &&
836 (ifp->if_bytes > 0)) {
837 ASSERT(xfs_iext_get_ext(ifp, 0));
838 ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
839 (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
840 whichfork);
841 }
842 break;
843
844 case XFS_DINODE_FMT_BTREE:
845 if ((iip->ili_fields & brootflag[whichfork]) &&
846 (ifp->if_broot_bytes > 0)) {
847 ASSERT(ifp->if_broot != NULL);
848 ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
849 XFS_IFORK_SIZE(ip, whichfork));
850 xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
851 (xfs_bmdr_block_t *)cp,
852 XFS_DFORK_SIZE(dip, mp, whichfork));
853 }
854 break;
855
856 case XFS_DINODE_FMT_DEV:
857 if (iip->ili_fields & XFS_ILOG_DEV) {
858 ASSERT(whichfork == XFS_DATA_FORK);
859 xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
860 }
861 break;
862
863 case XFS_DINODE_FMT_UUID:
864 if (iip->ili_fields & XFS_ILOG_UUID) {
865 ASSERT(whichfork == XFS_DATA_FORK);
866 memcpy(XFS_DFORK_DPTR(dip),
867 &ip->i_df.if_u2.if_uuid,
868 sizeof(uuid_t));
869 }
870 break;
871
872 default:
873 ASSERT(0);
874 break;
875 }
876}
877
878/*
879 * Return a pointer to the extent record at file index idx.
880 */
881xfs_bmbt_rec_host_t *
882xfs_iext_get_ext(
883 xfs_ifork_t *ifp, /* inode fork pointer */
884 xfs_extnum_t idx) /* index of target extent */
885{
886 ASSERT(idx >= 0);
887 ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
888
889 if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
890 return ifp->if_u1.if_ext_irec->er_extbuf;
891 } else if (ifp->if_flags & XFS_IFEXTIREC) {
892 xfs_ext_irec_t *erp; /* irec pointer */
893 int erp_idx = 0; /* irec index */
894 xfs_extnum_t page_idx = idx; /* ext index in target list */
895
896 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
897 return &erp->er_extbuf[page_idx];
898 } else if (ifp->if_bytes) {
899 return &ifp->if_u1.if_extents[idx];
900 } else {
901 return NULL;
902 }
903}
904
905/*
906 * Insert new item(s) into the extent records for incore inode
907 * fork 'ifp'. 'count' new items are inserted at index 'idx'.
908 */
909void
910xfs_iext_insert(
911 xfs_inode_t *ip, /* incore inode pointer */
912 xfs_extnum_t idx, /* starting index of new items */
913 xfs_extnum_t count, /* number of inserted items */
914 xfs_bmbt_irec_t *new, /* items to insert */
915 int state) /* type of extent conversion */
916{
917 xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
918 xfs_extnum_t i; /* extent record index */
919
920 trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_);
921
922 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
923 xfs_iext_add(ifp, idx, count);
924 for (i = idx; i < idx + count; i++, new++)
925 xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new);
926}
927
928/*
929 * This is called when the amount of space required for incore file
930 * extents needs to be increased. The ext_diff parameter stores the
931 * number of new extents being added and the idx parameter contains
932 * the extent index where the new extents will be added. If the new
933 * extents are being appended, then we just need to (re)allocate and
934 * initialize the space. Otherwise, if the new extents are being
935 * inserted into the middle of the existing entries, a bit more work
936 * is required to make room for the new extents to be inserted. The
937 * caller is responsible for filling in the new extent entries upon
938 * return.
939 */
940void
941xfs_iext_add(
942 xfs_ifork_t *ifp, /* inode fork pointer */
943 xfs_extnum_t idx, /* index to begin adding exts */
944 int ext_diff) /* number of extents to add */
945{
946 int byte_diff; /* new bytes being added */
947 int new_size; /* size of extents after adding */
948 xfs_extnum_t nextents; /* number of extents in file */
949
950 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
951 ASSERT((idx >= 0) && (idx <= nextents));
952 byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
953 new_size = ifp->if_bytes + byte_diff;
954 /*
955 * If the new number of extents (nextents + ext_diff)
956 * fits inside the inode, then continue to use the inline
957 * extent buffer.
958 */
959 if (nextents + ext_diff <= XFS_INLINE_EXTS) {
960 if (idx < nextents) {
961 memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff],
962 &ifp->if_u2.if_inline_ext[idx],
963 (nextents - idx) * sizeof(xfs_bmbt_rec_t));
964 memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff);
965 }
966 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
967 ifp->if_real_bytes = 0;
968 }
969 /*
970 * Otherwise use a linear (direct) extent list.
971 * If the extents are currently inside the inode,
972 * xfs_iext_realloc_direct will switch us from
973 * inline to direct extent allocation mode.
974 */
975 else if (nextents + ext_diff <= XFS_LINEAR_EXTS) {
976 xfs_iext_realloc_direct(ifp, new_size);
977 if (idx < nextents) {
978 memmove(&ifp->if_u1.if_extents[idx + ext_diff],
979 &ifp->if_u1.if_extents[idx],
980 (nextents - idx) * sizeof(xfs_bmbt_rec_t));
981 memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
982 }
983 }
984 /* Indirection array */
985 else {
986 xfs_ext_irec_t *erp;
987 int erp_idx = 0;
988 int page_idx = idx;
989
990 ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS);
991 if (ifp->if_flags & XFS_IFEXTIREC) {
992 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1);
993 } else {
994 xfs_iext_irec_init(ifp);
995 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
996 erp = ifp->if_u1.if_ext_irec;
997 }
998 /* Extents fit in target extent page */
999 if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) {
1000 if (page_idx < erp->er_extcount) {
1001 memmove(&erp->er_extbuf[page_idx + ext_diff],
1002 &erp->er_extbuf[page_idx],
1003 (erp->er_extcount - page_idx) *
1004 sizeof(xfs_bmbt_rec_t));
1005 memset(&erp->er_extbuf[page_idx], 0, byte_diff);
1006 }
1007 erp->er_extcount += ext_diff;
1008 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
1009 }
1010 /* Insert a new extent page */
1011 else if (erp) {
1012 xfs_iext_add_indirect_multi(ifp,
1013 erp_idx, page_idx, ext_diff);
1014 }
1015 /*
1016 * If extent(s) are being appended to the last page in
1017 * the indirection array and the new extent(s) don't fit
1018 * in the page, then erp is NULL and erp_idx is set to
1019 * the next index needed in the indirection array.
1020 */
1021 else {
Jie Liubb86d212013-10-25 14:52:44 +08001022 uint count = ext_diff;
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001023
1024 while (count) {
1025 erp = xfs_iext_irec_new(ifp, erp_idx);
Jie Liubb86d212013-10-25 14:52:44 +08001026 erp->er_extcount = min(count, XFS_LINEAR_EXTS);
1027 count -= erp->er_extcount;
1028 if (count)
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001029 erp_idx++;
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001030 }
1031 }
1032 }
1033 ifp->if_bytes = new_size;
1034}
1035
1036/*
1037 * This is called when incore extents are being added to the indirection
1038 * array and the new extents do not fit in the target extent list. The
1039 * erp_idx parameter contains the irec index for the target extent list
1040 * in the indirection array, and the idx parameter contains the extent
1041 * index within the list. The number of extents being added is stored
1042 * in the count parameter.
1043 *
1044 * |-------| |-------|
1045 * | | | | idx - number of extents before idx
1046 * | idx | | count |
1047 * | | | | count - number of extents being inserted at idx
1048 * |-------| |-------|
1049 * | count | | nex2 | nex2 - number of extents after idx + count
1050 * |-------| |-------|
1051 */
1052void
1053xfs_iext_add_indirect_multi(
1054 xfs_ifork_t *ifp, /* inode fork pointer */
1055 int erp_idx, /* target extent irec index */
1056 xfs_extnum_t idx, /* index within target list */
1057 int count) /* new extents being added */
1058{
1059 int byte_diff; /* new bytes being added */
1060 xfs_ext_irec_t *erp; /* pointer to irec entry */
1061 xfs_extnum_t ext_diff; /* number of extents to add */
1062 xfs_extnum_t ext_cnt; /* new extents still needed */
1063 xfs_extnum_t nex2; /* extents after idx + count */
1064 xfs_bmbt_rec_t *nex2_ep = NULL; /* temp list for nex2 extents */
1065 int nlists; /* number of irec's (lists) */
1066
1067 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1068 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1069 nex2 = erp->er_extcount - idx;
1070 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1071
1072 /*
1073 * Save second part of target extent list
1074 * (all extents past */
1075 if (nex2) {
1076 byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
1077 nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
1078 memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
1079 erp->er_extcount -= nex2;
1080 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
1081 memset(&erp->er_extbuf[idx], 0, byte_diff);
1082 }
1083
1084 /*
1085 * Add the new extents to the end of the target
1086 * list, then allocate new irec record(s) and
1087 * extent buffer(s) as needed to store the rest
1088 * of the new extents.
1089 */
1090 ext_cnt = count;
1091 ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount);
1092 if (ext_diff) {
1093 erp->er_extcount += ext_diff;
1094 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
1095 ext_cnt -= ext_diff;
1096 }
1097 while (ext_cnt) {
1098 erp_idx++;
1099 erp = xfs_iext_irec_new(ifp, erp_idx);
1100 ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS);
1101 erp->er_extcount = ext_diff;
1102 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
1103 ext_cnt -= ext_diff;
1104 }
1105
1106 /* Add nex2 extents back to indirection array */
1107 if (nex2) {
1108 xfs_extnum_t ext_avail;
1109 int i;
1110
1111 byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
1112 ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
1113 i = 0;
1114 /*
1115 * If nex2 extents fit in the current page, append
1116 * nex2_ep after the new extents.
1117 */
1118 if (nex2 <= ext_avail) {
1119 i = erp->er_extcount;
1120 }
1121 /*
1122 * Otherwise, check if space is available in the
1123 * next page.
1124 */
1125 else if ((erp_idx < nlists - 1) &&
1126 (nex2 <= (ext_avail = XFS_LINEAR_EXTS -
1127 ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) {
1128 erp_idx++;
1129 erp++;
1130 /* Create a hole for nex2 extents */
1131 memmove(&erp->er_extbuf[nex2], erp->er_extbuf,
1132 erp->er_extcount * sizeof(xfs_bmbt_rec_t));
1133 }
1134 /*
1135 * Final choice, create a new extent page for
1136 * nex2 extents.
1137 */
1138 else {
1139 erp_idx++;
1140 erp = xfs_iext_irec_new(ifp, erp_idx);
1141 }
1142 memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
1143 kmem_free(nex2_ep);
1144 erp->er_extcount += nex2;
1145 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
1146 }
1147}
1148
1149/*
1150 * This is called when the amount of space required for incore file
1151 * extents needs to be decreased. The ext_diff parameter stores the
1152 * number of extents to be removed and the idx parameter contains
1153 * the extent index where the extents will be removed from.
1154 *
1155 * If the amount of space needed has decreased below the linear
1156 * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous
1157 * extent array. Otherwise, use kmem_realloc() to adjust the
1158 * size to what is needed.
1159 */
1160void
1161xfs_iext_remove(
1162 xfs_inode_t *ip, /* incore inode pointer */
1163 xfs_extnum_t idx, /* index to begin removing exts */
1164 int ext_diff, /* number of extents to remove */
1165 int state) /* type of extent conversion */
1166{
1167 xfs_ifork_t *ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
1168 xfs_extnum_t nextents; /* number of extents in file */
1169 int new_size; /* size of extents after removal */
1170
1171 trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
1172
1173 ASSERT(ext_diff > 0);
1174 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1175 new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
1176
1177 if (new_size == 0) {
1178 xfs_iext_destroy(ifp);
1179 } else if (ifp->if_flags & XFS_IFEXTIREC) {
1180 xfs_iext_remove_indirect(ifp, idx, ext_diff);
1181 } else if (ifp->if_real_bytes) {
1182 xfs_iext_remove_direct(ifp, idx, ext_diff);
1183 } else {
1184 xfs_iext_remove_inline(ifp, idx, ext_diff);
1185 }
1186 ifp->if_bytes = new_size;
1187}
1188
1189/*
1190 * This removes ext_diff extents from the inline buffer, beginning
1191 * at extent index idx.
1192 */
1193void
1194xfs_iext_remove_inline(
1195 xfs_ifork_t *ifp, /* inode fork pointer */
1196 xfs_extnum_t idx, /* index to begin removing exts */
1197 int ext_diff) /* number of extents to remove */
1198{
1199 int nextents; /* number of extents in file */
1200
1201 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
1202 ASSERT(idx < XFS_INLINE_EXTS);
1203 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1204 ASSERT(((nextents - ext_diff) > 0) &&
1205 (nextents - ext_diff) < XFS_INLINE_EXTS);
1206
1207 if (idx + ext_diff < nextents) {
1208 memmove(&ifp->if_u2.if_inline_ext[idx],
1209 &ifp->if_u2.if_inline_ext[idx + ext_diff],
1210 (nextents - (idx + ext_diff)) *
1211 sizeof(xfs_bmbt_rec_t));
1212 memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff],
1213 0, ext_diff * sizeof(xfs_bmbt_rec_t));
1214 } else {
1215 memset(&ifp->if_u2.if_inline_ext[idx], 0,
1216 ext_diff * sizeof(xfs_bmbt_rec_t));
1217 }
1218}
1219
1220/*
1221 * This removes ext_diff extents from a linear (direct) extent list,
1222 * beginning at extent index idx. If the extents are being removed
1223 * from the end of the list (ie. truncate) then we just need to re-
1224 * allocate the list to remove the extra space. Otherwise, if the
1225 * extents are being removed from the middle of the existing extent
1226 * entries, then we first need to move the extent records beginning
1227 * at idx + ext_diff up in the list to overwrite the records being
1228 * removed, then remove the extra space via kmem_realloc.
1229 */
1230void
1231xfs_iext_remove_direct(
1232 xfs_ifork_t *ifp, /* inode fork pointer */
1233 xfs_extnum_t idx, /* index to begin removing exts */
1234 int ext_diff) /* number of extents to remove */
1235{
1236 xfs_extnum_t nextents; /* number of extents in file */
1237 int new_size; /* size of extents after removal */
1238
1239 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
1240 new_size = ifp->if_bytes -
1241 (ext_diff * sizeof(xfs_bmbt_rec_t));
1242 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1243
1244 if (new_size == 0) {
1245 xfs_iext_destroy(ifp);
1246 return;
1247 }
1248 /* Move extents up in the list (if needed) */
1249 if (idx + ext_diff < nextents) {
1250 memmove(&ifp->if_u1.if_extents[idx],
1251 &ifp->if_u1.if_extents[idx + ext_diff],
1252 (nextents - (idx + ext_diff)) *
1253 sizeof(xfs_bmbt_rec_t));
1254 }
1255 memset(&ifp->if_u1.if_extents[nextents - ext_diff],
1256 0, ext_diff * sizeof(xfs_bmbt_rec_t));
1257 /*
1258 * Reallocate the direct extent list. If the extents
1259 * will fit inside the inode then xfs_iext_realloc_direct
1260 * will switch from direct to inline extent allocation
1261 * mode for us.
1262 */
1263 xfs_iext_realloc_direct(ifp, new_size);
1264 ifp->if_bytes = new_size;
1265}
1266
1267/*
1268 * This is called when incore extents are being removed from the
1269 * indirection array and the extents being removed span multiple extent
1270 * buffers. The idx parameter contains the file extent index where we
1271 * want to begin removing extents, and the count parameter contains
1272 * how many extents need to be removed.
1273 *
1274 * |-------| |-------|
1275 * | nex1 | | | nex1 - number of extents before idx
1276 * |-------| | count |
1277 * | | | | count - number of extents being removed at idx
1278 * | count | |-------|
1279 * | | | nex2 | nex2 - number of extents after idx + count
1280 * |-------| |-------|
1281 */
1282void
1283xfs_iext_remove_indirect(
1284 xfs_ifork_t *ifp, /* inode fork pointer */
1285 xfs_extnum_t idx, /* index to begin removing extents */
1286 int count) /* number of extents to remove */
1287{
1288 xfs_ext_irec_t *erp; /* indirection array pointer */
1289 int erp_idx = 0; /* indirection array index */
1290 xfs_extnum_t ext_cnt; /* extents left to remove */
1291 xfs_extnum_t ext_diff; /* extents to remove in current list */
1292 xfs_extnum_t nex1; /* number of extents before idx */
1293 xfs_extnum_t nex2; /* extents after idx + count */
1294 int page_idx = idx; /* index in target extent list */
1295
1296 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1297 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
1298 ASSERT(erp != NULL);
1299 nex1 = page_idx;
1300 ext_cnt = count;
1301 while (ext_cnt) {
1302 nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0);
1303 ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1));
1304 /*
1305 * Check for deletion of entire list;
1306 * xfs_iext_irec_remove() updates extent offsets.
1307 */
1308 if (ext_diff == erp->er_extcount) {
1309 xfs_iext_irec_remove(ifp, erp_idx);
1310 ext_cnt -= ext_diff;
1311 nex1 = 0;
1312 if (ext_cnt) {
1313 ASSERT(erp_idx < ifp->if_real_bytes /
1314 XFS_IEXT_BUFSZ);
1315 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1316 nex1 = 0;
1317 continue;
1318 } else {
1319 break;
1320 }
1321 }
1322 /* Move extents up (if needed) */
1323 if (nex2) {
1324 memmove(&erp->er_extbuf[nex1],
1325 &erp->er_extbuf[nex1 + ext_diff],
1326 nex2 * sizeof(xfs_bmbt_rec_t));
1327 }
1328 /* Zero out rest of page */
1329 memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ -
1330 ((nex1 + nex2) * sizeof(xfs_bmbt_rec_t))));
1331 /* Update remaining counters */
1332 erp->er_extcount -= ext_diff;
1333 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff);
1334 ext_cnt -= ext_diff;
1335 nex1 = 0;
1336 erp_idx++;
1337 erp++;
1338 }
1339 ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t);
1340 xfs_iext_irec_compact(ifp);
1341}
1342
1343/*
1344 * Create, destroy, or resize a linear (direct) block of extents.
1345 */
1346void
1347xfs_iext_realloc_direct(
1348 xfs_ifork_t *ifp, /* inode fork pointer */
Jie Liu17ec81c2013-09-22 16:25:15 +08001349 int new_size) /* new size of extents after adding */
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001350{
1351 int rnew_size; /* real new size of extents */
1352
1353 rnew_size = new_size;
1354
1355 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) ||
1356 ((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) &&
1357 (new_size != ifp->if_real_bytes)));
1358
1359 /* Free extent records */
1360 if (new_size == 0) {
1361 xfs_iext_destroy(ifp);
1362 }
1363 /* Resize direct extent list and zero any new bytes */
1364 else if (ifp->if_real_bytes) {
1365 /* Check if extents will fit inside the inode */
1366 if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) {
1367 xfs_iext_direct_to_inline(ifp, new_size /
1368 (uint)sizeof(xfs_bmbt_rec_t));
1369 ifp->if_bytes = new_size;
1370 return;
1371 }
1372 if (!is_power_of_2(new_size)){
1373 rnew_size = roundup_pow_of_two(new_size);
1374 }
1375 if (rnew_size != ifp->if_real_bytes) {
1376 ifp->if_u1.if_extents =
1377 kmem_realloc(ifp->if_u1.if_extents,
1378 rnew_size,
1379 ifp->if_real_bytes, KM_NOFS);
1380 }
1381 if (rnew_size > ifp->if_real_bytes) {
1382 memset(&ifp->if_u1.if_extents[ifp->if_bytes /
1383 (uint)sizeof(xfs_bmbt_rec_t)], 0,
1384 rnew_size - ifp->if_real_bytes);
1385 }
1386 }
Jie Liu17ec81c2013-09-22 16:25:15 +08001387 /* Switch from the inline extent buffer to a direct extent list */
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001388 else {
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001389 if (!is_power_of_2(new_size)) {
1390 rnew_size = roundup_pow_of_two(new_size);
1391 }
1392 xfs_iext_inline_to_direct(ifp, rnew_size);
1393 }
1394 ifp->if_real_bytes = rnew_size;
1395 ifp->if_bytes = new_size;
1396}
1397
1398/*
1399 * Switch from linear (direct) extent records to inline buffer.
1400 */
1401void
1402xfs_iext_direct_to_inline(
1403 xfs_ifork_t *ifp, /* inode fork pointer */
1404 xfs_extnum_t nextents) /* number of extents in file */
1405{
1406 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
1407 ASSERT(nextents <= XFS_INLINE_EXTS);
1408 /*
1409 * The inline buffer was zeroed when we switched
1410 * from inline to direct extent allocation mode,
1411 * so we don't need to clear it here.
1412 */
1413 memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
1414 nextents * sizeof(xfs_bmbt_rec_t));
1415 kmem_free(ifp->if_u1.if_extents);
1416 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
1417 ifp->if_real_bytes = 0;
1418}
1419
1420/*
1421 * Switch from inline buffer to linear (direct) extent records.
1422 * new_size should already be rounded up to the next power of 2
1423 * by the caller (when appropriate), so use new_size as it is.
1424 * However, since new_size may be rounded up, we can't update
1425 * if_bytes here. It is the caller's responsibility to update
1426 * if_bytes upon return.
1427 */
1428void
1429xfs_iext_inline_to_direct(
1430 xfs_ifork_t *ifp, /* inode fork pointer */
1431 int new_size) /* number of extents in file */
1432{
1433 ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
1434 memset(ifp->if_u1.if_extents, 0, new_size);
1435 if (ifp->if_bytes) {
1436 memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
1437 ifp->if_bytes);
1438 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
1439 sizeof(xfs_bmbt_rec_t));
1440 }
1441 ifp->if_real_bytes = new_size;
1442}
1443
1444/*
1445 * Resize an extent indirection array to new_size bytes.
1446 */
1447STATIC void
1448xfs_iext_realloc_indirect(
1449 xfs_ifork_t *ifp, /* inode fork pointer */
1450 int new_size) /* new indirection array size */
1451{
1452 int nlists; /* number of irec's (ex lists) */
1453 int size; /* current indirection array size */
1454
1455 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1456 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1457 size = nlists * sizeof(xfs_ext_irec_t);
1458 ASSERT(ifp->if_real_bytes);
1459 ASSERT((new_size >= 0) && (new_size != size));
1460 if (new_size == 0) {
1461 xfs_iext_destroy(ifp);
1462 } else {
1463 ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)
1464 kmem_realloc(ifp->if_u1.if_ext_irec,
1465 new_size, size, KM_NOFS);
1466 }
1467}
1468
1469/*
1470 * Switch from indirection array to linear (direct) extent allocations.
1471 */
1472STATIC void
1473xfs_iext_indirect_to_direct(
1474 xfs_ifork_t *ifp) /* inode fork pointer */
1475{
1476 xfs_bmbt_rec_host_t *ep; /* extent record pointer */
1477 xfs_extnum_t nextents; /* number of extents in file */
1478 int size; /* size of file extents */
1479
1480 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1481 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1482 ASSERT(nextents <= XFS_LINEAR_EXTS);
1483 size = nextents * sizeof(xfs_bmbt_rec_t);
1484
1485 xfs_iext_irec_compact_pages(ifp);
1486 ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
1487
1488 ep = ifp->if_u1.if_ext_irec->er_extbuf;
1489 kmem_free(ifp->if_u1.if_ext_irec);
1490 ifp->if_flags &= ~XFS_IFEXTIREC;
1491 ifp->if_u1.if_extents = ep;
1492 ifp->if_bytes = size;
1493 if (nextents < XFS_LINEAR_EXTS) {
1494 xfs_iext_realloc_direct(ifp, size);
1495 }
1496}
1497
1498/*
1499 * Free incore file extents.
1500 */
1501void
1502xfs_iext_destroy(
1503 xfs_ifork_t *ifp) /* inode fork pointer */
1504{
1505 if (ifp->if_flags & XFS_IFEXTIREC) {
1506 int erp_idx;
1507 int nlists;
1508
1509 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1510 for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
1511 xfs_iext_irec_remove(ifp, erp_idx);
1512 }
1513 ifp->if_flags &= ~XFS_IFEXTIREC;
1514 } else if (ifp->if_real_bytes) {
1515 kmem_free(ifp->if_u1.if_extents);
1516 } else if (ifp->if_bytes) {
1517 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
1518 sizeof(xfs_bmbt_rec_t));
1519 }
1520 ifp->if_u1.if_extents = NULL;
1521 ifp->if_real_bytes = 0;
1522 ifp->if_bytes = 0;
1523}
1524
1525/*
1526 * Return a pointer to the extent record for file system block bno.
1527 */
1528xfs_bmbt_rec_host_t * /* pointer to found extent record */
1529xfs_iext_bno_to_ext(
1530 xfs_ifork_t *ifp, /* inode fork pointer */
1531 xfs_fileoff_t bno, /* block number to search for */
1532 xfs_extnum_t *idxp) /* index of target extent */
1533{
1534 xfs_bmbt_rec_host_t *base; /* pointer to first extent */
1535 xfs_filblks_t blockcount = 0; /* number of blocks in extent */
1536 xfs_bmbt_rec_host_t *ep = NULL; /* pointer to target extent */
1537 xfs_ext_irec_t *erp = NULL; /* indirection array pointer */
1538 int high; /* upper boundary in search */
1539 xfs_extnum_t idx = 0; /* index of target extent */
1540 int low; /* lower boundary in search */
1541 xfs_extnum_t nextents; /* number of file extents */
1542 xfs_fileoff_t startoff = 0; /* start offset of extent */
1543
1544 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1545 if (nextents == 0) {
1546 *idxp = 0;
1547 return NULL;
1548 }
1549 low = 0;
1550 if (ifp->if_flags & XFS_IFEXTIREC) {
1551 /* Find target extent list */
1552 int erp_idx = 0;
1553 erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
1554 base = erp->er_extbuf;
1555 high = erp->er_extcount - 1;
1556 } else {
1557 base = ifp->if_u1.if_extents;
1558 high = nextents - 1;
1559 }
1560 /* Binary search extent records */
1561 while (low <= high) {
1562 idx = (low + high) >> 1;
1563 ep = base + idx;
1564 startoff = xfs_bmbt_get_startoff(ep);
1565 blockcount = xfs_bmbt_get_blockcount(ep);
1566 if (bno < startoff) {
1567 high = idx - 1;
1568 } else if (bno >= startoff + blockcount) {
1569 low = idx + 1;
1570 } else {
1571 /* Convert back to file-based extent index */
1572 if (ifp->if_flags & XFS_IFEXTIREC) {
1573 idx += erp->er_extoff;
1574 }
1575 *idxp = idx;
1576 return ep;
1577 }
1578 }
1579 /* Convert back to file-based extent index */
1580 if (ifp->if_flags & XFS_IFEXTIREC) {
1581 idx += erp->er_extoff;
1582 }
1583 if (bno >= startoff + blockcount) {
1584 if (++idx == nextents) {
1585 ep = NULL;
1586 } else {
1587 ep = xfs_iext_get_ext(ifp, idx);
1588 }
1589 }
1590 *idxp = idx;
1591 return ep;
1592}
1593
1594/*
1595 * Return a pointer to the indirection array entry containing the
1596 * extent record for filesystem block bno. Store the index of the
1597 * target irec in *erp_idxp.
1598 */
1599xfs_ext_irec_t * /* pointer to found extent record */
1600xfs_iext_bno_to_irec(
1601 xfs_ifork_t *ifp, /* inode fork pointer */
1602 xfs_fileoff_t bno, /* block number to search for */
1603 int *erp_idxp) /* irec index of target ext list */
1604{
1605 xfs_ext_irec_t *erp = NULL; /* indirection array pointer */
1606 xfs_ext_irec_t *erp_next; /* next indirection array entry */
1607 int erp_idx; /* indirection array index */
1608 int nlists; /* number of extent irec's (lists) */
1609 int high; /* binary search upper limit */
1610 int low; /* binary search lower limit */
1611
1612 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1613 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1614 erp_idx = 0;
1615 low = 0;
1616 high = nlists - 1;
1617 while (low <= high) {
1618 erp_idx = (low + high) >> 1;
1619 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1620 erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL;
1621 if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) {
1622 high = erp_idx - 1;
1623 } else if (erp_next && bno >=
1624 xfs_bmbt_get_startoff(erp_next->er_extbuf)) {
1625 low = erp_idx + 1;
1626 } else {
1627 break;
1628 }
1629 }
1630 *erp_idxp = erp_idx;
1631 return erp;
1632}
1633
1634/*
1635 * Return a pointer to the indirection array entry containing the
1636 * extent record at file extent index *idxp. Store the index of the
1637 * target irec in *erp_idxp and store the page index of the target
1638 * extent record in *idxp.
1639 */
1640xfs_ext_irec_t *
1641xfs_iext_idx_to_irec(
1642 xfs_ifork_t *ifp, /* inode fork pointer */
1643 xfs_extnum_t *idxp, /* extent index (file -> page) */
1644 int *erp_idxp, /* pointer to target irec */
1645 int realloc) /* new bytes were just added */
1646{
1647 xfs_ext_irec_t *prev; /* pointer to previous irec */
1648 xfs_ext_irec_t *erp = NULL; /* pointer to current irec */
1649 int erp_idx; /* indirection array index */
1650 int nlists; /* number of irec's (ex lists) */
1651 int high; /* binary search upper limit */
1652 int low; /* binary search lower limit */
1653 xfs_extnum_t page_idx = *idxp; /* extent index in target list */
1654
1655 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1656 ASSERT(page_idx >= 0);
1657 ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
1658 ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);
1659
1660 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1661 erp_idx = 0;
1662 low = 0;
1663 high = nlists - 1;
1664
1665 /* Binary search extent irec's */
1666 while (low <= high) {
1667 erp_idx = (low + high) >> 1;
1668 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1669 prev = erp_idx > 0 ? erp - 1 : NULL;
1670 if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
1671 realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
1672 high = erp_idx - 1;
1673 } else if (page_idx > erp->er_extoff + erp->er_extcount ||
1674 (page_idx == erp->er_extoff + erp->er_extcount &&
1675 !realloc)) {
1676 low = erp_idx + 1;
1677 } else if (page_idx == erp->er_extoff + erp->er_extcount &&
1678 erp->er_extcount == XFS_LINEAR_EXTS) {
1679 ASSERT(realloc);
1680 page_idx = 0;
1681 erp_idx++;
1682 erp = erp_idx < nlists ? erp + 1 : NULL;
1683 break;
1684 } else {
1685 page_idx -= erp->er_extoff;
1686 break;
1687 }
1688 }
1689 *idxp = page_idx;
1690 *erp_idxp = erp_idx;
Eric Sandeend99831f2014-06-22 15:03:54 +10001691 return erp;
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001692}
1693
1694/*
1695 * Allocate and initialize an indirection array once the space needed
1696 * for incore extents increases above XFS_IEXT_BUFSZ.
1697 */
1698void
1699xfs_iext_irec_init(
1700 xfs_ifork_t *ifp) /* inode fork pointer */
1701{
1702 xfs_ext_irec_t *erp; /* indirection array pointer */
1703 xfs_extnum_t nextents; /* number of extents in file */
1704
1705 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
1706 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1707 ASSERT(nextents <= XFS_LINEAR_EXTS);
1708
1709 erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
1710
1711 if (nextents == 0) {
1712 ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
1713 } else if (!ifp->if_real_bytes) {
1714 xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
1715 } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
1716 xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ);
1717 }
1718 erp->er_extbuf = ifp->if_u1.if_extents;
1719 erp->er_extcount = nextents;
1720 erp->er_extoff = 0;
1721
1722 ifp->if_flags |= XFS_IFEXTIREC;
1723 ifp->if_real_bytes = XFS_IEXT_BUFSZ;
1724 ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t);
1725 ifp->if_u1.if_ext_irec = erp;
1726
1727 return;
1728}
1729
1730/*
1731 * Allocate and initialize a new entry in the indirection array.
1732 */
1733xfs_ext_irec_t *
1734xfs_iext_irec_new(
1735 xfs_ifork_t *ifp, /* inode fork pointer */
1736 int erp_idx) /* index for new irec */
1737{
1738 xfs_ext_irec_t *erp; /* indirection array pointer */
1739 int i; /* loop counter */
1740 int nlists; /* number of irec's (ex lists) */
1741
1742 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1743 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1744
1745 /* Resize indirection array */
1746 xfs_iext_realloc_indirect(ifp, ++nlists *
1747 sizeof(xfs_ext_irec_t));
1748 /*
1749 * Move records down in the array so the
1750 * new page can use erp_idx.
1751 */
1752 erp = ifp->if_u1.if_ext_irec;
1753 for (i = nlists - 1; i > erp_idx; i--) {
1754 memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t));
1755 }
1756 ASSERT(i == erp_idx);
1757
1758 /* Initialize new extent record */
1759 erp = ifp->if_u1.if_ext_irec;
1760 erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
1761 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
1762 memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
1763 erp[erp_idx].er_extcount = 0;
1764 erp[erp_idx].er_extoff = erp_idx > 0 ?
1765 erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0;
1766 return (&erp[erp_idx]);
1767}
1768
1769/*
1770 * Remove a record from the indirection array.
1771 */
1772void
1773xfs_iext_irec_remove(
1774 xfs_ifork_t *ifp, /* inode fork pointer */
1775 int erp_idx) /* irec index to remove */
1776{
1777 xfs_ext_irec_t *erp; /* indirection array pointer */
1778 int i; /* loop counter */
1779 int nlists; /* number of irec's (ex lists) */
1780
1781 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1782 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1783 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1784 if (erp->er_extbuf) {
1785 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
1786 -erp->er_extcount);
1787 kmem_free(erp->er_extbuf);
1788 }
1789 /* Compact extent records */
1790 erp = ifp->if_u1.if_ext_irec;
1791 for (i = erp_idx; i < nlists - 1; i++) {
1792 memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
1793 }
1794 /*
1795 * Manually free the last extent record from the indirection
1796 * array. A call to xfs_iext_realloc_indirect() with a size
1797 * of zero would result in a call to xfs_iext_destroy() which
1798 * would in turn call this function again, creating a nasty
1799 * infinite loop.
1800 */
1801 if (--nlists) {
1802 xfs_iext_realloc_indirect(ifp,
1803 nlists * sizeof(xfs_ext_irec_t));
1804 } else {
1805 kmem_free(ifp->if_u1.if_ext_irec);
1806 }
1807 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
1808}
1809
1810/*
1811 * This is called to clean up large amounts of unused memory allocated
1812 * by the indirection array. Before compacting anything though, verify
1813 * that the indirection array is still needed and switch back to the
1814 * linear extent list (or even the inline buffer) if possible. The
1815 * compaction policy is as follows:
1816 *
1817 * Full Compaction: Extents fit into a single page (or inline buffer)
1818 * Partial Compaction: Extents occupy less than 50% of allocated space
1819 * No Compaction: Extents occupy at least 50% of allocated space
1820 */
1821void
1822xfs_iext_irec_compact(
1823 xfs_ifork_t *ifp) /* inode fork pointer */
1824{
1825 xfs_extnum_t nextents; /* number of extents in file */
1826 int nlists; /* number of irec's (ex lists) */
1827
1828 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1829 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1830 nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1831
1832 if (nextents == 0) {
1833 xfs_iext_destroy(ifp);
1834 } else if (nextents <= XFS_INLINE_EXTS) {
1835 xfs_iext_indirect_to_direct(ifp);
1836 xfs_iext_direct_to_inline(ifp, nextents);
1837 } else if (nextents <= XFS_LINEAR_EXTS) {
1838 xfs_iext_indirect_to_direct(ifp);
1839 } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
1840 xfs_iext_irec_compact_pages(ifp);
1841 }
1842}
1843
1844/*
1845 * Combine extents from neighboring extent pages.
1846 */
1847void
1848xfs_iext_irec_compact_pages(
1849 xfs_ifork_t *ifp) /* inode fork pointer */
1850{
1851 xfs_ext_irec_t *erp, *erp_next;/* pointers to irec entries */
1852 int erp_idx = 0; /* indirection array index */
1853 int nlists; /* number of irec's (ex lists) */
1854
1855 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1856 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1857 while (erp_idx < nlists - 1) {
1858 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1859 erp_next = erp + 1;
1860 if (erp_next->er_extcount <=
1861 (XFS_LINEAR_EXTS - erp->er_extcount)) {
1862 memcpy(&erp->er_extbuf[erp->er_extcount],
1863 erp_next->er_extbuf, erp_next->er_extcount *
1864 sizeof(xfs_bmbt_rec_t));
1865 erp->er_extcount += erp_next->er_extcount;
1866 /*
1867 * Free page before removing extent record
1868 * so er_extoffs don't get modified in
1869 * xfs_iext_irec_remove.
1870 */
1871 kmem_free(erp_next->er_extbuf);
1872 erp_next->er_extbuf = NULL;
1873 xfs_iext_irec_remove(ifp, erp_idx + 1);
1874 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1875 } else {
1876 erp_idx++;
1877 }
1878 }
1879}
1880
1881/*
1882 * This is called to update the er_extoff field in the indirection
1883 * array when extents have been added or removed from one of the
1884 * extent lists. erp_idx contains the irec index to begin updating
1885 * at and ext_diff contains the number of extents that were added
1886 * or removed.
1887 */
1888void
1889xfs_iext_irec_update_extoffs(
1890 xfs_ifork_t *ifp, /* inode fork pointer */
1891 int erp_idx, /* irec index to update */
1892 int ext_diff) /* number of new extents */
1893{
1894 int i; /* loop counter */
1895 int nlists; /* number of irec's (ex lists */
1896
1897 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1898 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1899 for (i = erp_idx; i < nlists; i++) {
1900 ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
1901 }
1902}