blob: 911ff791a896bb6183e296ac5f1fcdf87439e4df [file] [log] [blame]
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001/*
2 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include <linux/log2.h>
19
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_format.h"
Dave Chinner239880e2013-10-23 10:50:10 +110023#include "xfs_log_format.h"
24#include "xfs_trans_resv.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100025#include "xfs_mount.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100026#include "xfs_inode.h"
Dave Chinner239880e2013-10-23 10:50:10 +110027#include "xfs_trans.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100028#include "xfs_inode_item.h"
Darrick J. Wongb3bf6072017-02-02 15:13:59 -080029#include "xfs_btree.h"
Dave Chinnera4fbe6a2013-10-23 10:51:50 +110030#include "xfs_bmap_btree.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100031#include "xfs_bmap.h"
32#include "xfs_error.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100033#include "xfs_trace.h"
Dave Chinnera4fbe6a2013-10-23 10:51:50 +110034#include "xfs_attr_sf.h"
Darrick J. Wong244efea2016-02-08 15:00:01 +110035#include "xfs_da_format.h"
Darrick J. Wong630a04e2017-03-15 00:24:25 -070036#include "xfs_da_btree.h"
37#include "xfs_dir2_priv.h"
Dave Chinner5c4d97d2013-08-12 20:49:33 +100038
39kmem_zone_t *xfs_ifork_zone;
40
41STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
42STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
43STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
44
Christoph Hellwig66f36462017-10-19 11:07:09 -070045static inline dev_t xfs_to_linux_dev_t(xfs_dev_t dev)
46{
47 return MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev));
48}
49
/*
 * Copy inode type and data and attr format specific information from the
 * on-disk inode to the in-core inode and fork structures. For fifos, devices,
 * and sockets this means set i_rdev to the proper value. For files,
 * directories, and symlinks this means to bring in the in-line data or extent
 * pointers as well as the attribute fork. For a fork in B-tree format, only
 * the root is immediately brought in-core. The rest will be read in later when
 * first referenced (see xfs_iread_extents()).
 *
 * Returns 0 on success or -EFSCORRUPTED when any of the on-disk sanity
 * checks fail; on attr-fork failure all partially built in-core state
 * (attr fork, cow fork, data fork) is torn down before returning.
 */
int
xfs_iformat_fork(
	struct xfs_inode	*ip,
	struct xfs_dinode	*dip)
{
	struct inode		*inode = VFS_I(ip);
	struct xfs_attr_shortform *atp;
	int			size;
	int			error = 0;
	xfs_fsize_t		di_size;

	/*
	 * Whole-inode sanity checks first: the extent counts must not
	 * exceed the block count, the attr fork offset must lie inside
	 * the inode, and flag combinations must be coherent.
	 */
	if (unlikely(be32_to_cpu(dip->di_nextents) +
		     be16_to_cpu(dip->di_anextents) >
		     be64_to_cpu(dip->di_nblocks))) {
		xfs_warn(ip->i_mount,
			"corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
			(unsigned long long)ip->i_ino,
			(int)(be32_to_cpu(dip->di_nextents) +
			      be16_to_cpu(dip->di_anextents)),
			(unsigned long long)
				be64_to_cpu(dip->di_nblocks));
		XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
				     ip->i_mount, dip);
		return -EFSCORRUPTED;
	}

	if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
		xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
			(unsigned long long)ip->i_ino,
			dip->di_forkoff);
		XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
				     ip->i_mount, dip);
		return -EFSCORRUPTED;
	}

	/* A realtime file is only valid if the fs has a realtime device. */
	if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
		     !ip->i_mount->m_rtdev_targp)) {
		xfs_warn(ip->i_mount,
			"corrupt dinode %Lu, has realtime flag set.",
			ip->i_ino);
		XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
				     XFS_ERRLEVEL_LOW, ip->i_mount, dip);
		return -EFSCORRUPTED;
	}

	/* Reflink is only valid on regular files... */
	if (unlikely(xfs_is_reflink_inode(ip) && !S_ISREG(inode->i_mode))) {
		xfs_warn(ip->i_mount,
			"corrupt dinode %llu, wrong file type for reflink.",
			ip->i_ino);
		XFS_CORRUPTION_ERROR("xfs_iformat(reflink)",
				     XFS_ERRLEVEL_LOW, ip->i_mount, dip);
		return -EFSCORRUPTED;
	}

	/* ...and is mutually exclusive with the realtime flag. */
	if (unlikely(xfs_is_reflink_inode(ip) &&
	    (ip->i_d.di_flags & XFS_DIFLAG_REALTIME))) {
		xfs_warn(ip->i_mount,
			"corrupt dinode %llu, has reflink+realtime flag set.",
			ip->i_ino);
		XFS_CORRUPTION_ERROR("xfs_iformat(reflink)",
				     XFS_ERRLEVEL_LOW, ip->i_mount, dip);
		return -EFSCORRUPTED;
	}

	/* Bring in the data fork according to the inode's file type. */
	switch (inode->i_mode & S_IFMT) {
	case S_IFIFO:
	case S_IFCHR:
	case S_IFBLK:
	case S_IFSOCK:
		/* Special files carry only a device number, no data fork. */
		if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
			XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
					     ip->i_mount, dip);
			return -EFSCORRUPTED;
		}
		ip->i_d.di_size = 0;
		inode->i_rdev = xfs_to_linux_dev_t(xfs_dinode_get_rdev(dip));
		break;

	case S_IFREG:
	case S_IFLNK:
	case S_IFDIR:
		switch (dip->di_format) {
		case XFS_DINODE_FMT_LOCAL:
			/*
			 * no local regular files yet
			 */
			if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
				xfs_warn(ip->i_mount,
			"corrupt inode %Lu (local format for regular file).",
					(unsigned long long) ip->i_ino);
				XFS_CORRUPTION_ERROR("xfs_iformat(4)",
						     XFS_ERRLEVEL_LOW,
						     ip->i_mount, dip);
				return -EFSCORRUPTED;
			}

			di_size = be64_to_cpu(dip->di_size);
			if (unlikely(di_size < 0 ||
				     di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
				xfs_warn(ip->i_mount,
			"corrupt inode %Lu (bad size %Ld for local inode).",
					(unsigned long long) ip->i_ino,
					(long long) di_size);
				XFS_CORRUPTION_ERROR("xfs_iformat(5)",
						     XFS_ERRLEVEL_LOW,
						     ip->i_mount, dip);
				return -EFSCORRUPTED;
			}

			size = (int)di_size;
			error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
			break;
		case XFS_DINODE_FMT_EXTENTS:
			error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
			break;
		case XFS_DINODE_FMT_BTREE:
			error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
			break;
		default:
			XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
					 ip->i_mount);
			return -EFSCORRUPTED;
		}
		break;

	default:
		XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
		return -EFSCORRUPTED;
	}
	if (error)
		return error;

	/* Check inline dir contents. */
	if (S_ISDIR(inode->i_mode) && dip->di_format == XFS_DINODE_FMT_LOCAL) {
		error = xfs_dir2_sf_verify(ip);
		if (error) {
			xfs_idestroy_fork(ip, XFS_DATA_FORK);
			return error;
		}
	}

	/* Reflink inodes get an (initially empty) in-core CoW fork. */
	if (xfs_is_reflink_inode(ip)) {
		ASSERT(ip->i_cowfp == NULL);
		xfs_ifork_init_cow(ip);
	}

	/* Done if there is no attribute fork on disk. */
	if (!XFS_DFORK_Q(dip))
		return 0;

	ASSERT(ip->i_afp == NULL);
	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);

	switch (dip->di_aformat) {
	case XFS_DINODE_FMT_LOCAL:
		atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
		size = be16_to_cpu(atp->hdr.totsize);

		/* The shortform header itself must fit in the fork. */
		if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
			xfs_warn(ip->i_mount,
				"corrupt inode %Lu (bad attr fork size %Ld).",
				(unsigned long long) ip->i_ino,
				(long long) size);
			XFS_CORRUPTION_ERROR("xfs_iformat(8)",
					     XFS_ERRLEVEL_LOW,
					     ip->i_mount, dip);
			error = -EFSCORRUPTED;
			break;
		}

		error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
		break;
	case XFS_DINODE_FMT_BTREE:
		error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
		break;
	default:
		error = -EFSCORRUPTED;
		break;
	}
	if (error) {
		/* Unwind everything built so far on attr-fork failure. */
		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
		ip->i_afp = NULL;
		if (ip->i_cowfp)
			kmem_zone_free(xfs_ifork_zone, ip->i_cowfp);
		ip->i_cowfp = NULL;
		xfs_idestroy_fork(ip, XFS_DATA_FORK);
	}
	return error;
}
250
Christoph Hellwig143f4ae2016-04-06 07:41:43 +1000251void
252xfs_init_local_fork(
253 struct xfs_inode *ip,
254 int whichfork,
255 const void *data,
256 int size)
257{
258 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
Christoph Hellwig30ee0522016-04-06 07:53:29 +1000259 int mem_size = size, real_size = 0;
260 bool zero_terminate;
261
262 /*
263 * If we are using the local fork to store a symlink body we need to
264 * zero-terminate it so that we can pass it back to the VFS directly.
265 * Overallocate the in-memory fork by one for that and add a zero
266 * to terminate it below.
267 */
268 zero_terminate = S_ISLNK(VFS_I(ip)->i_mode);
269 if (zero_terminate)
270 mem_size++;
Christoph Hellwig143f4ae2016-04-06 07:41:43 +1000271
272 if (size == 0)
273 ifp->if_u1.if_data = NULL;
Christoph Hellwig30ee0522016-04-06 07:53:29 +1000274 else if (mem_size <= sizeof(ifp->if_u2.if_inline_data))
Christoph Hellwig143f4ae2016-04-06 07:41:43 +1000275 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
276 else {
Christoph Hellwig30ee0522016-04-06 07:53:29 +1000277 real_size = roundup(mem_size, 4);
Christoph Hellwig143f4ae2016-04-06 07:41:43 +1000278 ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
279 }
280
Christoph Hellwig30ee0522016-04-06 07:53:29 +1000281 if (size) {
Christoph Hellwig143f4ae2016-04-06 07:41:43 +1000282 memcpy(ifp->if_u1.if_data, data, size);
Christoph Hellwig30ee0522016-04-06 07:53:29 +1000283 if (zero_terminate)
284 ifp->if_u1.if_data[size] = '\0';
285 }
Christoph Hellwig143f4ae2016-04-06 07:41:43 +1000286
287 ifp->if_bytes = size;
288 ifp->if_real_bytes = real_size;
289 ifp->if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT);
290 ifp->if_flags |= XFS_IFINLINE;
291}
292
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000293/*
294 * The file is in-lined in the on-disk inode.
295 * If it fits into if_inline_data, then copy
296 * it there, otherwise allocate a buffer for it
297 * and copy the data there. Either way, set
298 * if_data to point at the data.
299 * If we allocate a buffer for the data, make
300 * sure that its size is a multiple of 4 and
301 * record the real size in i_real_bytes.
302 */
303STATIC int
304xfs_iformat_local(
305 xfs_inode_t *ip,
306 xfs_dinode_t *dip,
307 int whichfork,
308 int size)
309{
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000310 /*
311 * If the size is unreasonable, then something
312 * is wrong and we just bail out rather than crash in
313 * kmem_alloc() or memcpy() below.
314 */
315 if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
316 xfs_warn(ip->i_mount,
317 "corrupt inode %Lu (bad size %d for local fork, size = %d).",
318 (unsigned long long) ip->i_ino, size,
319 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
320 XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
321 ip->i_mount, dip);
Dave Chinner24513372014-06-25 14:58:08 +1000322 return -EFSCORRUPTED;
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000323 }
Christoph Hellwig143f4ae2016-04-06 07:41:43 +1000324
325 xfs_init_local_fork(ip, whichfork, XFS_DFORK_PTR(dip, whichfork), size);
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000326 return 0;
327}
328
/*
 * The file consists of a set of extents all of which fit into the on-disk
 * inode. If there are few enough extents to fit into the if_inline_ext, then
 * copy them there. Otherwise allocate a buffer for them and copy them into it.
 * Either way, set if_extents to point at the extents.
 *
 * Returns 0 on success or -EFSCORRUPTED if the extent count or any extent
 * record fails validation.
 */
STATIC int
xfs_iformat_extents(
	struct xfs_inode	*ip,
	struct xfs_dinode	*dip,
	int			whichfork)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	int			state = xfs_bmap_fork_to_state(whichfork);
	int			nex = XFS_DFORK_NEXTENTS(dip, whichfork);
	int			size = nex * sizeof(xfs_bmbt_rec_t);
	struct xfs_bmbt_rec	*dp;
	int			i;

	/*
	 * If the number of extents is unreasonable, then something is wrong and
	 * we just bail out rather than crash in kmem_alloc() or memcpy() below.
	 */
	if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, mp, whichfork))) {
		xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
			(unsigned long long) ip->i_ino, nex);
		XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
				     mp, dip);
		return -EFSCORRUPTED;
	}

	/* Set up the in-core extent array (inline, or grown via iext_add). */
	ifp->if_real_bytes = 0;
	if (nex == 0)
		ifp->if_u1.if_extents = NULL;
	else if (nex <= XFS_INLINE_EXTS)
		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
	else
		xfs_iext_add(ifp, 0, nex);

	ifp->if_bytes = size;
	if (size) {
		dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
		/*
		 * Copy each record, converting from on-disk big-endian to
		 * host order, and validate it as we go.
		 */
		for (i = 0; i < nex; i++, dp++) {
			xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
			ep->l0 = get_unaligned_be64(&dp->l0);
			ep->l1 = get_unaligned_be64(&dp->l1);
			if (!xfs_bmbt_validate_extent(mp, whichfork, ep)) {
				XFS_ERROR_REPORT("xfs_iformat_extents(2)",
						 XFS_ERRLEVEL_LOW, mp);
				return -EFSCORRUPTED;
			}
			trace_xfs_read_extent(ip, i, state, _THIS_IP_);
		}
	}
	ifp->if_flags |= XFS_IFEXTENTS;
	return 0;
}
387
/*
 * The file has too many extents to fit into
 * the inode, so they are in B-tree format.
 * Allocate a buffer for the root of the B-tree
 * and copy the root into it.  The i_extents
 * field will remain NULL until all of the
 * extents are read in (when they are needed).
 */
STATIC int
xfs_iformat_btree(
	xfs_inode_t		*ip,
	xfs_dinode_t		*dip,
	int			whichfork)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_bmdr_block_t	*dfp;
	xfs_ifork_t		*ifp;
	/* REFERENCED */
	int			nrecs;
	int			size;
	int			level;

	ifp = XFS_IFORK_PTR(ip, whichfork);
	dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
	size = XFS_BMAP_BROOT_SPACE(mp, dfp);
	nrecs = be16_to_cpu(dfp->bb_numrecs);
	level = be16_to_cpu(dfp->bb_level);

	/*
	 * blow out if -- fork has less extents than can fit in
	 * fork (fork shouldn't be a btree format), root btree
	 * block has more records than can fit into the fork,
	 * or the number of extents is greater than the number of
	 * blocks.  Additionally the root level must be at least 1
	 * (a level-0 root would be a leaf) and within the global
	 * btree depth limit.
	 */
	if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
					XFS_IFORK_MAXEXT(ip, whichfork) ||
		     XFS_BMDR_SPACE_CALC(nrecs) >
					XFS_DFORK_SIZE(dip, mp, whichfork) ||
		     XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) ||
		     level == 0 || level > XFS_BTREE_MAXLEVELS) {
		xfs_warn(mp, "corrupt inode %Lu (btree).",
			(unsigned long long) ip->i_ino);
		XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
				     mp, dip);
		return -EFSCORRUPTED;
	}

	ifp->if_broot_bytes = size;
	ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS);
	ASSERT(ifp->if_broot != NULL);
	/*
	 * Copy and convert from the on-disk structure
	 * to the in-memory structure.
	 */
	xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
			 ifp->if_broot, size);
	ifp->if_flags &= ~XFS_IFEXTENTS;
	ifp->if_flags |= XFS_IFBROOT;

	return 0;
}
450
/*
 * Reallocate the space for if_broot based on the number of records
 * being added or deleted as indicated in rec_diff.  Move the records
 * and pointers in if_broot to fit the new size.  When shrinking this
 * will eliminate holes between the records and pointers created by
 * the caller.  When growing this will create holes to be filled in
 * by the caller.
 *
 * The caller must not request to add more records than would fit in
 * the on-disk inode root.  If the if_broot is currently NULL, then
 * if we are adding records, one will be allocated.  The caller must also
 * not request that the number of records go below zero, although
 * it can go to zero.
 *
 * ip -- the inode whose if_broot area is changing
 * ext_diff -- the change in the number of records, positive or negative,
 *	 requested for the if_broot array.
 */
void
xfs_iroot_realloc(
	xfs_inode_t		*ip,
	int			rec_diff,
	int			whichfork)
{
	struct xfs_mount	*mp = ip->i_mount;
	int			cur_max;
	xfs_ifork_t		*ifp;
	struct xfs_btree_block	*new_broot;
	int			new_max;
	size_t			new_size;
	char			*np;
	char			*op;

	/*
	 * Handle the degenerate case quietly.
	 */
	if (rec_diff == 0) {
		return;
	}

	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (rec_diff > 0) {
		/*
		 * If there wasn't any memory allocated before, just
		 * allocate it now and get out.
		 */
		if (ifp->if_broot_bytes == 0) {
			new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
			ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
			ifp->if_broot_bytes = (int)new_size;
			return;
		}

		/*
		 * If there is already an existing if_broot, then we need
		 * to realloc() it and shift the pointers to their new
		 * location.  The records don't change location because
		 * they are kept butted up against the btree block header.
		 */
		cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
		new_max = cur_max + rec_diff;
		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
		ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
				KM_SLEEP | KM_NOFS);
		/*
		 * Old/new pointer-array addresses are computed from the old
		 * and new sizes respectively; memmove because the two areas
		 * can overlap within the same (reallocated) buffer.
		 */
		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
						     ifp->if_broot_bytes);
		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
						     (int)new_size);
		ifp->if_broot_bytes = (int)new_size;
		ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
			XFS_IFORK_SIZE(ip, whichfork));
		memmove(np, op, cur_max * (uint)sizeof(xfs_fsblock_t));
		return;
	}

	/*
	 * rec_diff is less than 0.  In this case, we are shrinking the
	 * if_broot buffer.  It must already exist.  If we go to zero
	 * records, just get rid of the root and clear the status bit.
	 */
	ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
	cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
	new_max = cur_max + rec_diff;
	ASSERT(new_max >= 0);
	if (new_max > 0)
		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
	else
		new_size = 0;
	if (new_size > 0) {
		new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
		/*
		 * First copy over the btree block header.
		 */
		memcpy(new_broot, ifp->if_broot,
			XFS_BMBT_BLOCK_LEN(ip->i_mount));
	} else {
		new_broot = NULL;
		ifp->if_flags &= ~XFS_IFBROOT;
	}

	/*
	 * Only copy the records and pointers if there are any.
	 */
	if (new_max > 0) {
		/*
		 * First copy the records.
		 */
		op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
		np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
		memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));

		/*
		 * Then copy the pointers.
		 */
		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
						     ifp->if_broot_bytes);
		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
						     (int)new_size);
		memcpy(np, op, new_max * (uint)sizeof(xfs_fsblock_t));
	}
	/* Swap the shrunken copy in and release the old root. */
	kmem_free(ifp->if_broot);
	ifp->if_broot = new_broot;
	ifp->if_broot_bytes = (int)new_size;
	if (ifp->if_broot)
		ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
			XFS_IFORK_SIZE(ip, whichfork));
	return;
}
579
580
/*
 * This is called when the amount of space needed for if_data
 * is increased or decreased.  The change in size is indicated by
 * the number of bytes that need to be added or deleted in the
 * byte_diff parameter.
 *
 * If the amount of space needed has decreased below the size of the
 * inline buffer, then switch to using the inline buffer.  Otherwise,
 * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
 * to what is needed.
 *
 * ip -- the inode whose if_data area is changing
 * byte_diff -- the change in the number of bytes, positive or negative,
 *	 requested for the if_data array.
 */
void
xfs_idata_realloc(
	xfs_inode_t	*ip,
	int		byte_diff,
	int		whichfork)
{
	xfs_ifork_t	*ifp;
	int		new_size;
	int		real_size;

	if (byte_diff == 0) {
		return;
	}

	ifp = XFS_IFORK_PTR(ip, whichfork);
	new_size = (int)ifp->if_bytes + byte_diff;
	ASSERT(new_size >= 0);

	if (new_size == 0) {
		/* Fork becomes empty: free any heap buffer that was in use. */
		if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
			kmem_free(ifp->if_u1.if_data);
		}
		ifp->if_u1.if_data = NULL;
		real_size = 0;
	} else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) {
		/*
		 * If the valid extents/data can fit in if_inline_ext/data,
		 * copy them from the malloc'd vector and free it.
		 */
		if (ifp->if_u1.if_data == NULL) {
			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
			ASSERT(ifp->if_real_bytes != 0);
			memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
				new_size);
			kmem_free(ifp->if_u1.if_data);
			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
		}
		real_size = 0;
	} else {
		/*
		 * Stuck with malloc/realloc.
		 * For inline data, the underlying buffer must be
		 * a multiple of 4 bytes in size so that it can be
		 * logged and stay on word boundaries.  We enforce
		 * that here.
		 */
		real_size = roundup(new_size, 4);
		if (ifp->if_u1.if_data == NULL) {
			ASSERT(ifp->if_real_bytes == 0);
			ifp->if_u1.if_data = kmem_alloc(real_size,
							KM_SLEEP | KM_NOFS);
		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
			/*
			 * Only do the realloc if the underlying size
			 * is really changing.
			 */
			if (ifp->if_real_bytes != real_size) {
				ifp->if_u1.if_data =
					kmem_realloc(ifp->if_u1.if_data,
							real_size,
							KM_SLEEP | KM_NOFS);
			}
		} else {
			/* Growing out of the inline area: copy old bytes over. */
			ASSERT(ifp->if_real_bytes == 0);
			ifp->if_u1.if_data = kmem_alloc(real_size,
							KM_SLEEP | KM_NOFS);
			memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
				ifp->if_bytes);
		}
	}
	ifp->if_real_bytes = real_size;
	ifp->if_bytes = new_size;
	ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
}
671
/*
 * Free all in-core memory associated with the given fork: the btree root,
 * any inline-data heap buffer or extent list, and finally the fork
 * structure itself for the attr and CoW forks (which are allocated from
 * xfs_ifork_zone rather than embedded in the inode).
 */
void
xfs_idestroy_fork(
	xfs_inode_t	*ip,
	int		whichfork)
{
	xfs_ifork_t	*ifp;

	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (ifp->if_broot != NULL) {
		kmem_free(ifp->if_broot);
		ifp->if_broot = NULL;
	}

	/*
	 * If the format is local, then we can't have an extents
	 * array so just look for an inline data array.  If we're
	 * not local then we may or may not have an extents list,
	 * so check and free it up if we do.
	 */
	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
		if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
		    (ifp->if_u1.if_data != NULL)) {
			ASSERT(ifp->if_real_bytes != 0);
			kmem_free(ifp->if_u1.if_data);
			ifp->if_u1.if_data = NULL;
			ifp->if_real_bytes = 0;
		}
	} else if ((ifp->if_flags & XFS_IFEXTENTS) &&
		   ((ifp->if_flags & XFS_IFEXTIREC) ||
		    ((ifp->if_u1.if_extents != NULL) &&
		     (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) {
		ASSERT(ifp->if_real_bytes != 0);
		xfs_iext_destroy(ifp);
	}
	/* By now only the inline areas (or nothing) may remain. */
	ASSERT(ifp->if_u1.if_extents == NULL ||
	       ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext);
	ASSERT(ifp->if_real_bytes == 0);
	if (whichfork == XFS_ATTR_FORK) {
		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
		ip->i_afp = NULL;
	} else if (whichfork == XFS_COW_FORK) {
		kmem_zone_free(xfs_ifork_zone, ip->i_cowfp);
		ip->i_cowfp = NULL;
	}
}
717
Eric Sandeen5d829302016-11-08 12:59:42 +1100718/* Count number of incore extents based on if_bytes */
719xfs_extnum_t
720xfs_iext_count(struct xfs_ifork *ifp)
721{
722 return ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
723}
724
/*
 * Convert in-core extents to on-disk form
 *
 * For either the data or attr fork in extent format, we need to endian convert
 * the in-core extent as we place them into the on-disk inode.
 *
 * In the case of the data fork, the in-core and on-disk fork sizes can be
 * different due to delayed allocation extents.  We only copy on-disk extents
 * here, so callers must always use the physical fork size to determine the
 * size of the buffer passed to this routine.  We will return the size actually
 * used.
 */
int
xfs_iextents_copy(
	xfs_inode_t		*ip,
	xfs_bmbt_rec_t		*dp,
	int			whichfork)
{
	int			state = xfs_bmap_fork_to_state(whichfork);
	int			copied;
	int			i;
	xfs_ifork_t		*ifp;
	int			nrecs;
	xfs_fsblock_t		start_block;

	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
	ASSERT(ifp->if_bytes > 0);

	nrecs = xfs_iext_count(ifp);
	ASSERT(nrecs > 0);

	/*
	 * There are some delayed allocation extents in the
	 * inode, so copy the extents one at a time and skip
	 * the delayed ones.  There must be at least one
	 * non-delayed extent.
	 */
	copied = 0;
	for (i = 0; i < nrecs; i++) {
		xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);

		ASSERT(xfs_bmbt_validate_extent(ip->i_mount, whichfork, ep));

		start_block = xfs_bmbt_get_startblock(ep);
		if (isnullstartblock(start_block)) {
			/*
			 * It's a delayed allocation extent, so skip it.
			 */
			continue;
		}

		trace_xfs_write_extent(ip, i, state, _RET_IP_);

		/* Translate to on disk format */
		put_unaligned_be64(ep->l0, &dp->l0);
		put_unaligned_be64(ep->l1, &dp->l1);

		dp++;
		copied++;
	}
	ASSERT(copied != 0);

	/* Bytes actually written into the caller's buffer. */
	return (copied * (uint)sizeof(xfs_bmbt_rec_t));
}
790
/*
 * Each of the following cases stores data into the same region
 * of the on-disk inode, so only one of them can be valid at
 * any given time.  While it is possible to have conflicting formats
 * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
 * in EXTENTS format, this can only happen when the fork has
 * changed formats after being modified but before being flushed.
 * In these cases, the format always takes precedence, because the
 * format indicates the current state of the fork.
 */
void
xfs_iflush_fork(
	xfs_inode_t		*ip,
	xfs_dinode_t		*dip,
	xfs_inode_log_item_t	*iip,
	int			whichfork)
{
	char			*cp;
	xfs_ifork_t		*ifp;
	xfs_mount_t		*mp;
	/* Per-fork log flags: index 0 is the data fork, 1 the attr fork. */
	static const short	brootflag[2] =
		{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
	static const short	dataflag[2] =
		{ XFS_ILOG_DDATA, XFS_ILOG_ADATA };
	static const short	extflag[2] =
		{ XFS_ILOG_DEXT, XFS_ILOG_AEXT };

	/* Nothing to flush without a log item. */
	if (!iip)
		return;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	/*
	 * This can happen if we gave up in iformat in an error path,
	 * for the attribute fork.
	 */
	if (!ifp) {
		ASSERT(whichfork == XFS_ATTR_FORK);
		return;
	}
	cp = XFS_DFORK_PTR(dip, whichfork);
	mp = ip->i_mount;
	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
	case XFS_DINODE_FMT_LOCAL:
		if ((iip->ili_fields & dataflag[whichfork]) &&
		    (ifp->if_bytes > 0)) {
			ASSERT(ifp->if_u1.if_data != NULL);
			ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
			memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
		}
		break;

	case XFS_DINODE_FMT_EXTENTS:
		ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
		       !(iip->ili_fields & extflag[whichfork]));
		if ((iip->ili_fields & extflag[whichfork]) &&
		    (ifp->if_bytes > 0)) {
			ASSERT(xfs_iext_get_ext(ifp, 0));
			ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
			(void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
				whichfork);
		}
		break;

	case XFS_DINODE_FMT_BTREE:
		if ((iip->ili_fields & brootflag[whichfork]) &&
		    (ifp->if_broot_bytes > 0)) {
			ASSERT(ifp->if_broot != NULL);
			ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
			        XFS_IFORK_SIZE(ip, whichfork));
			xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
				(xfs_bmdr_block_t *)cp,
				XFS_DFORK_SIZE(dip, mp, whichfork));
		}
		break;

	case XFS_DINODE_FMT_DEV:
		if (iip->ili_fields & XFS_ILOG_DEV) {
			ASSERT(whichfork == XFS_DATA_FORK);
			xfs_dinode_put_rdev(dip,
					sysv_encode_dev(VFS_I(ip)->i_rdev));
		}
		break;

	default:
		ASSERT(0);
		break;
	}
}
877
878/*
879 * Return a pointer to the extent record at file index idx.
880 */
881xfs_bmbt_rec_host_t *
882xfs_iext_get_ext(
883 xfs_ifork_t *ifp, /* inode fork pointer */
884 xfs_extnum_t idx) /* index of target extent */
885{
886 ASSERT(idx >= 0);
Eric Sandeen5d829302016-11-08 12:59:42 +1100887 ASSERT(idx < xfs_iext_count(ifp));
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000888
889 if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
890 return ifp->if_u1.if_ext_irec->er_extbuf;
891 } else if (ifp->if_flags & XFS_IFEXTIREC) {
892 xfs_ext_irec_t *erp; /* irec pointer */
893 int erp_idx = 0; /* irec index */
894 xfs_extnum_t page_idx = idx; /* ext index in target list */
895
896 erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
897 return &erp->er_extbuf[page_idx];
898 } else if (ifp->if_bytes) {
899 return &ifp->if_u1.if_extents[idx];
900 } else {
901 return NULL;
902 }
903}
904
Darrick J. Wong3993bae2016-10-03 09:11:32 -0700905/* Convert bmap state flags to an inode fork. */
906struct xfs_ifork *
907xfs_iext_state_to_fork(
908 struct xfs_inode *ip,
909 int state)
910{
911 if (state & BMAP_COWFORK)
912 return ip->i_cowfp;
913 else if (state & BMAP_ATTRFORK)
914 return ip->i_afp;
915 return &ip->i_df;
916}
917
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000918/*
919 * Insert new item(s) into the extent records for incore inode
920 * fork 'ifp'. 'count' new items are inserted at index 'idx'.
921 */
922void
923xfs_iext_insert(
924 xfs_inode_t *ip, /* incore inode pointer */
925 xfs_extnum_t idx, /* starting index of new items */
926 xfs_extnum_t count, /* number of inserted items */
927 xfs_bmbt_irec_t *new, /* items to insert */
928 int state) /* type of extent conversion */
929{
Darrick J. Wong3993bae2016-10-03 09:11:32 -0700930 xfs_ifork_t *ifp = xfs_iext_state_to_fork(ip, state);
Dave Chinner5c4d97d2013-08-12 20:49:33 +1000931 xfs_extnum_t i; /* extent record index */
932
933 trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_);
934
935 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
936 xfs_iext_add(ifp, idx, count);
937 for (i = idx; i < idx + count; i++, new++)
938 xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new);
939}
940
/*
 * This is called when the amount of space required for incore file
 * extents needs to be increased. The ext_diff parameter stores the
 * number of new extents being added and the idx parameter contains
 * the extent index where the new extents will be added. If the new
 * extents are being appended, then we just need to (re)allocate and
 * initialize the space. Otherwise, if the new extents are being
 * inserted into the middle of the existing entries, a bit more work
 * is required to make room for the new extents to be inserted. The
 * caller is responsible for filling in the new extent entries upon
 * return.
 *
 * The fork can be in one of three storage modes depending on the
 * resulting extent count: inline in the inode (<= XFS_INLINE_EXTS),
 * a single direct buffer (<= XFS_LINEAR_EXTS), or an indirection
 * array of per-page buffers beyond that.
 */
void
xfs_iext_add(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_extnum_t	idx,		/* index to begin adding exts */
	int		ext_diff)	/* number of extents to add */
{
	int		byte_diff;	/* new bytes being added */
	int		new_size;	/* size of extents after adding */
	xfs_extnum_t	nextents;	/* number of extents in file */

	nextents = xfs_iext_count(ifp);
	ASSERT((idx >= 0) && (idx <= nextents));
	byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
	new_size = ifp->if_bytes + byte_diff;
	/*
	 * If the new number of extents (nextents + ext_diff)
	 * fits inside the inode, then continue to use the inline
	 * extent buffer.
	 */
	if (nextents + ext_diff <= XFS_INLINE_EXTS) {
		/* Make a zeroed hole at idx if inserting mid-list. */
		if (idx < nextents) {
			memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff],
				&ifp->if_u2.if_inline_ext[idx],
				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
			memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff);
		}
		/* In inline mode if_u1.if_extents aliases the inline buffer. */
		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
		ifp->if_real_bytes = 0;
	}
	/*
	 * Otherwise use a linear (direct) extent list.
	 * If the extents are currently inside the inode,
	 * xfs_iext_realloc_direct will switch us from
	 * inline to direct extent allocation mode.
	 */
	else if (nextents + ext_diff <= XFS_LINEAR_EXTS) {
		xfs_iext_realloc_direct(ifp, new_size);
		if (idx < nextents) {
			memmove(&ifp->if_u1.if_extents[idx + ext_diff],
				&ifp->if_u1.if_extents[idx],
				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
			memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
		}
	}
	/* Indirection array */
	else {
		xfs_ext_irec_t	*erp;		/* target irec entry */
		int		erp_idx = 0;	/* index of target irec */
		int		page_idx = idx;	/* index within target irec */

		ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS);
		if (ifp->if_flags & XFS_IFEXTIREC) {
			/* realloc=1: prefer a page with room for new records */
			erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1);
		} else {
			/* First crossing of the linear limit: build the array. */
			xfs_iext_irec_init(ifp);
			ASSERT(ifp->if_flags & XFS_IFEXTIREC);
			erp = ifp->if_u1.if_ext_irec;
		}
		/* Extents fit in target extent page */
		if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) {
			if (page_idx < erp->er_extcount) {
				memmove(&erp->er_extbuf[page_idx + ext_diff],
					&erp->er_extbuf[page_idx],
					(erp->er_extcount - page_idx) *
					sizeof(xfs_bmbt_rec_t));
				memset(&erp->er_extbuf[page_idx], 0, byte_diff);
			}
			erp->er_extcount += ext_diff;
			/* later pages' file offsets shift up by ext_diff */
			xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
		}
		/* Insert a new extent page */
		else if (erp) {
			xfs_iext_add_indirect_multi(ifp,
				erp_idx, page_idx, ext_diff);
		}
		/*
		 * If extent(s) are being appended to the last page in
		 * the indirection array and the new extent(s) don't fit
		 * in the page, then erp is NULL and erp_idx is set to
		 * the next index needed in the indirection array.
		 */
		else {
			uint	count = ext_diff;

			/* Append as many new pages as the extents require. */
			while (count) {
				erp = xfs_iext_irec_new(ifp, erp_idx);
				erp->er_extcount = min(count, XFS_LINEAR_EXTS);
				count -= erp->er_extcount;
				if (count)
					erp_idx++;
			}
		}
	}
	ifp->if_bytes = new_size;
}
1048
/*
 * This is called when incore extents are being added to the indirection
 * array and the new extents do not fit in the target extent list. The
 * erp_idx parameter contains the irec index for the target extent list
 * in the indirection array, and the idx parameter contains the extent
 * index within the list. The number of extents being added is stored
 * in the count parameter.
 *
 *    |-------|   |-------|
 *    |       |   |       |    idx - number of extents before idx
 *    |  idx  |   | count |
 *    |       |   |       |    count - number of extents being inserted at idx
 *    |-------|   |-------|
 *    | count |   | nex2  |    nex2 - number of extents after idx + count
 *    |-------|   |-------|
 */
void
xfs_iext_add_indirect_multi(
	xfs_ifork_t	*ifp,			/* inode fork pointer */
	int		erp_idx,		/* target extent irec index */
	xfs_extnum_t	idx,			/* index within target list */
	int		count)			/* new extents being added */
{
	int		byte_diff;		/* new bytes being added */
	xfs_ext_irec_t	*erp;			/* pointer to irec entry */
	xfs_extnum_t	ext_diff;		/* number of extents to add */
	xfs_extnum_t	ext_cnt;		/* new extents still needed */
	xfs_extnum_t	nex2;			/* extents after idx + count */
	xfs_bmbt_rec_t	*nex2_ep = NULL;	/* temp list for nex2 extents */
	int		nlists;			/* number of irec's (lists) */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	erp = &ifp->if_u1.if_ext_irec[erp_idx];
	nex2 = erp->er_extcount - idx;
	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;

	/*
	 * Save second part of target extent list (all extents past
	 * idx) in a temporary buffer so the insertion point becomes
	 * the end of the target page.
	 */
	if (nex2) {
		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
		nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
		memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
		erp->er_extcount -= nex2;
		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
		memset(&erp->er_extbuf[idx], 0, byte_diff);
	}

	/*
	 * Add the new extents to the end of the target
	 * list, then allocate new irec record(s) and
	 * extent buffer(s) as needed to store the rest
	 * of the new extents.
	 */
	ext_cnt = count;
	ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount);
	if (ext_diff) {
		erp->er_extcount += ext_diff;
		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
		ext_cnt -= ext_diff;
	}
	while (ext_cnt) {
		erp_idx++;
		erp = xfs_iext_irec_new(ifp, erp_idx);
		ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS);
		erp->er_extcount = ext_diff;
		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
		ext_cnt -= ext_diff;
	}

	/* Add nex2 extents back to indirection array */
	if (nex2) {
		xfs_extnum_t	ext_avail;	/* free slots in current page */
		int		i;		/* destination offset in page */

		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
		ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
		i = 0;
		/*
		 * If nex2 extents fit in the current page, append
		 * nex2_ep after the new extents.
		 */
		if (nex2 <= ext_avail) {
			i = erp->er_extcount;
		}
		/*
		 * Otherwise, check if space is available in the
		 * next page.
		 */
		else if ((erp_idx < nlists - 1) &&
			 (nex2 <= (ext_avail = XFS_LINEAR_EXTS -
			  ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) {
			erp_idx++;
			erp++;
			/* Create a hole for nex2 extents */
			memmove(&erp->er_extbuf[nex2], erp->er_extbuf,
				erp->er_extcount * sizeof(xfs_bmbt_rec_t));
		}
		/*
		 * Final choice, create a new extent page for
		 * nex2 extents.
		 */
		else {
			erp_idx++;
			erp = xfs_iext_irec_new(ifp, erp_idx);
		}
		memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
		kmem_free(nex2_ep);
		erp->er_extcount += nex2;
		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
	}
}
1161
1162/*
1163 * This is called when the amount of space required for incore file
1164 * extents needs to be decreased. The ext_diff parameter stores the
1165 * number of extents to be removed and the idx parameter contains
1166 * the extent index where the extents will be removed from.
1167 *
1168 * If the amount of space needed has decreased below the linear
1169 * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous
1170 * extent array. Otherwise, use kmem_realloc() to adjust the
1171 * size to what is needed.
1172 */
1173void
1174xfs_iext_remove(
1175 xfs_inode_t *ip, /* incore inode pointer */
1176 xfs_extnum_t idx, /* index to begin removing exts */
1177 int ext_diff, /* number of extents to remove */
1178 int state) /* type of extent conversion */
1179{
Darrick J. Wong3993bae2016-10-03 09:11:32 -07001180 xfs_ifork_t *ifp = xfs_iext_state_to_fork(ip, state);
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001181 xfs_extnum_t nextents; /* number of extents in file */
1182 int new_size; /* size of extents after removal */
1183
1184 trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
1185
1186 ASSERT(ext_diff > 0);
Eric Sandeen5d829302016-11-08 12:59:42 +11001187 nextents = xfs_iext_count(ifp);
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001188 new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
1189
1190 if (new_size == 0) {
1191 xfs_iext_destroy(ifp);
1192 } else if (ifp->if_flags & XFS_IFEXTIREC) {
1193 xfs_iext_remove_indirect(ifp, idx, ext_diff);
1194 } else if (ifp->if_real_bytes) {
1195 xfs_iext_remove_direct(ifp, idx, ext_diff);
1196 } else {
1197 xfs_iext_remove_inline(ifp, idx, ext_diff);
1198 }
1199 ifp->if_bytes = new_size;
1200}
1201
1202/*
1203 * This removes ext_diff extents from the inline buffer, beginning
1204 * at extent index idx.
1205 */
1206void
1207xfs_iext_remove_inline(
1208 xfs_ifork_t *ifp, /* inode fork pointer */
1209 xfs_extnum_t idx, /* index to begin removing exts */
1210 int ext_diff) /* number of extents to remove */
1211{
1212 int nextents; /* number of extents in file */
1213
1214 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
1215 ASSERT(idx < XFS_INLINE_EXTS);
Eric Sandeen5d829302016-11-08 12:59:42 +11001216 nextents = xfs_iext_count(ifp);
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001217 ASSERT(((nextents - ext_diff) > 0) &&
1218 (nextents - ext_diff) < XFS_INLINE_EXTS);
1219
1220 if (idx + ext_diff < nextents) {
1221 memmove(&ifp->if_u2.if_inline_ext[idx],
1222 &ifp->if_u2.if_inline_ext[idx + ext_diff],
1223 (nextents - (idx + ext_diff)) *
1224 sizeof(xfs_bmbt_rec_t));
1225 memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff],
1226 0, ext_diff * sizeof(xfs_bmbt_rec_t));
1227 } else {
1228 memset(&ifp->if_u2.if_inline_ext[idx], 0,
1229 ext_diff * sizeof(xfs_bmbt_rec_t));
1230 }
1231}
1232
1233/*
1234 * This removes ext_diff extents from a linear (direct) extent list,
1235 * beginning at extent index idx. If the extents are being removed
1236 * from the end of the list (ie. truncate) then we just need to re-
1237 * allocate the list to remove the extra space. Otherwise, if the
1238 * extents are being removed from the middle of the existing extent
1239 * entries, then we first need to move the extent records beginning
1240 * at idx + ext_diff up in the list to overwrite the records being
1241 * removed, then remove the extra space via kmem_realloc.
1242 */
1243void
1244xfs_iext_remove_direct(
1245 xfs_ifork_t *ifp, /* inode fork pointer */
1246 xfs_extnum_t idx, /* index to begin removing exts */
1247 int ext_diff) /* number of extents to remove */
1248{
1249 xfs_extnum_t nextents; /* number of extents in file */
1250 int new_size; /* size of extents after removal */
1251
1252 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
1253 new_size = ifp->if_bytes -
1254 (ext_diff * sizeof(xfs_bmbt_rec_t));
Eric Sandeen5d829302016-11-08 12:59:42 +11001255 nextents = xfs_iext_count(ifp);
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001256
1257 if (new_size == 0) {
1258 xfs_iext_destroy(ifp);
1259 return;
1260 }
1261 /* Move extents up in the list (if needed) */
1262 if (idx + ext_diff < nextents) {
1263 memmove(&ifp->if_u1.if_extents[idx],
1264 &ifp->if_u1.if_extents[idx + ext_diff],
1265 (nextents - (idx + ext_diff)) *
1266 sizeof(xfs_bmbt_rec_t));
1267 }
1268 memset(&ifp->if_u1.if_extents[nextents - ext_diff],
1269 0, ext_diff * sizeof(xfs_bmbt_rec_t));
1270 /*
1271 * Reallocate the direct extent list. If the extents
1272 * will fit inside the inode then xfs_iext_realloc_direct
1273 * will switch from direct to inline extent allocation
1274 * mode for us.
1275 */
1276 xfs_iext_realloc_direct(ifp, new_size);
1277 ifp->if_bytes = new_size;
1278}
1279
/*
 * This is called when incore extents are being removed from the
 * indirection array and the extents being removed span multiple extent
 * buffers. The idx parameter contains the file extent index where we
 * want to begin removing extents, and the count parameter contains
 * how many extents need to be removed.
 *
 *    |-------|   |-------|
 *    | nex1  |   |       |    nex1 - number of extents before idx
 *    |-------|   | count |
 *    |       |   |       |    count - number of extents being removed at idx
 *    | count |   |-------|
 *    |       |   | nex2  |    nex2 - number of extents after idx + count
 *    |-------|   |-------|
 */
void
xfs_iext_remove_indirect(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_extnum_t	idx,		/* index to begin removing extents */
	int		count)		/* number of extents to remove */
{
	xfs_ext_irec_t	*erp;		/* indirection array pointer */
	int		erp_idx = 0;	/* indirection array index */
	xfs_extnum_t	ext_cnt;	/* extents left to remove */
	xfs_extnum_t	ext_diff;	/* extents to remove in current list */
	xfs_extnum_t	nex1;		/* number of extents before idx */
	xfs_extnum_t	nex2;		/* extents after idx + count */
	int		page_idx = idx;	/* index in target extent list */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
	ASSERT(erp != NULL);
	nex1 = page_idx;
	ext_cnt = count;
	/* Walk forward one page per iteration until 'count' are gone. */
	while (ext_cnt) {
		nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0);
		ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1));
		/*
		 * Check for deletion of entire list;
		 * xfs_iext_irec_remove() updates extent offsets.
		 */
		if (ext_diff == erp->er_extcount) {
			xfs_iext_irec_remove(ifp, erp_idx);
			ext_cnt -= ext_diff;
			nex1 = 0;
			if (ext_cnt) {
				ASSERT(erp_idx < ifp->if_real_bytes /
					XFS_IEXT_BUFSZ);
				/*
				 * Removal shifted later pages down, so the
				 * same erp_idx now names the next page.
				 */
				erp = &ifp->if_u1.if_ext_irec[erp_idx];
				nex1 = 0;
				continue;
			} else {
				break;
			}
		}
		/* Move extents up (if needed) */
		if (nex2) {
			memmove(&erp->er_extbuf[nex1],
				&erp->er_extbuf[nex1 + ext_diff],
				nex2 * sizeof(xfs_bmbt_rec_t));
		}
		/* Zero out rest of page */
		memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ -
			((nex1 + nex2) * sizeof(xfs_bmbt_rec_t))));
		/* Update remaining counters */
		erp->er_extcount -= ext_diff;
		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff);
		ext_cnt -= ext_diff;
		nex1 = 0;
		erp_idx++;
		erp++;
	}
	ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t);
	/* Merge sparsely-filled pages back together where possible. */
	xfs_iext_irec_compact(ifp);
}
1355
/*
 * Create, destroy, or resize a linear (direct) block of extents.
 *
 * The real allocation is rounded up to a power of two of new_size;
 * if_real_bytes tracks the allocation while if_bytes tracks the bytes
 * actually in use. Also handles the mode switches: direct -> inline
 * when the extents shrink to fit in the inode, and inline -> direct
 * when called while still in inline mode.
 */
void
xfs_iext_realloc_direct(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	int		new_size)	/* new size of extents after adding */
{
	int		rnew_size;	/* real new size of extents */

	rnew_size = new_size;

	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) ||
		((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) &&
		 (new_size != ifp->if_real_bytes)));

	/* Free extent records */
	if (new_size == 0) {
		xfs_iext_destroy(ifp);
	}
	/* Resize direct extent list and zero any new bytes */
	else if (ifp->if_real_bytes) {
		/* Check if extents will fit inside the inode */
		if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) {
			xfs_iext_direct_to_inline(ifp, new_size /
				(uint)sizeof(xfs_bmbt_rec_t));
			ifp->if_bytes = new_size;
			return;
		}
		if (!is_power_of_2(new_size)){
			rnew_size = roundup_pow_of_two(new_size);
		}
		if (rnew_size != ifp->if_real_bytes) {
			ifp->if_u1.if_extents =
				kmem_realloc(ifp->if_u1.if_extents,
						rnew_size, KM_NOFS);
		}
		/* Zero the newly-exposed bytes past the old contents. */
		if (rnew_size > ifp->if_real_bytes) {
			memset(&ifp->if_u1.if_extents[ifp->if_bytes /
				(uint)sizeof(xfs_bmbt_rec_t)], 0,
				rnew_size - ifp->if_real_bytes);
		}
	}
	/* Switch from the inline extent buffer to a direct extent list */
	else {
		if (!is_power_of_2(new_size)) {
			rnew_size = roundup_pow_of_two(new_size);
		}
		xfs_iext_inline_to_direct(ifp, rnew_size);
	}
	ifp->if_real_bytes = rnew_size;
	ifp->if_bytes = new_size;
}
1409
1410/*
1411 * Switch from linear (direct) extent records to inline buffer.
1412 */
1413void
1414xfs_iext_direct_to_inline(
1415 xfs_ifork_t *ifp, /* inode fork pointer */
1416 xfs_extnum_t nextents) /* number of extents in file */
1417{
1418 ASSERT(ifp->if_flags & XFS_IFEXTENTS);
1419 ASSERT(nextents <= XFS_INLINE_EXTS);
1420 /*
1421 * The inline buffer was zeroed when we switched
1422 * from inline to direct extent allocation mode,
1423 * so we don't need to clear it here.
1424 */
1425 memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
1426 nextents * sizeof(xfs_bmbt_rec_t));
1427 kmem_free(ifp->if_u1.if_extents);
1428 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
1429 ifp->if_real_bytes = 0;
1430}
1431
1432/*
1433 * Switch from inline buffer to linear (direct) extent records.
1434 * new_size should already be rounded up to the next power of 2
1435 * by the caller (when appropriate), so use new_size as it is.
1436 * However, since new_size may be rounded up, we can't update
1437 * if_bytes here. It is the caller's responsibility to update
1438 * if_bytes upon return.
1439 */
1440void
1441xfs_iext_inline_to_direct(
1442 xfs_ifork_t *ifp, /* inode fork pointer */
1443 int new_size) /* number of extents in file */
1444{
1445 ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
1446 memset(ifp->if_u1.if_extents, 0, new_size);
1447 if (ifp->if_bytes) {
1448 memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
1449 ifp->if_bytes);
1450 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
1451 sizeof(xfs_bmbt_rec_t));
1452 }
1453 ifp->if_real_bytes = new_size;
1454}
1455
1456/*
1457 * Resize an extent indirection array to new_size bytes.
1458 */
1459STATIC void
1460xfs_iext_realloc_indirect(
1461 xfs_ifork_t *ifp, /* inode fork pointer */
1462 int new_size) /* new indirection array size */
1463{
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001464 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001465 ASSERT(ifp->if_real_bytes);
Darrick J. Wong7bf7a192017-08-31 15:11:06 -07001466 ASSERT((new_size >= 0) &&
1467 (new_size != ((ifp->if_real_bytes / XFS_IEXT_BUFSZ) *
1468 sizeof(xfs_ext_irec_t))));
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001469 if (new_size == 0) {
1470 xfs_iext_destroy(ifp);
1471 } else {
Christoph Hellwig664b60f2016-04-06 09:47:01 +10001472 ifp->if_u1.if_ext_irec =
1473 kmem_realloc(ifp->if_u1.if_ext_irec, new_size, KM_NOFS);
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001474 }
1475}
1476
/*
 * Switch from indirection array to linear (direct) extent allocations.
 */
STATIC void
xfs_iext_indirect_to_direct(
	xfs_ifork_t	*ifp)		/* inode fork pointer */
{
	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */
	xfs_extnum_t	nextents;	/* number of extents in file */
	int		size;		/* size of file extents */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	nextents = xfs_iext_count(ifp);
	ASSERT(nextents <= XFS_LINEAR_EXTS);
	size = nextents * sizeof(xfs_bmbt_rec_t);

	/* Squeeze all remaining extents into the first extent buffer. */
	xfs_iext_irec_compact_pages(ifp);
	ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);

	/*
	 * NOTE(review): the order below looks deliberate — the extent
	 * buffer pointer is saved before the irec array is freed and
	 * before if_u1 is overwritten with it (if_u1 appears to be a
	 * union shared by if_ext_irec and if_extents); do not reorder.
	 */
	ep = ifp->if_u1.if_ext_irec->er_extbuf;
	kmem_free(ifp->if_u1.if_ext_irec);
	ifp->if_flags &= ~XFS_IFEXTIREC;
	ifp->if_u1.if_extents = ep;
	ifp->if_bytes = size;
	if (nextents < XFS_LINEAR_EXTS) {
		xfs_iext_realloc_direct(ifp, size);
	}
}
1505
1506/*
Alex Lyakas32b43ab2016-05-18 14:01:52 +10001507 * Remove all records from the indirection array.
1508 */
1509STATIC void
1510xfs_iext_irec_remove_all(
1511 struct xfs_ifork *ifp)
1512{
1513 int nlists;
1514 int i;
1515
1516 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1517 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1518 for (i = 0; i < nlists; i++)
1519 kmem_free(ifp->if_u1.if_ext_irec[i].er_extbuf);
1520 kmem_free(ifp->if_u1.if_ext_irec);
1521 ifp->if_flags &= ~XFS_IFEXTIREC;
1522}
1523
1524/*
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001525 * Free incore file extents.
1526 */
1527void
1528xfs_iext_destroy(
1529 xfs_ifork_t *ifp) /* inode fork pointer */
1530{
1531 if (ifp->if_flags & XFS_IFEXTIREC) {
Alex Lyakas32b43ab2016-05-18 14:01:52 +10001532 xfs_iext_irec_remove_all(ifp);
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001533 } else if (ifp->if_real_bytes) {
1534 kmem_free(ifp->if_u1.if_extents);
1535 } else if (ifp->if_bytes) {
1536 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
1537 sizeof(xfs_bmbt_rec_t));
1538 }
1539 ifp->if_u1.if_extents = NULL;
1540 ifp->if_real_bytes = 0;
1541 ifp->if_bytes = 0;
1542}
1543
/*
 * Return a pointer to the extent record for file system block bno.
 *
 * Binary-searches the extent list for the record containing bno.  If
 * bno falls in a hole, returns the first record starting after bno (or
 * NULL if bno is past the last extent).  *idxp receives the file-wide
 * extent index of the returned record.
 */
xfs_bmbt_rec_host_t *			/* pointer to found extent record */
xfs_iext_bno_to_ext(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_fileoff_t	bno,		/* block number to search for */
	xfs_extnum_t	*idxp)		/* index of target extent */
{
	xfs_bmbt_rec_host_t *base;	/* pointer to first extent */
	xfs_filblks_t	blockcount = 0;	/* number of blocks in extent */
	xfs_bmbt_rec_host_t *ep = NULL;	/* pointer to target extent */
	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
	int		high;		/* upper boundary in search */
	xfs_extnum_t	idx = 0;	/* index of target extent */
	int		low;		/* lower boundary in search */
	xfs_extnum_t	nextents;	/* number of file extents */
	xfs_fileoff_t	startoff = 0;	/* start offset of extent */

	nextents = xfs_iext_count(ifp);
	if (nextents == 0) {
		*idxp = 0;
		return NULL;
	}
	low = 0;
	if (ifp->if_flags & XFS_IFEXTIREC) {
		/* Find target extent list */
		int	erp_idx = 0;
		erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
		base = erp->er_extbuf;
		high = erp->er_extcount - 1;
	} else {
		base = ifp->if_u1.if_extents;
		high = nextents - 1;
	}
	/* Binary search extent records */
	while (low <= high) {
		idx = (low + high) >> 1;
		ep = base + idx;
		startoff = xfs_bmbt_get_startoff(ep);
		blockcount = xfs_bmbt_get_blockcount(ep);
		if (bno < startoff) {
			high = idx - 1;
		} else if (bno >= startoff + blockcount) {
			low = idx + 1;
		} else {
			/* Convert back to file-based extent index */
			if (ifp->if_flags & XFS_IFEXTIREC) {
				idx += erp->er_extoff;
			}
			*idxp = idx;
			return ep;
		}
	}
	/* Search ended in a hole: idx/ep name the nearest record probed. */
	/* Convert back to file-based extent index */
	if (ifp->if_flags & XFS_IFEXTIREC) {
		idx += erp->er_extoff;
	}
	/* If bno is past that record, advance to the following one. */
	if (bno >= startoff + blockcount) {
		if (++idx == nextents) {
			ep = NULL;
		} else {
			ep = xfs_iext_get_ext(ifp, idx);
		}
	}
	*idxp = idx;
	return ep;
}
1612
/*
 * Return a pointer to the indirection array entry containing the
 * extent record for filesystem block bno. Store the index of the
 * target irec in *erp_idxp.
 *
 * Binary search over irec entries, keyed by the start offset of each
 * page's first extent record.
 */
xfs_ext_irec_t *			/* pointer to found extent record */
xfs_iext_bno_to_irec(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_fileoff_t	bno,		/* block number to search for */
	int		*erp_idxp)	/* irec index of target ext list */
{
	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
	xfs_ext_irec_t	*erp_next;	/* next indirection array entry */
	int		erp_idx;	/* indirection array index */
	int		nlists;		/* number of extent irec's (lists) */
	int		high;		/* binary search upper limit */
	int		low;		/* binary search lower limit */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
	erp_idx = 0;
	low = 0;
	high = nlists - 1;
	while (low <= high) {
		erp_idx = (low + high) >> 1;
		erp = &ifp->if_u1.if_ext_irec[erp_idx];
		erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL;
		if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) {
			high = erp_idx - 1;
		} else if (erp_next && bno >=
			   xfs_bmbt_get_startoff(erp_next->er_extbuf)) {
			low = erp_idx + 1;
		} else {
			/* bno sorts within this page (or past the last one) */
			break;
		}
	}
	*erp_idxp = erp_idx;
	return erp;
}
1652
/*
 * Return a pointer to the indirection array entry containing the
 * extent record at file extent index *idxp. Store the index of the
 * target irec in *erp_idxp and store the page index of the target
 * extent record in *idxp.
 *
 * With realloc set, the lookup prefers a page that still has room for
 * a subsequent insert at this position: a boundary index may resolve
 * to the end of the previous page instead of the start of the next,
 * and an index one past a completely full last page yields the (not
 * yet existing) next irec slot — erp may then be NULL.
 */
xfs_ext_irec_t *
xfs_iext_idx_to_irec(
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_extnum_t	*idxp,		/* extent index (file -> page) */
	int		*erp_idxp,	/* pointer to target irec */
	int		realloc)	/* new bytes were just added */
{
	xfs_ext_irec_t	*prev;		/* pointer to previous irec */
	xfs_ext_irec_t	*erp = NULL;	/* pointer to current irec */
	int		erp_idx;	/* indirection array index */
	int		nlists;		/* number of irec's (ex lists) */
	int		high;		/* binary search upper limit */
	int		low;		/* binary search lower limit */
	xfs_extnum_t	page_idx = *idxp; /* extent index in target list */

	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
	ASSERT(page_idx >= 0);
	ASSERT(page_idx <= xfs_iext_count(ifp));
	ASSERT(page_idx < xfs_iext_count(ifp) || realloc);

	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
	erp_idx = 0;
	low = 0;
	high = nlists - 1;

	/* Binary search extent irec's */
	while (low <= high) {
		erp_idx = (low + high) >> 1;
		erp = &ifp->if_u1.if_ext_irec[erp_idx];
		prev = erp_idx > 0 ? erp - 1 : NULL;
		if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
		    realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
			/* target is earlier (or previous page has room) */
			high = erp_idx - 1;
		} else if (page_idx > erp->er_extoff + erp->er_extcount ||
			   (page_idx == erp->er_extoff + erp->er_extcount &&
			    !realloc)) {
			/* target is later */
			low = erp_idx + 1;
		} else if (page_idx == erp->er_extoff + erp->er_extcount &&
			   erp->er_extcount == XFS_LINEAR_EXTS) {
			/* append position on a full page: wrap to next page */
			ASSERT(realloc);
			page_idx = 0;
			erp_idx++;
			erp = erp_idx < nlists ? erp + 1 : NULL;
			break;
		} else {
			/* found: convert file-wide index to in-page index */
			page_idx -= erp->er_extoff;
			break;
		}
	}
	*idxp = page_idx;
	*erp_idxp = erp_idx;
	return erp;
}
1712
1713/*
1714 * Allocate and initialize an indirection array once the space needed
1715 * for incore extents increases above XFS_IEXT_BUFSZ.
1716 */
1717void
1718xfs_iext_irec_init(
1719 xfs_ifork_t *ifp) /* inode fork pointer */
1720{
1721 xfs_ext_irec_t *erp; /* indirection array pointer */
1722 xfs_extnum_t nextents; /* number of extents in file */
1723
1724 ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
Eric Sandeen5d829302016-11-08 12:59:42 +11001725 nextents = xfs_iext_count(ifp);
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001726 ASSERT(nextents <= XFS_LINEAR_EXTS);
1727
1728 erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
1729
1730 if (nextents == 0) {
1731 ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
1732 } else if (!ifp->if_real_bytes) {
1733 xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
1734 } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
1735 xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ);
1736 }
1737 erp->er_extbuf = ifp->if_u1.if_extents;
1738 erp->er_extcount = nextents;
1739 erp->er_extoff = 0;
1740
1741 ifp->if_flags |= XFS_IFEXTIREC;
1742 ifp->if_real_bytes = XFS_IEXT_BUFSZ;
1743 ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t);
1744 ifp->if_u1.if_ext_irec = erp;
1745
1746 return;
1747}
1748
1749/*
1750 * Allocate and initialize a new entry in the indirection array.
1751 */
1752xfs_ext_irec_t *
1753xfs_iext_irec_new(
1754 xfs_ifork_t *ifp, /* inode fork pointer */
1755 int erp_idx) /* index for new irec */
1756{
1757 xfs_ext_irec_t *erp; /* indirection array pointer */
1758 int i; /* loop counter */
1759 int nlists; /* number of irec's (ex lists) */
1760
1761 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1762 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1763
1764 /* Resize indirection array */
1765 xfs_iext_realloc_indirect(ifp, ++nlists *
1766 sizeof(xfs_ext_irec_t));
1767 /*
1768 * Move records down in the array so the
1769 * new page can use erp_idx.
1770 */
1771 erp = ifp->if_u1.if_ext_irec;
1772 for (i = nlists - 1; i > erp_idx; i--) {
1773 memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t));
1774 }
1775 ASSERT(i == erp_idx);
1776
1777 /* Initialize new extent record */
1778 erp = ifp->if_u1.if_ext_irec;
1779 erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
1780 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
1781 memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
1782 erp[erp_idx].er_extcount = 0;
1783 erp[erp_idx].er_extoff = erp_idx > 0 ?
1784 erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0;
1785 return (&erp[erp_idx]);
1786}
1787
1788/*
1789 * Remove a record from the indirection array.
1790 */
1791void
1792xfs_iext_irec_remove(
1793 xfs_ifork_t *ifp, /* inode fork pointer */
1794 int erp_idx) /* irec index to remove */
1795{
1796 xfs_ext_irec_t *erp; /* indirection array pointer */
1797 int i; /* loop counter */
1798 int nlists; /* number of irec's (ex lists) */
1799
1800 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1801 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1802 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1803 if (erp->er_extbuf) {
1804 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
1805 -erp->er_extcount);
1806 kmem_free(erp->er_extbuf);
1807 }
1808 /* Compact extent records */
1809 erp = ifp->if_u1.if_ext_irec;
1810 for (i = erp_idx; i < nlists - 1; i++) {
1811 memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
1812 }
1813 /*
1814 * Manually free the last extent record from the indirection
1815 * array. A call to xfs_iext_realloc_indirect() with a size
1816 * of zero would result in a call to xfs_iext_destroy() which
1817 * would in turn call this function again, creating a nasty
1818 * infinite loop.
1819 */
1820 if (--nlists) {
1821 xfs_iext_realloc_indirect(ifp,
1822 nlists * sizeof(xfs_ext_irec_t));
1823 } else {
1824 kmem_free(ifp->if_u1.if_ext_irec);
1825 }
1826 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
1827}
1828
1829/*
1830 * This is called to clean up large amounts of unused memory allocated
1831 * by the indirection array. Before compacting anything though, verify
1832 * that the indirection array is still needed and switch back to the
1833 * linear extent list (or even the inline buffer) if possible. The
1834 * compaction policy is as follows:
1835 *
1836 * Full Compaction: Extents fit into a single page (or inline buffer)
1837 * Partial Compaction: Extents occupy less than 50% of allocated space
1838 * No Compaction: Extents occupy at least 50% of allocated space
1839 */
1840void
1841xfs_iext_irec_compact(
1842 xfs_ifork_t *ifp) /* inode fork pointer */
1843{
1844 xfs_extnum_t nextents; /* number of extents in file */
1845 int nlists; /* number of irec's (ex lists) */
1846
1847 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1848 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
Eric Sandeen5d829302016-11-08 12:59:42 +11001849 nextents = xfs_iext_count(ifp);
Dave Chinner5c4d97d2013-08-12 20:49:33 +10001850
1851 if (nextents == 0) {
1852 xfs_iext_destroy(ifp);
1853 } else if (nextents <= XFS_INLINE_EXTS) {
1854 xfs_iext_indirect_to_direct(ifp);
1855 xfs_iext_direct_to_inline(ifp, nextents);
1856 } else if (nextents <= XFS_LINEAR_EXTS) {
1857 xfs_iext_indirect_to_direct(ifp);
1858 } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
1859 xfs_iext_irec_compact_pages(ifp);
1860 }
1861}
1862
1863/*
1864 * Combine extents from neighboring extent pages.
1865 */
1866void
1867xfs_iext_irec_compact_pages(
1868 xfs_ifork_t *ifp) /* inode fork pointer */
1869{
1870 xfs_ext_irec_t *erp, *erp_next;/* pointers to irec entries */
1871 int erp_idx = 0; /* indirection array index */
1872 int nlists; /* number of irec's (ex lists) */
1873
1874 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1875 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1876 while (erp_idx < nlists - 1) {
1877 erp = &ifp->if_u1.if_ext_irec[erp_idx];
1878 erp_next = erp + 1;
1879 if (erp_next->er_extcount <=
1880 (XFS_LINEAR_EXTS - erp->er_extcount)) {
1881 memcpy(&erp->er_extbuf[erp->er_extcount],
1882 erp_next->er_extbuf, erp_next->er_extcount *
1883 sizeof(xfs_bmbt_rec_t));
1884 erp->er_extcount += erp_next->er_extcount;
1885 /*
1886 * Free page before removing extent record
1887 * so er_extoffs don't get modified in
1888 * xfs_iext_irec_remove.
1889 */
1890 kmem_free(erp_next->er_extbuf);
1891 erp_next->er_extbuf = NULL;
1892 xfs_iext_irec_remove(ifp, erp_idx + 1);
1893 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1894 } else {
1895 erp_idx++;
1896 }
1897 }
1898}
1899
1900/*
1901 * This is called to update the er_extoff field in the indirection
1902 * array when extents have been added or removed from one of the
1903 * extent lists. erp_idx contains the irec index to begin updating
1904 * at and ext_diff contains the number of extents that were added
1905 * or removed.
1906 */
1907void
1908xfs_iext_irec_update_extoffs(
1909 xfs_ifork_t *ifp, /* inode fork pointer */
1910 int erp_idx, /* irec index to update */
1911 int ext_diff) /* number of new extents */
1912{
1913 int i; /* loop counter */
1914 int nlists; /* number of irec's (ex lists */
1915
1916 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
1917 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
1918 for (i = erp_idx; i < nlists; i++) {
1919 ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
1920 }
1921}
Darrick J. Wong3993bae2016-10-03 09:11:32 -07001922
1923/*
1924 * Initialize an inode's copy-on-write fork.
1925 */
1926void
1927xfs_ifork_init_cow(
1928 struct xfs_inode *ip)
1929{
1930 if (ip->i_cowfp)
1931 return;
1932
1933 ip->i_cowfp = kmem_zone_zalloc(xfs_ifork_zone,
1934 KM_SLEEP | KM_NOFS);
1935 ip->i_cowfp->if_flags = XFS_IFEXTENTS;
1936 ip->i_cformat = XFS_DINODE_FMT_EXTENTS;
1937 ip->i_cnextents = 0;
1938}
Christoph Hellwig93533c72016-11-24 11:39:32 +11001939
1940/*
1941 * Lookup the extent covering bno.
1942 *
1943 * If there is an extent covering bno return the extent index, and store the
1944 * expanded extent structure in *gotp, and the extent index in *idx.
1945 * If there is no extent covering bno, but there is an extent after it (e.g.
1946 * it lies in a hole) return that extent in *gotp and its index in *idx
1947 * instead.
1948 * If bno is beyond the last extent return false, and return the index after
1949 * the last valid index in *idxp.
1950 */
1951bool
1952xfs_iext_lookup_extent(
1953 struct xfs_inode *ip,
1954 struct xfs_ifork *ifp,
1955 xfs_fileoff_t bno,
1956 xfs_extnum_t *idxp,
1957 struct xfs_bmbt_irec *gotp)
1958{
1959 struct xfs_bmbt_rec_host *ep;
1960
1961 XFS_STATS_INC(ip->i_mount, xs_look_exlist);
1962
1963 ep = xfs_iext_bno_to_ext(ifp, bno, idxp);
1964 if (!ep)
1965 return false;
1966 xfs_bmbt_get_all(ep, gotp);
1967 return true;
1968}
1969
1970/*
1971 * Return true if there is an extent at index idx, and return the expanded
1972 * extent structure at idx in that case. Else return false.
1973 */
1974bool
1975xfs_iext_get_extent(
1976 struct xfs_ifork *ifp,
1977 xfs_extnum_t idx,
1978 struct xfs_bmbt_irec *gotp)
1979{
1980 if (idx < 0 || idx >= xfs_iext_count(ifp))
1981 return false;
1982 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), gotp);
1983 return true;
1984}
Christoph Hellwig67e4e692017-08-29 15:44:11 -07001985
1986void
1987xfs_iext_update_extent(
Christoph Hellwigca5d8e52017-10-19 11:04:44 -07001988 struct xfs_inode *ip,
1989 int state,
Christoph Hellwig67e4e692017-08-29 15:44:11 -07001990 xfs_extnum_t idx,
1991 struct xfs_bmbt_irec *gotp)
1992{
Christoph Hellwigca5d8e52017-10-19 11:04:44 -07001993 struct xfs_ifork *ifp = xfs_iext_state_to_fork(ip, state);
1994
Christoph Hellwig67e4e692017-08-29 15:44:11 -07001995 ASSERT(idx >= 0);
1996 ASSERT(idx < xfs_iext_count(ifp));
1997
Christoph Hellwigca5d8e52017-10-19 11:04:44 -07001998 trace_xfs_bmap_pre_update(ip, idx, state, _RET_IP_);
Christoph Hellwig67e4e692017-08-29 15:44:11 -07001999 xfs_bmbt_set_all(xfs_iext_get_ext(ifp, idx), gotp);
Christoph Hellwigca5d8e52017-10-19 11:04:44 -07002000 trace_xfs_bmap_post_update(ip, idx, state, _RET_IP_);
Christoph Hellwig67e4e692017-08-29 15:44:11 -07002001}