blob: a30be03395fb6a44ffbd211969da1c2395cc9c31 [file] [log] [blame]
/*
 * Copyright (C) 2016 Oracle.  All Rights Reserved.
 *
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_log_format.h"
25#include "xfs_trans_resv.h"
26#include "xfs_mount.h"
27#include "xfs_defer.h"
28#include "xfs_da_format.h"
29#include "xfs_da_btree.h"
30#include "xfs_inode.h"
31#include "xfs_trans.h"
32#include "xfs_inode_item.h"
33#include "xfs_bmap.h"
34#include "xfs_bmap_util.h"
35#include "xfs_error.h"
36#include "xfs_dir2.h"
37#include "xfs_dir2_priv.h"
38#include "xfs_ioctl.h"
39#include "xfs_trace.h"
40#include "xfs_log.h"
41#include "xfs_icache.h"
42#include "xfs_pnfs.h"
43#include "xfs_refcount_btree.h"
44#include "xfs_refcount.h"
45#include "xfs_bmap_btree.h"
46#include "xfs_trans_space.h"
47#include "xfs_bit.h"
48#include "xfs_alloc.h"
49#include "xfs_quota_defs.h"
50#include "xfs_quota.h"
51#include "xfs_btree.h"
52#include "xfs_bmap_btree.h"
53#include "xfs_reflink.h"
Darrick J. Wong2a067052016-10-03 09:11:33 -070054#include "xfs_iomap.h"
Darrick J. Wong43caeb12016-10-03 09:11:35 -070055#include "xfs_rmap_btree.h"
Darrick J. Wong3993bae2016-10-03 09:11:32 -070056
/*
 * Copy on Write of Shared Blocks
 *
 * XFS must preserve "the usual" file semantics even when two files share
 * the same physical blocks.  This means that a write to one file must not
 * alter the blocks in a different file; the way that we'll do that is
 * through the use of a copy-on-write mechanism.  At a high level, that
 * means that when we want to write to a shared block, we allocate a new
 * block, write the data to the new block, and if that succeeds we map the
 * new block into the file.
 *
 * XFS provides a "delayed allocation" mechanism that defers the allocation
 * of disk blocks to dirty-but-not-yet-mapped file blocks as long as
 * possible.  This reduces fragmentation by enabling the filesystem to ask
 * for bigger chunks less often, which is exactly what we want for CoW.
 *
 * The delalloc mechanism begins when the kernel wants to make a block
 * writable (write_begin or page_mkwrite).  If the offset is not mapped, we
 * create a delalloc mapping, which is a regular in-core extent, but without
 * a real startblock.  (For delalloc mappings, the startblock encodes both
 * a flag that this is a delalloc mapping, and a worst-case estimate of how
 * many blocks might be required to put the mapping into the BMBT.)  delalloc
 * mappings are a reservation against the free space in the filesystem;
 * adjacent mappings can also be combined into fewer larger mappings.
 *
 * When dirty pages are being written out (typically in writepage), the
 * delalloc reservations are converted into real mappings by allocating
 * blocks and replacing the delalloc mapping with real ones.  A delalloc
 * mapping can be replaced by several real ones if the free space is
 * fragmented.
 *
 * We want to adapt the delalloc mechanism for copy-on-write, since the
 * write paths are similar.  The first two steps (creating the reservation
 * and allocating the blocks) are exactly the same as delalloc except that
 * the mappings must be stored in a separate CoW fork because we do not want
 * to disturb the mapping in the data fork until we're sure that the write
 * succeeded.  IO completion in this case is the process of removing the old
 * mapping from the data fork and moving the new mapping from the CoW fork to
 * the data fork.  This will be discussed shortly.
 *
 * For now, unaligned directio writes will be bounced back to the page cache.
 * Block-aligned directio writes will use the same mechanism as buffered
 * writes.
 *
 * CoW remapping must be done after the data block write completes,
 * because we don't want to destroy the old data fork map until we're sure
 * the new block has been written.  Since the new mappings are kept in a
 * separate fork, we can simply iterate these mappings to find the ones
 * that cover the file blocks that we just CoW'd.  For each extent, simply
 * unmap the corresponding range in the data fork, map the new range into
 * the data fork, and remove the extent from the CoW fork.
 *
 * Since the remapping operation can be applied to an arbitrary file
 * range, we record the need for the remap step as a flag in the ioend
 * instead of declaring a new IO type.  This is required for direct io
 * because we only have ioend for the whole dio, and we have to be able to
 * remember the presence of unwritten blocks and CoW blocks with a single
 * ioend structure.  Better yet, the more ground we can cover with one
 * ioend, the better.
 */
Darrick J. Wong2a067052016-10-03 09:11:33 -0700117
118/*
119 * Given an AG extent, find the lowest-numbered run of shared blocks
120 * within that range and return the range in fbno/flen. If
121 * find_end_of_shared is true, return the longest contiguous extent of
122 * shared blocks. If there are no shared extents, fbno and flen will
123 * be set to NULLAGBLOCK and 0, respectively.
124 */
125int
126xfs_reflink_find_shared(
127 struct xfs_mount *mp,
128 xfs_agnumber_t agno,
129 xfs_agblock_t agbno,
130 xfs_extlen_t aglen,
131 xfs_agblock_t *fbno,
132 xfs_extlen_t *flen,
133 bool find_end_of_shared)
134{
135 struct xfs_buf *agbp;
136 struct xfs_btree_cur *cur;
137 int error;
138
139 error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
140 if (error)
141 return error;
142
143 cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL);
144
145 error = xfs_refcount_find_shared(cur, agbno, aglen, fbno, flen,
146 find_end_of_shared);
147
148 xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
149
150 xfs_buf_relse(agbp);
151 return error;
152}
153
154/*
155 * Trim the mapping to the next block where there's a change in the
156 * shared/unshared status. More specifically, this means that we
157 * find the lowest-numbered extent of shared blocks that coincides with
158 * the given block mapping. If the shared extent overlaps the start of
159 * the mapping, trim the mapping to the end of the shared extent. If
160 * the shared region intersects the mapping, trim the mapping to the
161 * start of the shared extent. If there are no shared regions that
162 * overlap, just return the original extent.
163 */
164int
165xfs_reflink_trim_around_shared(
166 struct xfs_inode *ip,
167 struct xfs_bmbt_irec *irec,
168 bool *shared,
169 bool *trimmed)
170{
171 xfs_agnumber_t agno;
172 xfs_agblock_t agbno;
173 xfs_extlen_t aglen;
174 xfs_agblock_t fbno;
175 xfs_extlen_t flen;
176 int error = 0;
177
178 /* Holes, unwritten, and delalloc extents cannot be shared */
179 if (!xfs_is_reflink_inode(ip) ||
180 ISUNWRITTEN(irec) ||
181 irec->br_startblock == HOLESTARTBLOCK ||
182 irec->br_startblock == DELAYSTARTBLOCK) {
183 *shared = false;
184 return 0;
185 }
186
187 trace_xfs_reflink_trim_around_shared(ip, irec);
188
189 agno = XFS_FSB_TO_AGNO(ip->i_mount, irec->br_startblock);
190 agbno = XFS_FSB_TO_AGBNO(ip->i_mount, irec->br_startblock);
191 aglen = irec->br_blockcount;
192
193 error = xfs_reflink_find_shared(ip->i_mount, agno, agbno,
194 aglen, &fbno, &flen, true);
195 if (error)
196 return error;
197
198 *shared = *trimmed = false;
199 if (fbno == NULLAGBLOCK) {
200 /* No shared blocks at all. */
201 return 0;
202 } else if (fbno == agbno) {
203 /*
204 * The start of this extent is shared. Truncate the
205 * mapping at the end of the shared region so that a
206 * subsequent iteration starts at the start of the
207 * unshared region.
208 */
209 irec->br_blockcount = flen;
210 *shared = true;
211 if (flen != aglen)
212 *trimmed = true;
213 return 0;
214 } else {
215 /*
216 * There's a shared extent midway through this extent.
217 * Truncate the mapping at the start of the shared
218 * extent so that a subsequent iteration starts at the
219 * start of the shared region.
220 */
221 irec->br_blockcount = fbno - agbno;
222 *trimmed = true;
223 return 0;
224 }
225}
226
/*
 * Create a CoW reservation for a range of blocks within a file.
 *
 * On entry, *offset_fsb is the first block of the range and end_fsb is
 * one past the last.  The caller holds the ILOCK (see
 * xfs_reflink_reserve_cow_range, which takes XFS_ILOCK_EXCL before
 * calling here).  On success, *offset_fsb is advanced past the range
 * now covered by a CoW fork mapping, so callers can loop.  *skipped is
 * set when the mapping turned out not to be shared and no reservation
 * was made.
 */
static int
__xfs_reflink_reserve_cow(
	struct xfs_inode	*ip,
	xfs_fileoff_t		*offset_fsb,
	xfs_fileoff_t		end_fsb,
	bool			*skipped)
{
	struct xfs_bmbt_irec	got, prev, imap;
	xfs_fileoff_t		orig_end_fsb;
	int			nimaps, eof = 0, error = 0;
	bool			shared = false, trimmed = false;
	xfs_extnum_t		idx;

	/* Already reserved? Skip the refcount btree access. */
	xfs_bmap_search_extents(ip, *offset_fsb, XFS_COW_FORK, &eof, &idx,
			&got, &prev);
	if (!eof && got.br_startoff <= *offset_fsb) {
		/* Existing CoW extent covers us; report its end as progress. */
		end_fsb = orig_end_fsb = got.br_startoff + got.br_blockcount;
		trace_xfs_reflink_cow_found(ip, &got);
		goto done;
	}

	/* Read extent from the source file (data fork). */
	nimaps = 1;
	error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
			&imap, &nimaps, 0);
	if (error)
		goto out_unlock;
	ASSERT(nimaps == 1);

	/* Trim the mapping to the nearest shared extent boundary. */
	error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed);
	if (error)
		goto out_unlock;

	end_fsb = orig_end_fsb = imap.br_startoff + imap.br_blockcount;

	/* Not shared? Just report the (potentially capped) extent. */
	if (!shared) {
		*skipped = true;
		goto done;
	}

	/*
	 * Fork all the shared blocks from our write offset until the end of
	 * the extent.
	 */
	error = xfs_qm_dqattach_locked(ip, 0);
	if (error)
		goto out_unlock;

retry:
	error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, *offset_fsb,
			end_fsb - *offset_fsb, &got,
			&prev, &idx, eof);
	switch (error) {
	case 0:
		break;
	case -ENOSPC:
	case -EDQUOT:
		/* retry without any preallocation */
		trace_xfs_reflink_cow_enospc(ip, &imap);
		if (end_fsb != orig_end_fsb) {
			end_fsb = orig_end_fsb;
			goto retry;
		}
		/*FALLTHRU*/
	default:
		goto out_unlock;
	}

	trace_xfs_reflink_cow_alloc(ip, &got);
done:
	/* Tell the caller how far we got. */
	*offset_fsb = end_fsb;
out_unlock:
	/* NOTE(review): label name is historical — nothing is unlocked here;
	 * the ILOCK is taken and dropped by our callers. */
	return error;
}
305
306/* Create a CoW reservation for part of a file. */
307int
308xfs_reflink_reserve_cow_range(
309 struct xfs_inode *ip,
310 xfs_off_t offset,
311 xfs_off_t count)
312{
313 struct xfs_mount *mp = ip->i_mount;
314 xfs_fileoff_t offset_fsb, end_fsb;
Darrick J. Wong0613f162016-10-03 09:11:37 -0700315 bool skipped = false;
Darrick J. Wong2a067052016-10-03 09:11:33 -0700316 int error;
317
318 trace_xfs_reflink_reserve_cow_range(ip, offset, count);
319
320 offset_fsb = XFS_B_TO_FSBT(mp, offset);
321 end_fsb = XFS_B_TO_FSB(mp, offset + count);
322
323 xfs_ilock(ip, XFS_ILOCK_EXCL);
324 while (offset_fsb < end_fsb) {
Darrick J. Wong0613f162016-10-03 09:11:37 -0700325 error = __xfs_reflink_reserve_cow(ip, &offset_fsb, end_fsb,
326 &skipped);
Darrick J. Wong2a067052016-10-03 09:11:33 -0700327 if (error) {
328 trace_xfs_reflink_reserve_cow_range_error(ip, error,
329 _RET_IP_);
330 break;
331 }
332 }
333 xfs_iunlock(ip, XFS_ILOCK_EXCL);
334
335 return error;
336}
Darrick J. Wongef473662016-10-03 09:11:34 -0700337
/*
 * Allocate all CoW reservations covering a range of blocks in a file.
 *
 * Ensures a CoW reservation exists at *offset_fsb, then converts the
 * delalloc reservation into real blocks in the CoW fork inside a
 * transaction.  Takes and drops the ILOCK itself.  On return,
 * *offset_fsb has been advanced past whatever was mapped (possibly
 * less than the whole reservation).
 */
static int
__xfs_reflink_allocate_cow(
	struct xfs_inode	*ip,
	xfs_fileoff_t		*offset_fsb,
	xfs_fileoff_t		end_fsb)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_bmbt_irec	imap;
	struct xfs_defer_ops	dfops;
	struct xfs_trans	*tp;
	xfs_fsblock_t		first_block;
	xfs_fileoff_t		next_fsb;
	int			nimaps = 1, error;
	bool			skipped = false;

	xfs_defer_init(&dfops, &first_block);

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
			XFS_TRANS_RESERVE, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);

	/* Make sure there's a CoW reservation here; next_fsb is advanced. */
	next_fsb = *offset_fsb;
	error = __xfs_reflink_reserve_cow(ip, &next_fsb, end_fsb, &skipped);
	if (error)
		goto out_trans_cancel;

	if (skipped) {
		/*
		 * Range isn't shared, so nothing to allocate; cancelling the
		 * (clean) transaction is the cheap way out.
		 */
		*offset_fsb = next_fsb;
		goto out_trans_cancel;
	}

	/* Convert the delalloc CoW reservation into real blocks. */
	xfs_trans_ijoin(tp, ip, 0);
	error = xfs_bmapi_write(tp, ip, *offset_fsb, next_fsb - *offset_fsb,
			XFS_BMAPI_COWFORK, &first_block,
			XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK),
			&imap, &nimaps, &dfops);
	if (error)
		goto out_trans_cancel;

	/* We might not have been able to map the whole delalloc extent */
	*offset_fsb = min(*offset_fsb + imap.br_blockcount, next_fsb);

	error = xfs_defer_finish(&tp, &dfops, NULL);
	if (error)
		goto out_trans_cancel;

	error = xfs_trans_commit(tp);

out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
out_trans_cancel:
	xfs_defer_cancel(&dfops);
	xfs_trans_cancel(tp);
	goto out_unlock;
}
398
399/* Allocate all CoW reservations covering a part of a file. */
400int
401xfs_reflink_allocate_cow_range(
402 struct xfs_inode *ip,
403 xfs_off_t offset,
404 xfs_off_t count)
405{
406 struct xfs_mount *mp = ip->i_mount;
407 xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
408 xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count);
409 int error;
410
411 ASSERT(xfs_is_reflink_inode(ip));
412
413 trace_xfs_reflink_allocate_cow_range(ip, offset, count);
414
415 /*
416 * Make sure that the dquots are there.
417 */
418 error = xfs_qm_dqattach(ip, 0);
419 if (error)
420 return error;
421
422 while (offset_fsb < end_fsb) {
423 error = __xfs_reflink_allocate_cow(ip, &offset_fsb, end_fsb);
424 if (error) {
425 trace_xfs_reflink_allocate_cow_range_error(ip, error,
426 _RET_IP_);
427 break;
428 }
429 }
430
431 return error;
432}
433
Darrick J. Wongef473662016-10-03 09:11:34 -0700434/*
435 * Find the CoW reservation (and whether or not it needs block allocation)
436 * for a given byte offset of a file.
437 */
438bool
439xfs_reflink_find_cow_mapping(
440 struct xfs_inode *ip,
441 xfs_off_t offset,
442 struct xfs_bmbt_irec *imap,
443 bool *need_alloc)
444{
445 struct xfs_bmbt_irec irec;
446 struct xfs_ifork *ifp;
447 struct xfs_bmbt_rec_host *gotp;
448 xfs_fileoff_t bno;
449 xfs_extnum_t idx;
450
451 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
452 ASSERT(xfs_is_reflink_inode(ip));
453
454 /* Find the extent in the CoW fork. */
455 ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
456 bno = XFS_B_TO_FSBT(ip->i_mount, offset);
457 gotp = xfs_iext_bno_to_ext(ifp, bno, &idx);
458 if (!gotp)
459 return false;
460
461 xfs_bmbt_get_all(gotp, &irec);
462 if (bno >= irec.br_startoff + irec.br_blockcount ||
463 bno < irec.br_startoff)
464 return false;
465
466 trace_xfs_reflink_find_cow_mapping(ip, offset, 1, XFS_IO_OVERWRITE,
467 &irec);
468
469 /* If it's still delalloc, we must allocate later. */
470 *imap = irec;
471 *need_alloc = !!(isnullstartblock(irec.br_startblock));
472
473 return true;
474}
475
476/*
477 * Trim an extent to end at the next CoW reservation past offset_fsb.
478 */
479int
480xfs_reflink_trim_irec_to_next_cow(
481 struct xfs_inode *ip,
482 xfs_fileoff_t offset_fsb,
483 struct xfs_bmbt_irec *imap)
484{
485 struct xfs_bmbt_irec irec;
486 struct xfs_ifork *ifp;
487 struct xfs_bmbt_rec_host *gotp;
488 xfs_extnum_t idx;
489
490 if (!xfs_is_reflink_inode(ip))
491 return 0;
492
493 /* Find the extent in the CoW fork. */
494 ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
495 gotp = xfs_iext_bno_to_ext(ifp, offset_fsb, &idx);
496 if (!gotp)
497 return 0;
498 xfs_bmbt_get_all(gotp, &irec);
499
500 /* This is the extent before; try sliding up one. */
501 if (irec.br_startoff < offset_fsb) {
502 idx++;
503 if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
504 return 0;
505 gotp = xfs_iext_get_ext(ifp, idx);
506 xfs_bmbt_get_all(gotp, &irec);
507 }
508
509 if (irec.br_startoff >= imap->br_startoff + imap->br_blockcount)
510 return 0;
511
512 imap->br_blockcount = irec.br_startoff - imap->br_startoff;
513 trace_xfs_reflink_trim_irec(ip, imap);
514
515 return 0;
516}
Darrick J. Wong43caeb12016-10-03 09:11:35 -0700517
/*
 * Cancel all pending CoW reservations for some block range of an inode.
 *
 * Walks the CoW fork over [offset_fsb, end_fsb).  Delalloc reservations
 * are simply released back to the free-space and quota counters; real
 * extents are freed and unmapped inside the caller's rolling
 * transaction (*tpp), which xfs_defer_finish may replace.  The caller
 * is expected to hold the ILOCK and own the transaction; *tpp is
 * rejoined to the inode here before real blocks are freed.
 */
int
xfs_reflink_cancel_cow_blocks(
	struct xfs_inode		*ip,
	struct xfs_trans		**tpp,
	xfs_fileoff_t			offset_fsb,
	xfs_fileoff_t			end_fsb)
{
	struct xfs_bmbt_irec		irec;
	xfs_filblks_t			count_fsb;
	xfs_fsblock_t			firstfsb;
	struct xfs_defer_ops		dfops;
	int				error = 0;
	int				nimaps;

	if (!xfs_is_reflink_inode(ip))
		return 0;

	/* Go find the old extent in the CoW fork. */
	while (offset_fsb < end_fsb) {
		nimaps = 1;
		count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
		error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec,
				&nimaps, XFS_BMAPI_COWFORK);
		if (error)
			break;
		ASSERT(nimaps == 1);

		trace_xfs_reflink_cancel_cow(ip, &irec);

		if (irec.br_startblock == DELAYSTARTBLOCK) {
			/* Free a delayed allocation. */
			xfs_mod_fdblocks(ip->i_mount, irec.br_blockcount,
					false);
			ip->i_delayed_blks -= irec.br_blockcount;

			/* Remove the mapping from the CoW fork. */
			error = xfs_bunmapi_cow(ip, &irec);
			if (error)
				break;
		} else if (irec.br_startblock == HOLESTARTBLOCK) {
			/* empty: holes need no cancelling, just skip them */
		} else {
			/* A real extent: free the blocks via deferred ops. */
			xfs_trans_ijoin(*tpp, ip, 0);
			xfs_defer_init(&dfops, &firstfsb);

			xfs_bmap_add_free(ip->i_mount, &dfops,
					irec.br_startblock, irec.br_blockcount,
					NULL);

			/* Update quota accounting */
			xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT,
					-(long)irec.br_blockcount);

			/* Roll the transaction */
			error = xfs_defer_finish(tpp, &dfops, ip);
			if (error) {
				xfs_defer_cancel(&dfops);
				break;
			}

			/* Remove the mapping from the CoW fork. */
			error = xfs_bunmapi_cow(ip, &irec);
			if (error)
				break;
		}

		/* Roll on... */
		offset_fsb = irec.br_startoff + irec.br_blockcount;
	}

	return error;
}
593
594/*
595 * Cancel all pending CoW reservations for some byte range of an inode.
596 */
597int
598xfs_reflink_cancel_cow_range(
599 struct xfs_inode *ip,
600 xfs_off_t offset,
601 xfs_off_t count)
602{
603 struct xfs_trans *tp;
604 xfs_fileoff_t offset_fsb;
605 xfs_fileoff_t end_fsb;
606 int error;
607
608 trace_xfs_reflink_cancel_cow_range(ip, offset, count);
609
610 offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
611 if (count == NULLFILEOFF)
612 end_fsb = NULLFILEOFF;
613 else
614 end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);
615
616 /* Start a rolling transaction to remove the mappings */
617 error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
618 0, 0, 0, &tp);
619 if (error)
620 goto out;
621
622 xfs_ilock(ip, XFS_ILOCK_EXCL);
623 xfs_trans_ijoin(tp, ip, 0);
624
625 /* Scrape out the old CoW reservations */
626 error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb);
627 if (error)
628 goto out_cancel;
629
630 error = xfs_trans_commit(tp);
631
632 xfs_iunlock(ip, XFS_ILOCK_EXCL);
633 return error;
634
635out_cancel:
636 xfs_trans_cancel(tp);
637 xfs_iunlock(ip, XFS_ILOCK_EXCL);
638out:
639 trace_xfs_reflink_cancel_cow_range_error(ip, error, _RET_IP_);
640 return error;
641}
642
/*
 * Remap parts of a file's data fork after a successful CoW.
 *
 * For each extent in the CoW fork covering [offset, offset + count):
 * unmap the old blocks from the data fork, map the freshly written CoW
 * blocks into the data fork in their place, and delete the extent from
 * the CoW fork — all inside a rolling transaction.  On any failure the
 * pending deferred ops and the transaction are cancelled.
 */
int
xfs_reflink_end_cow(
	struct xfs_inode		*ip,
	xfs_off_t			offset,
	xfs_off_t			count)
{
	struct xfs_bmbt_irec		irec;	/* current CoW fork extent */
	struct xfs_bmbt_irec		uirec;	/* piece actually unmapped */
	struct xfs_trans		*tp;
	xfs_fileoff_t			offset_fsb;
	xfs_fileoff_t			end_fsb;
	xfs_filblks_t			count_fsb;
	xfs_fsblock_t			firstfsb;
	struct xfs_defer_ops		dfops;
	int				error;
	unsigned int			resblks;
	xfs_filblks_t			ilen;	/* original extent length */
	xfs_filblks_t			rlen;	/* blocks left to unmap */
	int				nimaps;

	trace_xfs_reflink_end_cow(ip, offset, count);

	offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
	end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);
	count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);

	/* Start a rolling transaction to switch the mappings */
	resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK);
	error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
			resblks, 0, 0, &tp);
	if (error)
		goto out;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, 0);

	/* Go find the old extent in the CoW fork. */
	while (offset_fsb < end_fsb) {
		/* Read extent from the CoW fork. */
		nimaps = 1;
		count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
		error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec,
				&nimaps, XFS_BMAPI_COWFORK);
		if (error)
			goto out_cancel;
		ASSERT(nimaps == 1);

		/* CoW fork extents should be real by the time IO completes. */
		ASSERT(irec.br_startblock != DELAYSTARTBLOCK);
		trace_xfs_reflink_cow_remap(ip, &irec);

		/*
		 * We can have a hole in the CoW fork if part of a directio
		 * write is CoW but part of it isn't.
		 */
		rlen = ilen = irec.br_blockcount;
		if (irec.br_startblock == HOLESTARTBLOCK)
			goto next_extent;

		/* Unmap the old blocks in the data fork. */
		while (rlen) {
			xfs_defer_init(&dfops, &firstfsb);
			/* Unmap at most one extent per pass; rlen is updated
			 * to the count of blocks still left to unmap. */
			error = __xfs_bunmapi(tp, ip, irec.br_startoff,
					&rlen, 0, 1, &firstfsb, &dfops);
			if (error)
				goto out_defer;

			/*
			 * Trim the extent to whatever got unmapped.
			 * Remember, bunmapi works backwards.
			 */
			uirec.br_startblock = irec.br_startblock + rlen;
			uirec.br_startoff = irec.br_startoff + rlen;
			uirec.br_blockcount = irec.br_blockcount - rlen;
			irec.br_blockcount = rlen;
			trace_xfs_reflink_cow_remap_piece(ip, &uirec);

			/* Map the new blocks into the data fork. */
			error = xfs_bmap_map_extent(tp->t_mountp, &dfops,
					ip, &uirec);
			if (error)
				goto out_defer;

			/* Remove the mapping from the CoW fork. */
			error = xfs_bunmapi_cow(ip, &uirec);
			if (error)
				goto out_defer;

			/* Finish the deferred ops and roll the transaction. */
			error = xfs_defer_finish(&tp, &dfops, ip);
			if (error)
				goto out_defer;
		}

next_extent:
		/* Roll on...  (ilen is the extent's pre-trim length.) */
		offset_fsb = irec.br_startoff + ilen;
	}

	error = xfs_trans_commit(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	if (error)
		goto out;
	return 0;

out_defer:
	xfs_defer_cancel(&dfops);
out_cancel:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
out:
	trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_);
	return error;
}